From c41122b1a020f2dcfe3f7904a11bcf046fb9a732 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 24 Jul 2023 09:36:09 -0700 Subject: [PATCH 001/386] Even more HyperClockCache refactoring (#11630) Summary: ... ahead of dynamic variant. * Introduce an Unref function for a common pattern. Cases that were previously using std::memory_order_acq_rel we doing so because we were saving the pre-updated value in case it might be used. Now we are explicitly throwing away the pre-updated value so do not need the acquire semantic, just release. * Introduce a reusable EvictionData struct and TrackAndReleaseEvictedEntry() function. * Based on a linter suggesting, use const Func& parameter type instead of Func for templated callable parameters. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11630 Test Plan: existing tests, and performance test with release build of cache_bench. Getting 1-2% difference between before & after from run to run, but inconsistent about which one is faster. Reviewed By: jowlyzhang Differential Revision: D47657334 Pulled By: pdillinger fbshipit-source-id: 5cf2377c0d47a39143b04be6735f98c550e8bdc3 --- cache/clock_cache.cc | 137 ++++++++++++++++++++----------------------- cache/clock_cache.h | 15 +++-- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index e5a650d86..16955004c 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -73,6 +73,16 @@ inline void FreeDataMarkEmpty(ClockHandle& h, MemoryAllocator* allocator) { MarkEmpty(h); } +// Called to undo the effect of referencing an entry for internal purposes, +// so it should not be marked as having been used. +inline void Unref(const ClockHandle& h, uint64_t count = 1) { + // Pretend we never took the reference + // WART: there's a tiny chance we release last ref to invisible + // entry here. If that happens, we let eviction take care of it. + h.meta.fetch_sub(ClockHandle::kAcquireIncrement * count, + std::memory_order_release); +} + inline bool ClockUpdate(ClockHandle& h) { uint64_t meta = h.meta.load(std::memory_order_relaxed); @@ -231,25 +241,18 @@ inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, *already_matches = true; return false; } else { - // Mismatch. Pretend we never took the reference - old_meta = - h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); + // Mismatch. + Unref(h, initial_countdown); } } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateInvisible)) { // Pretend we never took the reference - // WART/FIXME?: there's a tiny chance we release last ref to invisible - // entry here. If that happens, we let eviction take care of it. - old_meta = - h.meta.fetch_sub(ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); + Unref(h, initial_countdown); } else { // For other states, incrementing the acquire counter has no effect // so we don't need to undo it. // Slot not usable / touchable now. 
} - (void)old_meta; return false; } @@ -289,9 +292,10 @@ bool TryInsert(const ClockHandleBasicData& proto, ClockHandle& h, return b; } +// Func must be const HandleImpl& -> void callable template -void ConstApplyToEntriesRange(Func /*const HandleImpl& -> void*/ func, - const HandleImpl* begin, const HandleImpl* end, +void ConstApplyToEntriesRange(const Func& func, const HandleImpl* begin, + const HandleImpl* end, bool apply_if_will_be_deleted) { uint64_t check_state_mask = ClockHandle::kStateShareableBit; if (!apply_if_will_be_deleted) { @@ -316,8 +320,7 @@ void ConstApplyToEntriesRange(Func /*const HandleImpl& -> void*/ func, func(*h); } // Pretend we never took the reference - h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); // No net change, so don't need to check for overflow } else { // For other states, incrementing the acquire counter has no effect @@ -419,22 +422,20 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( request_evict_charge = 1; } if (request_evict_charge > 0) { - size_t evicted_charge = 0; - size_t evicted_count = 0; - static_cast(this)->Evict(request_evict_charge, &evicted_charge, - &evicted_count, state); - occupancy_.fetch_sub(evicted_count, std::memory_order_release); - if (LIKELY(evicted_charge > need_evict_charge)) { - assert(evicted_count > 0); + EvictionData data; + static_cast(this)->Evict(request_evict_charge, state, &data); + occupancy_.fetch_sub(data.freed_count, std::memory_order_release); + if (LIKELY(data.freed_charge > need_evict_charge)) { + assert(data.freed_count > 0); // Evicted more than enough - usage_.fetch_sub(evicted_charge - need_evict_charge, + usage_.fetch_sub(data.freed_charge - need_evict_charge, std::memory_order_relaxed); - } else if (evicted_charge < need_evict_charge || - (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0)) { + } else if (data.freed_charge < need_evict_charge || + (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0)) { // Roll back to old usage minus evicted - usage_.fetch_sub(evicted_charge + (new_usage - old_usage), + usage_.fetch_sub(data.freed_charge + (new_usage - old_usage), std::memory_order_relaxed); - if (evicted_charge < need_evict_charge) { + if (data.freed_charge < need_evict_charge) { return Status::MemoryLimit( "Insert failed because unable to evict entries to stay within " "capacity limit."); @@ -446,7 +447,7 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( } // If we needed to evict something and we are proceeding, we must have // evicted something. 
- assert(evicted_count > 0); + assert(data.freed_count > 0); } return Status::OK(); } @@ -488,29 +489,47 @@ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( // deal with occupancy need_evict_charge = 1; } - size_t evicted_charge = 0; - size_t evicted_count = 0; + EvictionData data; if (need_evict_charge > 0) { - static_cast(this)->Evict(need_evict_charge, &evicted_charge, - &evicted_count, state); + static_cast(this)->Evict(need_evict_charge, state, &data); // Deal with potential occupancy deficit - if (UNLIKELY(need_evict_for_occupancy) && evicted_count == 0) { - assert(evicted_charge == 0); + if (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0) { + assert(data.freed_charge == 0); // Can't meet occupancy requirement return false; } else { // Update occupancy for evictions - occupancy_.fetch_sub(evicted_count, std::memory_order_release); + occupancy_.fetch_sub(data.freed_count, std::memory_order_release); } } // Track new usage even if we weren't able to evict enough - usage_.fetch_add(total_charge - evicted_charge, std::memory_order_relaxed); + usage_.fetch_add(total_charge - data.freed_charge, std::memory_order_relaxed); // No underflow assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); // Success return true; } +void BaseClockTable::TrackAndReleaseEvictedEntry( + ClockHandle* h, BaseClockTable::EvictionData* data) { + data->freed_charge += h->GetTotalCharge(); + data->freed_count += 1; + + bool took_value_ownership = false; + if (eviction_callback_) { + // For key reconstructed from hash + UniqueId64x2 unhashed; + took_value_ownership = + eviction_callback_(ClockCacheShard::ReverseHash( + h->GetHash(), &unhashed, hash_seed_), + reinterpret_cast(h)); + } + if (!took_value_ownership) { + h->FreeData(allocator_); + } + MarkEmpty(*h); +} + template Status BaseClockTable::Insert(const ClockHandleBasicData& proto, typename Table::HandleImpl** handle, @@ -800,23 +819,18 @@ HyperClockTable::HandleImpl* HyperClockTable::Lookup( return true; } else { // Mismatch. Pretend we never took the reference - old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); } } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateInvisible)) { // Pretend we never took the reference - // WART: there's a tiny chance we release last ref to invisible - // entry here. If that happens, we let eviction take care of it. - old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); } else { // For other states, incrementing the acquire counter has no effect // so we don't need to undo it. Furthermore, we cannot safely undo // it because we did not acquire a read reference to lock the // entry in a Shareable state. } - (void)old_meta; return false; }, [&](HandleImpl* h) { @@ -941,8 +955,7 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { if (refcount > 1) { // Not last ref at some point in time during this Erase call // Pretend we never took the reference - h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); break; } else if (h->meta.compare_exchange_weak( old_meta, @@ -962,16 +975,12 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { } } else { // Mismatch. 
Pretend we never took the reference - h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); } } else if (UNLIKELY((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateInvisible)) { // Pretend we never took the reference - // WART: there's a tiny chance we release last ref to invisible - // entry here. If that happens, we let eviction take care of it. - h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + Unref(*h); } else { // For other states, incrementing the acquire counter has no effect // so we don't need to undo it. @@ -1007,8 +1016,8 @@ void HyperClockTable::EraseUnRefEntries() { template inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot( - const UniqueId64x2& hashed_key, MatchFn match_fn, AbortFn abort_fn, - UpdateFn update_fn) { + const UniqueId64x2& hashed_key, const MatchFn& match_fn, + const AbortFn& abort_fn, const UpdateFn& update_fn) { // NOTE: upper 32 bits of hashed_key[0] is used for sharding // // We use double-hashing probing. Every probe in the sequence is a @@ -1062,9 +1071,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { assert(old_usage >= total_charge); } -inline void HyperClockTable::Evict(size_t requested_charge, - size_t* freed_charge, size_t* freed_count, - InsertState&) { +inline void HyperClockTable::Evict(size_t requested_charge, InsertState&, + EvictionData* data) { // precondition assert(requested_charge > 0); @@ -1083,33 +1091,18 @@ inline void HyperClockTable::Evict(size_t requested_charge, uint64_t max_clock_pointer = old_clock_pointer + (ClockHandle::kMaxCountdown << length_bits_); - // For key reconstructed from hash - UniqueId64x2 unhashed; - for (;;) { for (size_t i = 0; i < step_size; i++) { HandleImpl& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))]; bool evicting = ClockUpdate(h); if (evicting) { Rollback(h.hashed_key, &h); - *freed_charge += h.GetTotalCharge(); - *freed_count += 1; - bool took_ownership = false; - if (eviction_callback_) { - took_ownership = - eviction_callback_(ClockCacheShard::ReverseHash( - h.GetHash(), &unhashed, hash_seed_), - reinterpret_cast(&h)); - } - if (!took_ownership) { - h.FreeData(allocator_); - } - MarkEmpty(h); + TrackAndReleaseEvictedEntry(&h, data); } } // Loop exit condition - if (*freed_charge >= requested_charge) { + if (data->freed_charge >= requested_charge) { return; } if (old_clock_pointer >= max_clock_pointer) { diff --git a/cache/clock_cache.h b/cache/clock_cache.h index fee2eb11a..75a7b43a3 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -422,6 +422,13 @@ class BaseClockTable { uint32_t GetHashSeed() const { return hash_seed_; } + struct EvictionData { + size_t freed_charge = 0; + size_t freed_count = 0; + }; + + void TrackAndReleaseEvictedEntry(ClockHandle* h, EvictionData* data); + #ifndef NDEBUG // Acquire N references void TEST_RefN(ClockHandle& handle, size_t n); @@ -528,8 +535,7 @@ class HyperClockTable : public BaseClockTable { // Runs the clock eviction algorithm trying to reclaim at least // requested_charge. Returns how much is evicted, which could be less // if it appears impossible to evict the requested amount without blocking. - void Evict(size_t requested_charge, size_t* freed_charge, size_t* freed_count, - InsertState& state); + void Evict(size_t requested_charge, InsertState& state, EvictionData* data); HandleImpl* Lookup(const UniqueId64x2& hashed_key); @@ -570,8 +576,9 @@ class HyperClockTable : public BaseClockTable { // slot probed. 
This function uses templates instead of std::function to // minimize the risk of heap-allocated closures being created. template - inline HandleImpl* FindSlot(const UniqueId64x2& hashed_key, MatchFn match_fn, - AbortFn abort_fn, UpdateFn update_fn); + inline HandleImpl* FindSlot(const UniqueId64x2& hashed_key, + const MatchFn& match_fn, const AbortFn& abort_fn, + const UpdateFn& update_fn); // Re-decrement all displacements in probe path starting from beginning // until (not including) the given handle From 9cc0986ae2e652c8d121fcebdd027ac281849e2a Mon Sep 17 00:00:00 2001 From: ywave Date: Mon, 24 Jul 2023 10:19:37 -0700 Subject: [PATCH 002/386] Fix comment in WriteBatchWithIndex::NewIteratorWithBase (#11636) Summary: Remove obsolete comment. Support for WriteBatchWithIndex::NewIteratorWithBase when overwrite_key=false is added in https://github.com/facebook/rocksdb/pull/8135, as you can clearly see in the HISTORY.md. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11636 Reviewed By: jowlyzhang Differential Revision: D47722955 Pulled By: ajkr fbshipit-source-id: 4fa44a309d9708e9f4a1530918a9aaf7114c9032 --- include/rocksdb/utilities/write_batch_with_index.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index d5867567b..ae1c08840 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -189,9 +189,6 @@ class WriteBatchWithIndex : public WriteBatchBase { // Will create a new Iterator that will use WBWIIterator as a delta and // base_iterator as base. // - // This function is only supported if the WriteBatchWithIndex was - // constructed with overwrite_key=true. - // // The returned iterator should be deleted by the caller. // The base_iterator is now 'owned' by the returned iterator. Deleting the // returned iterator will also delete the base_iterator. From 5c2a063c499911eddd276e5b0b484deb9cc4e875 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 26 Jul 2023 11:31:54 -0700 Subject: [PATCH 003/386] Clarify usage for options `ttl` and `periodic_compaction_seconds` for universal compaction (#11552) Summary: this is stacked on https://github.com/facebook/rocksdb/issues/11550 to further clarify usage of these two options for universal compaction. Similar to FIFO, the two options have the same meaning for universal compaction, which can be confusing to use. For example, for universal compaction, dynamically changing the value of `ttl` has no impact on periodic compactions. Users should dynamically change `periodic_compaction_seconds` instead. From feature matrix (https://fburl.com/daiquery/5s647hwh), there are instances where users set `ttl` to non-zero value and `periodic_compaction_seconds` to 0. For backward compatibility reason, instead of deprecating `ttl`, comments are added to mention that `periodic_compaction_seconds` are preferred. In `SanitizeOptions()`, we update the value of `periodic_compaction_seconds` to take into account value of `ttl`. The logic is documented in relevant option comment. 
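As a minimal illustration of the resolution rule described above (a standalone sketch, not the actual `SanitizeOptions()` code; the helper name here is hypothetical), the effective value for universal compaction can be derived like this:

```c++
#include <algorithm>
#include <cstdint>

// Sketch of the documented rule for universal compaction: `ttl` and
// `periodic_compaction_seconds` mean the same thing, and the stricter
// (smaller) non-zero value wins; zero for both means disabled.
uint64_t EffectiveUniversalPeriodicSeconds(uint64_t ttl,
                                           uint64_t periodic_seconds) {
  if (periodic_seconds == 0) {
    return ttl;  // may still be 0, i.e. periodic compaction disabled
  }
  if (ttl != 0) {
    return std::min(ttl, periodic_seconds);  // take the stricter limit
  }
  return periodic_seconds;
}
```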
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11552 Test Plan: * updated existing unit test `DBTestUniversalCompaction2.PeriodicCompactionDefault` Reviewed By: ajkr Differential Revision: D47381434 Pulled By: cbi42 fbshipit-source-id: bc41f29f77318bae9a96be84dd89bf5617c7fd57 --- db/column_family.cc | 32 ++++++------ db/db_universal_compaction_test.cc | 14 ++++- include/rocksdb/advanced_options.h | 51 +++++++++++++++---- .../universal_ttl_periodic_compaction.md | 1 + 4 files changed, 71 insertions(+), 27 deletions(-) create mode 100644 unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md diff --git a/db/column_family.cc b/db/column_family.cc index 23349ba58..7ad3d408f 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -383,7 +383,8 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, const uint64_t kAdjustedTtl = 30 * 24 * 60 * 60; if (result.ttl == kDefaultTtl) { if (is_block_based_table) { - // For FIFO, max_open_files is checked in ValidateOptions(). + // FIFO also requires max_open_files=-1, which is checked in + // ValidateOptions(). result.ttl = kAdjustedTtl; } else { result.ttl = 0; @@ -391,20 +392,20 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, } const uint64_t kAdjustedPeriodicCompSecs = 30 * 24 * 60 * 60; - - // Turn on periodic compactions and set them to occur once every 30 days if - // compaction filters are used and periodic_compaction_seconds is set to the - // default value. - if (result.compaction_style != kCompactionStyleFIFO) { + if (result.compaction_style == kCompactionStyleLevel) { if ((result.compaction_filter != nullptr || result.compaction_filter_factory != nullptr) && result.periodic_compaction_seconds == kDefaultPeriodicCompSecs && is_block_based_table) { result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; } - } else { - if (result.periodic_compaction_seconds != kDefaultPeriodicCompSecs && - result.periodic_compaction_seconds > 0) { + } else if (result.compaction_style == kCompactionStyleUniversal) { + if (result.periodic_compaction_seconds == kDefaultPeriodicCompSecs && + is_block_based_table) { + result.periodic_compaction_seconds = kAdjustedPeriodicCompSecs; + } + } else if (result.compaction_style == kCompactionStyleFIFO) { + if (result.periodic_compaction_seconds != kDefaultPeriodicCompSecs) { ROCKS_LOG_WARN( db_options.info_log.get(), "periodic_compaction_seconds does not support FIFO compaction. You" @@ -412,15 +413,14 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options, } } - // TTL compactions would work similar to Periodic Compactions in Universal in - // most of the cases. So, if ttl is set, execute the periodic compaction - // codepath. - if (result.compaction_style == kCompactionStyleUniversal && result.ttl != 0) { - if (result.periodic_compaction_seconds != 0) { + // For universal compaction, `ttl` and `periodic_compaction_seconds` mean the + // same thing, take the stricter value. 
+ if (result.compaction_style == kCompactionStyleUniversal) { + if (result.periodic_compaction_seconds == 0) { + result.periodic_compaction_seconds = result.ttl; + } else if (result.ttl != 0) { result.periodic_compaction_seconds = std::min(result.ttl, result.periodic_compaction_seconds); - } else { - result.periodic_compaction_seconds = result.ttl; } } diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 84f01b3d1..282a57849 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -2145,7 +2145,19 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompactionDefault) { options.ttl = 60 * 24 * 60 * 60; options.compaction_filter = nullptr; Reopen(options); - ASSERT_EQ(60 * 24 * 60 * 60, + ASSERT_EQ(30 * 24 * 60 * 60, + dbfull()->GetOptions().periodic_compaction_seconds); + + options.periodic_compaction_seconds = 45 * 24 * 60 * 60; + options.ttl = 50 * 24 * 60 * 60; + Reopen(options); + ASSERT_EQ(45 * 24 * 60 * 60, + dbfull()->GetOptions().periodic_compaction_seconds); + + options.periodic_compaction_seconds = 0; + options.ttl = 50 * 24 * 60 * 60; + Reopen(options); + ASSERT_EQ(50 * 24 * 60 * 60, dbfull()->GetOptions().periodic_compaction_seconds); } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index df7bb4e32..31bea00f3 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -869,6 +869,15 @@ struct AdvancedColumnFamilyOptions { // FIFO: Files with all keys older than TTL will be deleted. TTL is only // supported if option max_open_files is set to -1. // + // Universal: users should only set the option `periodic_compaction_seconds` + // below instead. For backward compatibility, this option has the same + // meaning as `periodic_compaction_seconds`. See more in comments for + // `periodic_compaction_seconds` on the interaction between these two + // options. + // + // This option only supports block based table format for any compaction + // style. + // // unit: seconds. Ex: 1 day = 1 * 24 * 60 * 60 // 0 means disabling. // UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to @@ -877,10 +886,32 @@ struct AdvancedColumnFamilyOptions { // Default: 30 days if using block based table. 0 (disable) otherwise. // // Dynamically changeable through SetOptions() API + // Note that dynamically changing this option only works for leveled and FIFO + // compaction. For universal compaction, dynamically changing this option has + // no effect, users should dynamically change `periodic_compaction_seconds` + // instead. uint64_t ttl = 0xfffffffffffffffe; - // Files older than this value will be picked up for compaction, and - // re-written to the same level as they were before. + // This option has different meanings for different compaction styles: + // + // Leveled: files older than `periodic_compaction_seconds` will be picked up + // for compaction and will be re-written to the same level as they were + // before. + // + // FIFO: not supported. Setting this option has no effect for FIFO compaction. + // + // Universal: when there are files older than `periodic_compaction_seconds`, + // rocksdb will try to do as large a compaction as possible including the + // last level. Such compaction is only skipped if only last level is to + // be compacted and no file in last level is older than + // `periodic_compaction_seconds`. See more in + // UniversalCompactionBuilder::PickPeriodicCompaction(). 
+ // For backward compatibility, the effective value of this option takes + // into account the value of option `ttl`. The logic is as follows: + // - both options are set to 30 days if they have the default value. + // - if both options are zero, zero is picked. Otherwise, we take the min + // value among non-zero options values (i.e. takes the stricter limit). + // // One main use of the feature is to make sure a file goes through compaction // filters periodically. Users can also use the feature to clear up SST // files using old format. @@ -890,19 +921,19 @@ struct AdvancedColumnFamilyOptions { // age is based on the file's last modified time (given by the underlying // Env). // - // Supported in leveled and universal compaction. - // In Universal compaction, rocksdb will try to do a full compaction when - // possible, see more in UniversalCompactionBuilder::PickPeriodicCompaction(). + // This option only supports block based table format for any compaction + // style. + // // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 // // Values: // 0: Turn off Periodic compactions. - // UINT64_MAX - 1 (i.e 0xfffffffffffffffe): Let RocksDB control this feature - // as needed. For now, RocksDB will change this value to 30 days - // (i.e 30 * 24 * 60 * 60) so that every file goes through the compaction - // process at least once every 30 days if not compacted sooner. + // UINT64_MAX - 1 (0xfffffffffffffffe) is special flag to allow RocksDB to + // pick default. // - // Default: UINT64_MAX - 1 (allow RocksDB to auto-tune) + // Default: 30 days if using block based table format + compaction filter + + // leveled compaction or block based table format + universal compaction. + // 0 (disabled) otherwise. // // Dynamically changeable through SetOptions() API uint64_t periodic_compaction_seconds = 0xfffffffffffffffe; diff --git a/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md b/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md new file mode 100644 index 000000000..14fbf4f69 --- /dev/null +++ b/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md @@ -0,0 +1 @@ +For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. \ No newline at end of file From 4ea7b796b7d324258c467bfd841d8d3139eaebb9 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 26 Jul 2023 16:25:06 -0700 Subject: [PATCH 004/386] Respect cutoff timestamp during flush (#11599) Summary: Make flush respect the cutoff timestamp `full_history_ts_low` as much as possible for the user-defined timestamps in Memtables only feature. We achieve this by not proceeding with the actual flushing but instead reschedule the same `FlushRequest` so a follow up flush job can continue with the check after some interval. This approach doesn't work well for atomic flush, so this feature currently is not supported in combination with atomic flush. Furthermore, this approach also requires a customized method to get the next immediately bigger user-defined timestamp. So currently it's limited to comparator that use uint64_t as the user-defined timestamp format. This support can be extended when we add such a customized method to `AdvancedColumnFamilyOptions`. For non atomic flush request, at any single time, a column family can only have as many as one FlushRequest for it in the `flush_queue_`. 
There is deduplication done at `FlushRequest` enqueueing(`SchedulePendingFlush`) and dequeueing time (`PopFirstFromFlushQueue`). We hold the db mutex between when a `FlushRequest` is popped from the queue and the same FlushRequest get rescheduled, so no other `FlushRequest` with a higher `max_memtable_id` can be added to the `flush_queue_` blocking us from re-enqueueing the same `FlushRequest`. Flush is continued nevertheless if there is risk of entering write stall mode had the flush being postponed, e.g. due to accumulation of write buffers, exceeding the `max_write_buffer_number` setting. When this happens, the newest user-defined timestamp in the involved Memtables need to be tracked and we use it to increase the `full_history_ts_low`, which is an inclusive cutoff timestamp for which RocksDB promises to keep all user-defined timestamps equal to and newer than it. Tet plan: ``` ./column_family_test --gtest_filter="*RetainUDT*" ./memtable_list_test --gtest_filter="*WithTimestamp*" ./flush_job_test --gtest_filter="*WithTimestamp*" ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11599 Reviewed By: ajkr Differential Revision: D47561586 Pulled By: jowlyzhang fbshipit-source-id: 9400445f983dd6eac489e9dd0fb5d9b99637fe89 --- db/column_family.cc | 64 ++++++++ db/column_family.h | 6 + db/column_family_test.cc | 203 +++++++++++++++++++++++++ db/db_compaction_test.cc | 6 + db/db_impl/db_impl.h | 11 ++ db/db_impl/db_impl_compaction_flush.cc | 153 +++++++++++++++---- db/db_wal_test.cc | 64 +++++--- db/db_with_timestamp_basic_test.cc | 13 +- db/flush_job.cc | 61 ++++++++ db/flush_job.h | 18 +++ db/flush_job_test.cc | 15 ++ db/memtable.cc | 44 ++++-- db/memtable.h | 27 ++++ db/memtable_list.h | 19 +++ db/memtable_list_test.cc | 90 ++++++++++- db/repair_test.cc | 13 +- include/rocksdb/advanced_options.h | 16 +- 17 files changed, 746 insertions(+), 77 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 7ad3d408f..185ec729c 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1376,6 +1376,33 @@ Status ColumnFamilyData::ValidateOptions( } } + const auto* ucmp = cf_options.comparator; + assert(ucmp); + if (ucmp->timestamp_size() > 0 && + !cf_options.persist_user_defined_timestamps) { + if (db_options.atomic_flush) { + return Status::NotSupported( + "Not persisting user-defined timestamps feature is not supported" + "in combination with atomic flush."); + } + if (db_options.allow_concurrent_memtable_write) { + return Status::NotSupported( + "Not persisting user-defined timestamps feature is not supported" + " in combination with concurrent memtable write."); + } + const char* comparator_name = cf_options.comparator->Name(); + size_t name_size = strlen(comparator_name); + const char* suffix = ".u64ts"; + size_t suffix_size = strlen(suffix); + if (name_size <= suffix_size || + strcmp(comparator_name + name_size - suffix_size, suffix) != 0) { + return Status::NotSupported( + "Not persisting user-defined timestamps" + "feature only support user-defined timestamps formatted as " + "uint64_t."); + } + } + if (cf_options.enable_blob_garbage_collection) { if (cf_options.blob_garbage_collection_age_cutoff < 0.0 || cf_options.blob_garbage_collection_age_cutoff > 1.0) { @@ -1515,6 +1542,43 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const { return data_dirs_[path_id].get(); } +bool ColumnFamilyData::ShouldPostponeFlushToRetainUDT( + uint64_t max_memtable_id) { + const Comparator* ucmp = user_comparator(); + const size_t ts_sz = ucmp->timestamp_size(); 
+ if (ts_sz == 0 || ioptions_.persist_user_defined_timestamps) { + return false; + } + // If users set the `persist_user_defined_timestamps` flag to false, they + // should also set the `full_history_ts_low` flag to indicate the range of + // user-defined timestamps to retain in memory. Otherwise, we do not + // explicitly postpone flush to retain UDTs. + const std::string& full_history_ts_low = GetFullHistoryTsLow(); + if (full_history_ts_low.empty()) { + return false; + } +#ifndef NDEBUG + Slice last_table_newest_udt; +#endif /* !NDEBUG */ + for (const Slice& table_newest_udt : + imm()->GetTablesNewestUDT(max_memtable_id)) { + assert(table_newest_udt.size() == full_history_ts_low.size()); + assert(last_table_newest_udt.empty() || + ucmp->CompareTimestamp(table_newest_udt, last_table_newest_udt) >= + 0); + // Checking the newest UDT contained in MemTable with ascending ID up to + // `max_memtable_id`. MemTable with bigger ID will have newer UDT, return + // immediately on finding the first MemTable that needs postponing. + if (ucmp->CompareTimestamp(table_newest_udt, full_history_ts_low) >= 0) { + return true; + } +#ifndef NDEBUG + last_table_newest_udt = table_newest_udt; +#endif /* !NDEBUG */ + } + return false; +} + void ColumnFamilyData::RecoverEpochNumbers() { assert(current_); auto* vstorage = current_->storage_info(); diff --git a/db/column_family.h b/db/column_family.h index 05f126ae6..f976c24cc 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -506,6 +506,12 @@ class ColumnFamilyData { return full_history_ts_low_; } + // REQUIRES: DB mutex held. + // Return true if flushing up to MemTables with ID `max_memtable_id` + // should be postponed to retain user-defined timestamps according to the + // user's setting. Called by background flush job. + bool ShouldPostponeFlushToRetainUDT(uint64_t max_memtable_id); + ThreadLocalPtr* TEST_GetLocalSV() { return local_sv_.get(); } WriteBufferManager* write_buffer_mgr() { return write_buffer_manager_; } std::shared_ptr diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 08393c350..c0574ee55 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -17,6 +17,7 @@ #include "options/options_parser.h" #include "port/port.h" #include "port/stack_trace.h" +#include "rocksdb/comparator.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" @@ -63,6 +64,9 @@ class ColumnFamilyTestBase : public testing::Test { db_options_.create_if_missing = true; db_options_.fail_if_options_file_error = true; db_options_.env = env_; + } + + void SetUp() override { EXPECT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_))); } @@ -3380,6 +3384,205 @@ TEST(ColumnFamilyTest, ValidateMemtableKVChecksumOption) { ASSERT_OK(ColumnFamilyData::ValidateOptions(db_options, cf_options)); } +// Tests the flushing behavior of a column family to retain user-defined +// timestamp when `persist_user_defined_timestamp` is false. 
+class ColumnFamilyRetainUDTTest : public ColumnFamilyTestBase { + public: + ColumnFamilyRetainUDTTest() : ColumnFamilyTestBase(kLatestFormatVersion) {} + + void SetUp() override { + db_options_.allow_concurrent_memtable_write = false; + column_family_options_.comparator = + test::BytewiseComparatorWithU64TsWrapper(); + column_family_options_.persist_user_defined_timestamps = false; + ColumnFamilyTestBase::SetUp(); + } + + Status Put(int cf, const std::string& key, const std::string& ts, + const std::string& value) { + return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(ts), + Slice(value)); + } +}; + +class TestTsComparator : public Comparator { + public: + TestTsComparator() : Comparator(8 /*ts_sz*/) {} + + int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/, + const ROCKSDB_NAMESPACE::Slice& /*b*/) const override { + return 0; + } + const char* Name() const override { return "TestTs"; } + void FindShortestSeparator( + std::string* /*start*/, + const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {} + void FindShortSuccessor(std::string* /*key*/) const override {} +}; + +TEST_F(ColumnFamilyRetainUDTTest, SanityCheck) { + Open(); + ColumnFamilyOptions cf_options; + cf_options.persist_user_defined_timestamps = false; + TestTsComparator test_comparator; + cf_options.comparator = &test_comparator; + ColumnFamilyHandle* handle; + // Not persisting user-defined timestamps feature only supports user-defined + // timestamps formatted as uint64_t. + ASSERT_TRUE( + db_->CreateColumnFamily(cf_options, "pikachu", &handle).IsNotSupported()); + + Destroy(); + // Not persisting user-defined timestamps feature doesn't work in combination + // with atomic flush. + db_options_.atomic_flush = true; + ASSERT_TRUE(TryOpen({"default"}).IsNotSupported()); + + // Not persisting user-defined timestamps feature doesn't work in combination + // with concurrent memtable write. + db_options_.atomic_flush = false; + db_options_.allow_concurrent_memtable_write = true; + ASSERT_TRUE(TryOpen({"default"}).IsNotSupported()); + Close(); +} + +TEST_F(ColumnFamilyRetainUDTTest, FullHistoryTsLowNotSet) { + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) { + ASSERT_NE(nullptr, arg); + auto reschedule_count = *static_cast(arg); + ASSERT_EQ(1, reschedule_count); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + Open(); + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(Put(0, "foo", write_ts, "v1")); + // No `full_history_ts_low` explicitly set by user, flush is continued + // without checking if its UDTs expired. 
+ ASSERT_OK(Flush(0)); + + // After flush, `full_history_ts_low` should be automatically advanced to + // the effective cutoff timestamp: write_ts + 1 + std::string cutoff_ts; + PutFixed64(&cutoff_ts, 2); + std::string effective_full_history_ts_low; + ASSERT_OK( + db_->GetFullHistoryTsLow(handles_[0], &effective_full_history_ts_low)); + ASSERT_EQ(cutoff_ts, effective_full_history_ts_low); + Close(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(ColumnFamilyRetainUDTTest, AllKeysExpired) { + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) { + ASSERT_NE(nullptr, arg); + auto reschedule_count = *static_cast(arg); + ASSERT_EQ(1, reschedule_count); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + Open(); + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(Put(0, "foo", write_ts, "v1")); + std::string cutoff_ts; + PutFixed64(&cutoff_ts, 3); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], cutoff_ts)); + // All keys expired w.r.t the configured `full_history_ts_low`, flush continue + // without the need for a re-schedule. + ASSERT_OK(Flush(0)); + + // `full_history_ts_low` stays unchanged after flush. + std::string effective_full_history_ts_low; + ASSERT_OK( + db_->GetFullHistoryTsLow(handles_[0], &effective_full_history_ts_low)); + ASSERT_EQ(cutoff_ts, effective_full_history_ts_low); + Close(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} +TEST_F(ColumnFamilyRetainUDTTest, NotAllKeysExpiredFlushToAvoidWriteStall) { + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) { + ASSERT_NE(nullptr, arg); + auto reschedule_count = *static_cast(arg); + ASSERT_EQ(1, reschedule_count); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + Open(); + std::string cutoff_ts; + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(Put(0, "foo", write_ts, "v1")); + PutFixed64(&cutoff_ts, 1); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], cutoff_ts)); + ASSERT_OK(db_->SetOptions(handles_[0], {{"max_write_buffer_number", "1"}})); + // Not all keys expired, but flush is continued without a re-schedule because + // of risk of write stall. 
+ ASSERT_OK(Flush(0)); + + // After flush, `full_history_ts_low` should be automatically advanced to + // the effective cutoff timestamp: write_ts + 1 + std::string effective_full_history_ts_low; + ASSERT_OK( + db_->GetFullHistoryTsLow(handles_[0], &effective_full_history_ts_low)); + + cutoff_ts.clear(); + PutFixed64(&cutoff_ts, 2); + ASSERT_EQ(cutoff_ts, effective_full_history_ts_low); + Close(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_F(ColumnFamilyRetainUDTTest, NotAllKeysExpiredFlushRescheduled) { + std::string cutoff_ts; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::AfterRetainUDTReschedule:cb", [&](void* /*arg*/) { + // Increasing full_history_ts_low so all keys expired after the initial + // FlushRequest is rescheduled + cutoff_ts.clear(); + PutFixed64(&cutoff_ts, 3); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], cutoff_ts)); + }); + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundFlush:CheckFlushRequest:cb", [&](void* arg) { + ASSERT_NE(nullptr, arg); + auto reschedule_count = *static_cast(arg); + ASSERT_EQ(2, reschedule_count); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Open(); + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(Put(0, "foo", write_ts, "v1")); + PutFixed64(&cutoff_ts, 1); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(handles_[0], cutoff_ts)); + // Not all keys expired, and there is no risk of write stall. Flush is + // rescheduled. The actual flush happens after `full_history_ts_low` is + // increased to mark all keys expired. + ASSERT_OK(Flush(0)); + + std::string effective_full_history_ts_low; + ASSERT_OK( + db_->GetFullHistoryTsLow(handles_[0], &effective_full_history_ts_low)); + // `full_history_ts_low` stays unchanged. + ASSERT_EQ(cutoff_ts, effective_full_history_ts_low); + Close(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index a1b4035e0..00a33669b 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -5231,6 +5231,12 @@ TEST_F(DBCompactionTest, CompactRangeDelayedByImmMemTableCount) { } auto manual_compaction_thread = port::Thread([this]() { + // Write something to make the current Memtable non-empty, so an extra + // immutable Memtable will be created upon manual flush requested by + // CompactRange, triggering a write stall mode to be entered because of + // accumulation of write buffers due to manual flush. + Random compact_rnd(301); + ASSERT_OK(Put(Key(0), compact_rnd.RandomString(1024))); CompactRangeOptions cro; cro.allow_write_stall = false; ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 57c43b517..27f539182 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2063,6 +2063,10 @@ class DBImpl : public DB { // flush is considered complete. 
std::unordered_map cfd_to_max_mem_id_to_persist; + +#ifndef NDEBUG + int reschedule_count = 1; +#endif /* !NDEBUG */ }; void GenerateFlushRequest(const autovector& cfds, @@ -2091,6 +2095,7 @@ class DBImpl : public DB { Env::Priority thread_pri); Status BackgroundFlush(bool* madeProgress, JobContext* job_context, LogBuffer* log_buffer, FlushReason* reason, + bool* flush_rescheduled_to_retain_udt, Env::Priority thread_pri); bool EnoughRoomForCompaction(ColumnFamilyData* cfd, @@ -2103,6 +2108,12 @@ class DBImpl : public DB { std::unique_ptr* token, LogBuffer* log_buffer); + // Return true if the `FlushRequest` can be rescheduled to retain the UDT. + // Only true if there are user-defined timestamps in the involved MemTables + // with newer than cutoff timestamp `full_history_ts_low` and not flushing + // immediately will not cause entering write stall mode. + bool ShouldRescheduleFlushRequestToRetainUDT(const FlushRequest& flush_req); + // Schedule background tasks Status StartPeriodicTaskScheduler(); diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index c64d4ecdb..4e0372e69 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -21,6 +21,7 @@ #include "monitoring/thread_status_util.h" #include "test_util/sync_point.h" #include "util/cast_util.h" +#include "util/coding.h" #include "util/concurrent_task_limiter_impl.h" namespace ROCKSDB_NAMESPACE { @@ -76,6 +77,40 @@ bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force, return false; } +bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT( + const FlushRequest& flush_req) { + mutex_.AssertHeld(); + assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1); + ColumnFamilyData* cfd = flush_req.cfd_to_max_mem_id_to_persist.begin()->first; + uint64_t max_memtable_id = + flush_req.cfd_to_max_mem_id_to_persist.begin()->second; + if (cfd->IsDropped() || + !cfd->ShouldPostponeFlushToRetainUDT(max_memtable_id)) { + return false; + } + // Check if holding on the flush will cause entering write stall mode. + // Write stall entered because of the accumulation of write buffers can be + // alleviated if we continue with the flush instead of postponing it. + const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions(); + + // Taking the status of the active Memtable into consideration so that we are + // not just checking if DB is currently already in write stall mode. + int mem_to_flush = cfd->mem()->ApproximateMemoryUsageFast() >= + cfd->mem()->write_buffer_size() / 2 + ? 1 + : 0; + WriteStallCondition write_stall = + ColumnFamilyData::GetWriteStallConditionAndCause( + cfd->imm()->NumNotFlushed() + mem_to_flush, /*num_l0_files=*/0, + /*num_compaction_needed_bytes=*/0, mutable_cf_options, + *cfd->ioptions()) + .first; + if (write_stall != WriteStallCondition::kNormal) { + return false; + } + return true; +} + IOStatus DBImpl::SyncClosedLogs(JobContext* job_context, VersionEdit* synced_wals) { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start"); @@ -2506,8 +2541,11 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd, // check whether one extra immutable memtable or an extra L0 file would // cause write stalling mode to be entered. It could still enter stall // mode due to pending compaction bytes, but that's less common + // No extra immutable Memtable will be created if the current Memtable is + // empty. + int mem_to_flush = cfd->mem()->IsEmpty() ? 
0 : 1; write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause( - cfd->imm()->NumNotFlushed() + 1, + cfd->imm()->NumNotFlushed() + mem_to_flush, vstorage->l0_delay_trigger_count() + 1, vstorage->estimated_compaction_needed_bytes(), mutable_cf_options, *cfd->ioptions()) @@ -2945,6 +2983,7 @@ void DBImpl::UnscheduleFlushCallback(void* arg) { Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, LogBuffer* log_buffer, FlushReason* reason, + bool* flush_rescheduled_to_retain_udt, Env::Priority thread_pri) { mutex_.AssertHeld(); @@ -2970,12 +3009,43 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, autovector column_families_not_to_flush; while (!flush_queue_.empty()) { // This cfd is already referenced - auto [flush_reason, cfd_to_max_mem_id_to_persist] = - PopFirstFromFlushQueue(); + FlushRequest flush_req = PopFirstFromFlushQueue(); + FlushReason flush_reason = flush_req.flush_reason; + if (!immutable_db_options_.atomic_flush && + ShouldRescheduleFlushRequestToRetainUDT(flush_req)) { + assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1); + ColumnFamilyData* cfd = + flush_req.cfd_to_max_mem_id_to_persist.begin()->first; + if (cfd->UnrefAndTryDelete()) { + return Status::OK(); + } + ROCKS_LOG_BUFFER(log_buffer, + "FlushRequest for column family %s is re-scheduled to " + "retain user-defined timestamps.", + cfd->GetName().c_str()); + // Reschedule the `FlushRequest` as is without checking dropped column + // family etc. The follow-up job will do the check anyways, so save the + // duplication. Column family is deduplicated by `SchdulePendingFlush` and + // `PopFirstFromFlushQueue` contains at flush request enqueueing and + // dequeueing time. + // This flush request is rescheduled right after it's popped from the + // queue while the db mutex is held, so there should be no other + // FlushRequest for the same column family with higher `max_memtable_id` + // in the queue to block the reschedule from succeeding. +#ifndef NDEBUG + flush_req.reschedule_count += 1; +#endif /* !NDEBUG */ + SchedulePendingFlush(flush_req); + *reason = flush_reason; + *flush_rescheduled_to_retain_udt = true; + return Status::TryAgain(); + } superversion_contexts.clear(); - superversion_contexts.reserve(cfd_to_max_mem_id_to_persist.size()); + superversion_contexts.reserve( + flush_req.cfd_to_max_mem_id_to_persist.size()); - for (const auto& [cfd, max_memtable_id] : cfd_to_max_mem_id_to_persist) { + for (const auto& [cfd, max_memtable_id] : + flush_req.cfd_to_max_mem_id_to_persist) { if (cfd->GetMempurgeUsed()) { // If imm() contains silent memtables (e.g.: because // MemPurge was activated), requesting a flush will @@ -2992,7 +3062,13 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, bg_flush_args.emplace_back(cfd, max_memtable_id, &(superversion_contexts.back()), flush_reason); } - if (!bg_flush_args.empty()) { + // `MaybeScheduleFlushOrCompaction` schedules as many `BackgroundCallFlush` + // jobs as the number of `FlushRequest` in the `flush_queue_`, a.k.a + // `unscheduled_flushes_`. So it's sufficient to make each `BackgroundFlush` + // handle one `FlushRequest` and each have a Status returned. 
+ if (!bg_flush_args.empty() || !column_families_not_to_flush.empty()) { + TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundFlush:CheckFlushRequest:cb", + const_cast(&flush_req.reschedule_count)); break; } } @@ -3054,11 +3130,20 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { pending_outputs_inserted_elem(new std::list::iterator( CaptureCurrentFileNumberInPendingOutputs())); FlushReason reason; - - Status s = BackgroundFlush(&made_progress, &job_context, &log_buffer, - &reason, thread_pri); - if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() && - reason != FlushReason::kErrorRecovery) { + bool flush_rescheduled_to_retain_udt = false; + Status s = + BackgroundFlush(&made_progress, &job_context, &log_buffer, &reason, + &flush_rescheduled_to_retain_udt, thread_pri); + if (s.IsTryAgain() && flush_rescheduled_to_retain_udt) { + bg_cv_.SignalAll(); // In case a waiter can proceed despite the error + mutex_.Unlock(); + TEST_SYNC_POINT_CALLBACK("DBImpl::AfterRetainUDTReschedule:cb", nullptr); + immutable_db_options_.clock->SleepForMicroseconds( + 100000); // prevent hot loop + mutex_.Lock(); + } else if (!s.ok() && !s.IsShutdownInProgress() && + !s.IsColumnFamilyDropped() && + reason != FlushReason::kErrorRecovery) { // Wait a little bit before retrying background flush in // case this is an environmental problem and we do not want to // chew up resources for failed flushes for the duration of @@ -3079,29 +3164,33 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0"); ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); - - // If flush failed, we want to delete all temporary files that we might have - // created. Thus, we force full scan in FindObsoleteFiles() - FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() && - !s.IsColumnFamilyDropped()); - // delete unnecessary files if any, this is done outside the mutex - if (job_context.HaveSomethingToClean() || - job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { - mutex_.Unlock(); - TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound"); - // Have to flush the info logs before bg_flush_scheduled_-- - // because if bg_flush_scheduled_ becomes 0 and the lock is - // released, the deconstructor of DB can kick in and destroy all the - // states of DB so info_log might not be available after that point. - // It also applies to access other states that DB owns. - log_buffer.FlushBufferToLog(); - if (job_context.HaveSomethingToDelete()) { - PurgeObsoleteFiles(job_context); + // There is no need to do these clean up if the flush job is rescheduled + // to retain user-defined timestamps because the job doesn't get to the + // stage of actually flushing the MemTables. + if (!flush_rescheduled_to_retain_udt) { + // If flush failed, we want to delete all temporary files that we might + // have created. 
Thus, we force full scan in FindObsoleteFiles() + FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() && + !s.IsColumnFamilyDropped()); + // delete unnecessary files if any, this is done outside the mutex + if (job_context.HaveSomethingToClean() || + job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { + mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound"); + // Have to flush the info logs before bg_flush_scheduled_-- + // because if bg_flush_scheduled_ becomes 0 and the lock is + // released, the deconstructor of DB can kick in and destroy all the + // states of DB so info_log might not be available after that point. + // It also applies to access other states that DB owns. + log_buffer.FlushBufferToLog(); + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + mutex_.Lock(); } - job_context.Clean(); - mutex_.Lock(); + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp"); } - TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp"); assert(num_running_flushes_ > 0); num_running_flushes_--; diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 232d32972..915ebe0b0 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -324,10 +324,14 @@ class DBWALTestWithTimestamp } Status CreateAndReopenWithCFWithTs(const std::vector& cfs, - const Options& options, + Options& ts_options, bool avoid_flush_during_recovery = false) { - CreateColumnFamilies(cfs, options); - return ReopenColumnFamiliesWithTs(cfs, options, + Options default_options = CurrentOptions(); + default_options.allow_concurrent_memtable_write = + persist_udt_ ? true : false; + DestroyAndReopen(default_options); + CreateColumnFamilies(cfs, ts_options); + return ReopenColumnFamiliesWithTs(cfs, ts_options, avoid_flush_during_recovery); } @@ -336,6 +340,8 @@ class DBWALTestWithTimestamp bool avoid_flush_during_recovery = false) { Options default_options = CurrentOptions(); default_options.create_if_missing = false; + default_options.allow_concurrent_memtable_write = + persist_udt_ ? true : false; default_options.avoid_flush_during_recovery = avoid_flush_during_recovery; ts_options.create_if_missing = false; @@ -370,12 +376,11 @@ class DBWALTestWithTimestamp TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { // Set up the option that enables user defined timestmp size. - std::string ts1 = Timestamp(1, 0); - const size_t kTimestampSize = ts1.size(); - TestComparator test_cmp(kTimestampSize); + std::string ts1; + PutFixed64(&ts1, 1); Options ts_options; ts_options.create_if_missing = true; - ts_options.comparator = &test_cmp; + ts_options.comparator = test::BytewiseComparatorWithU64TsWrapper(); // Test that user-defined timestamps are recovered from WAL regardless of // the value of this flag because UDTs are saved in WAL nonetheless. // We however need to explicitly disable flush during recovery by setting @@ -405,14 +410,16 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { // Write more value versions for key "foo" and "bar" before and after second // reopen. 
- std::string ts2 = Timestamp(2, 0); + std::string ts2; + PutFixed64(&ts2, 2); ASSERT_OK(Put(1, "bar", ts2, "v2")); ASSERT_OK(Put(1, "foo", ts2, "v3")); ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, avoid_flush_during_recovery)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 0U); - std::string ts3 = Timestamp(3, 0); + std::string ts3; + PutFixed64(&ts3, 3); ASSERT_OK(Put(1, "foo", ts3, "v4")); // Do a timestamped read with ts1 after third reopen. @@ -435,11 +442,26 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { } while (ChangeWalOptions()); } +class TestTsSzComparator : public Comparator { + public: + explicit TestTsSzComparator(size_t ts_sz) : Comparator(ts_sz) {} + + int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/, + const ROCKSDB_NAMESPACE::Slice& /*b*/) const override { + return 0; + } + const char* Name() const override { return "TestTsSzComparator.u64ts"; } + void FindShortestSeparator( + std::string* /*start*/, + const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {} + void FindShortSuccessor(std::string* /*key*/) const override {} +}; + TEST_P(DBWALTestWithTimestamp, RecoverInconsistentTimestamp) { // Set up the option that enables user defined timestmp size. - std::string ts = Timestamp(1, 0); - const size_t kTimestampSize = ts.size(); - TestComparator test_cmp(kTimestampSize); + std::string ts; + PutFixed16(&ts, 1); + TestTsSzComparator test_cmp(2); Options ts_options; ts_options.create_if_missing = true; ts_options.comparator = &test_cmp; @@ -452,11 +474,11 @@ TEST_P(DBWALTestWithTimestamp, RecoverInconsistentTimestamp) { // In real use cases, switching to a different user comparator is prohibited // by a sanity check during DB open that does a user comparator name // comparison. This test mocked and bypassed that sanity check because the - // before and after user comparator are both named "TestComparator". This is - // to test the user-defined timestamp recovery logic for WAL files have - // the intended consistency check. + // before and after user comparator are both named "TestTsSzComparator.u64ts". + // This is to test the user-defined timestamp recovery logic for WAL files + // have the intended consistency check. // `HandleWriteBatchTimestampSizeDifference` in udt_util.h has more details. - TestComparator diff_test_cmp(kTimestampSize + 1); + TestTsSzComparator diff_test_cmp(3); ts_options.comparator = &diff_test_cmp; ASSERT_TRUE( ReopenColumnFamiliesWithTs({"pikachu"}, ts_options).IsInvalidArgument()); @@ -464,13 +486,13 @@ TEST_P(DBWALTestWithTimestamp, RecoverInconsistentTimestamp) { TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { // Set up the option that enables user defined timestamp size. 
- std::string min_ts = Timestamp(0, 0); - std::string write_ts = Timestamp(1, 0); - const size_t kTimestampSize = write_ts.size(); - TestComparator test_cmp(kTimestampSize); + std::string min_ts; + std::string write_ts; + PutFixed64(&min_ts, 0); + PutFixed64(&write_ts, 1); Options ts_options; ts_options.create_if_missing = true; - ts_options.comparator = &test_cmp; + ts_options.comparator = test::BytewiseComparatorWithU64TsWrapper(); ts_options.persist_user_defined_timestamps = persist_udt_; std::string smallest_ukey_without_ts = "baz"; diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index bd82e49e0..9b8ca31d3 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -3289,15 +3289,18 @@ TEST_P(HandleFileBoundariesTest, ConfigurePersistUdt) { options.env = env_; // Write a timestamp that is not the min timestamp to help test the behavior // of flag `persist_user_defined_timestamps`. - std::string write_ts = Timestamp(1, 0); - std::string min_ts = Timestamp(0, 0); + std::string write_ts; + std::string min_ts; + PutFixed64(&write_ts, 1); + PutFixed64(&min_ts, 0); std::string smallest_ukey_without_ts = "bar"; std::string largest_ukey_without_ts = "foo"; - const size_t kTimestampSize = write_ts.size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); bool persist_udt = test::ShouldPersistUDT(GetParam()); options.persist_user_defined_timestamps = persist_udt; + if (!persist_udt) { + options.allow_concurrent_memtable_write = false; + } DestroyAndReopen(options); ASSERT_OK( diff --git a/db/flush_job.cc b/db/flush_job.cc index 3854e967a..bfdd9a059 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -189,6 +189,10 @@ void FlushJob::PickMemTable() { return; } + // Track effective cutoff user-defined timestamp during flush if + // user-defined timestamps can be stripped. + GetEffectiveCutoffUDTForPickedMemTables(); + ReportFlushInputSize(mems_); // entries mems are (implicitly) sorted in ascending order by their created @@ -294,6 +298,10 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, s = Status::ShutdownInProgress("Database shutdown"); } + if (s.ok()) { + s = MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT(); + } + if (!s.ok()) { cfd_->imm()->RollbackMemtableFlush(mems_, meta_.fd.GetNumber()); } else if (write_manifest_) { @@ -1097,4 +1105,57 @@ std::unique_ptr FlushJob::GetFlushJobInfo() const { return info; } +void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() { + db_mutex_->AssertHeld(); + assert(pick_memtable_called); + const auto* ucmp = cfd_->internal_comparator().user_comparator(); + assert(ucmp); + const size_t ts_sz = ucmp->timestamp_size(); + if (db_options_.atomic_flush || ts_sz == 0 || + cfd_->ioptions()->persist_user_defined_timestamps) { + return; + } + for (MemTable* m : mems_) { + Slice table_newest_udt = m->GetNewestUDT(); + // The picked Memtables should have ascending ID, and should have + // non-decreasing newest user-defined timestamps. 
+ if (!cutoff_udt_.empty()) { + assert(table_newest_udt.size() == cutoff_udt_.size()); + assert(ucmp->CompareTimestamp(table_newest_udt, cutoff_udt_) >= 0); + cutoff_udt_.clear(); + } + cutoff_udt_.assign(table_newest_udt.data(), table_newest_udt.size()); + } +} + +Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() { + db_mutex_->AssertHeld(); + const auto* ucmp = cfd_->user_comparator(); + assert(ucmp); + const std::string& full_history_ts_low = cfd_->GetFullHistoryTsLow(); + // Update full_history_ts_low to right above cutoff udt only if that would + // increase it. + if (cutoff_udt_.empty() || + (!full_history_ts_low.empty() && + ucmp->CompareTimestamp(cutoff_udt_, full_history_ts_low) < 0)) { + return Status::OK(); + } + Slice cutoff_udt_slice = cutoff_udt_; + uint64_t cutoff_udt_ts = 0; + bool format_res = GetFixed64(&cutoff_udt_slice, &cutoff_udt_ts); + assert(format_res); + (void)format_res; + std::string new_full_history_ts_low; + // TODO(yuzhangyu): Add a member to AdvancedColumnFamilyOptions for an + // operation to get the next immediately larger user-defined timestamp to + // expand this feature to other user-defined timestamp formats. + PutFixed64(&new_full_history_ts_low, cutoff_udt_ts + 1); + VersionEdit edit; + edit.SetColumnFamily(cfd_->GetID()); + edit.SetFullHistoryTsLow(new_full_history_ts_low); + return versions_->LogAndApply(cfd_, *cfd_->GetLatestMutableCFOptions(), + ReadOptions(), &edit, db_mutex_, + output_file_directory_); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/flush_job.h b/db/flush_job.h index d3902f0bd..43d10ffe9 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -127,6 +127,20 @@ class FlushJob { Env::IOPriority GetRateLimiterPriorityForWrite(); std::unique_ptr GetFlushJobInfo() const; + // Require db_mutex held. + // Called only when UDT feature is enabled and + // `persist_user_defined_timestamps` flag is false. Because we will refrain + // from flushing as long as there are still UDTs in a memtable that hasn't + // expired w.r.t `full_history_ts_low`. However, flush is continued if there + // is risk of entering write stall mode. In that case, we need + // to track the effective cutoff timestamp below which all the udts are + // removed because of flush, and use it to increase `full_history_ts_low` if + // the effective cutoff timestamp is newer. See + // `MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT` for details. + void GetEffectiveCutoffUDTForPickedMemTables(); + + Status MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT(); + const std::string& dbname_; const std::string db_id_; const std::string db_session_id_; @@ -195,6 +209,10 @@ class FlushJob { // db mutex const SeqnoToTimeMapping& db_impl_seqno_time_mapping_; SeqnoToTimeMapping seqno_to_time_mapping_; + + // Keeps track of the newest user-defined timestamp for this flush job if + // `persist_user_defined_timestamps` flag is false. 
+ std::string cutoff_udt_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index f2915ed39..2e6c4d426 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -654,6 +654,10 @@ class FlushJobTimestampTest installed_file_meta->smallest.Encode()); ASSERT_EQ(expected_largest.Encode(), installed_file_meta->largest.Encode()); } + void CheckFullHistoryTsLow(ColumnFamilyData* cfd, + const std::string& expected_full_history_ts_low) { + ASSERT_EQ(expected_full_history_ts_low, cfd->GetFullHistoryTsLow()); + } }; TEST_P(FlushJobTimestampTest, AllKeysExpired) { @@ -684,6 +688,7 @@ TEST_P(FlushJobTimestampTest, AllKeysExpired) { EventLogger event_logger(db_options_.info_log.get()); std::string full_history_ts_low; PutFixed64(&full_history_ts_low, std::numeric_limits::max()); + cfd->SetFullHistoryTsLow(full_history_ts_low); FlushJob flush_job( dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), std::numeric_limits::max() /* memtable_id */, env_options_, @@ -714,6 +719,7 @@ TEST_P(FlushJobTimestampTest, AllKeysExpired) { } InternalKey ikey(key, curr_seq_ - 1, ValueType::kTypeDeletionWithTimestamp); CheckFileMetaData(cfd, ikey, ikey, &fmeta); + CheckFullHistoryTsLow(cfd, full_history_ts_low); } job_context.Clean(); @@ -744,6 +750,7 @@ TEST_P(FlushJobTimestampTest, NoKeyExpired) { EventLogger event_logger(db_options_.info_log.get()); std::string full_history_ts_low; PutFixed64(&full_history_ts_low, 0); + cfd->SetFullHistoryTsLow(full_history_ts_low); FlushJob flush_job( dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(), std::numeric_limits::max() /* memtable_id */, env_options_, @@ -765,6 +772,7 @@ TEST_P(FlushJobTimestampTest, NoKeyExpired) { std::string ukey = test::EncodeInt(0); std::string smallest_key; std::string largest_key; + std::string expected_full_history_ts_low; if (!persist_udt_) { // When `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` flag // is set to false. The user-defined timestamp is stripped from user key @@ -772,14 +780,21 @@ TEST_P(FlushJobTimestampTest, NoKeyExpired) { // timestamp, which is hardcoded to be all zeros for now. smallest_key = ukey + test::EncodeInt(0); largest_key = ukey + test::EncodeInt(0); + // When not all keys have expired and `persist_user_defined_timestamps` is + // false. UDTs will be removed during flush, `full_history_ts_low` should + // be automatically increased to above the effective cutoff UDT in the + // flush. 
+ PutFixed64(&expected_full_history_ts_low, curr_ts_.fetch_add(1)); } else { smallest_key = ukey + test::EncodeInt(curr_ts_.load(std::memory_order_relaxed) - 1); largest_key = ukey + test::EncodeInt(kStartTs); + expected_full_history_ts_low = full_history_ts_low; } InternalKey smallest(smallest_key, curr_seq_ - 1, ValueType::kTypeValue); InternalKey largest(largest_key, kStartSeq, ValueType::kTypeValue); CheckFileMetaData(cfd, smallest, largest, &fmeta); + CheckFullHistoryTsLow(cfd, expected_full_history_ts_low); } job_context.Clean(); ASSERT_TRUE(to_delete.empty()); diff --git a/db/memtable.cc b/db/memtable.cc index dfef13a15..216bb8d6e 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -143,6 +143,10 @@ MemTable::MemTable(const InternalKeyComparator& cmp, new_cache.get()), std::memory_order_relaxed); } + const Comparator* ucmp = cmp.user_comparator(); + assert(ucmp); + ts_sz_ = ucmp->timestamp_size(); + persist_user_defined_timestamps_ = ioptions.persist_user_defined_timestamps; } MemTable::~MemTable() { @@ -357,7 +361,8 @@ class MemTableIterator : public InternalIterator { !mem.GetImmutableMemTableOptions()->inplace_update_support), protection_bytes_per_key_(mem.moptions_.protection_bytes_per_key), status_(Status::OK()), - logger_(mem.moptions_.info_log) { + logger_(mem.moptions_.info_log), + ts_sz_(mem.ts_sz_) { if (use_range_del_table) { iter_ = mem.range_del_table_->GetIterator(arena); } else if (prefix_extractor_ != nullptr && !read_options.total_order_seek && @@ -400,8 +405,7 @@ class MemTableIterator : public InternalIterator { PERF_COUNTER_ADD(seek_on_memtable_count, 1); if (bloom_) { // iterator should only use prefix bloom filter - auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size(); - Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz)); + Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_)); if (prefix_extractor_->InDomain(user_k_without_ts)) { if (!bloom_->MayContain( prefix_extractor_->Transform(user_k_without_ts))) { @@ -421,8 +425,7 @@ class MemTableIterator : public InternalIterator { PERF_TIMER_GUARD(seek_on_memtable_time); PERF_COUNTER_ADD(seek_on_memtable_count, 1); if (bloom_) { - auto ts_sz = comparator_.comparator.user_comparator()->timestamp_size(); - Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz)); + Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_)); if (prefix_extractor_->InDomain(user_k_without_ts)) { if (!bloom_->MayContain( prefix_extractor_->Transform(user_k_without_ts))) { @@ -512,6 +515,7 @@ class MemTableIterator : public InternalIterator { uint32_t protection_bytes_per_key_; Status status_; Logger* logger_; + size_t ts_sz_; void VerifyEntryChecksum() { if (protection_bytes_per_key_ > 0 && Valid()) { @@ -625,8 +629,7 @@ Status MemTable::VerifyEncodedEntry(Slice encoded, if (!GetVarint32(&encoded, &ikey_len)) { return Status::Corruption("Unable to parse internal key length"); } - size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); - if (ikey_len < 8 + ts_sz) { + if (ikey_len < 8 + ts_sz_) { return Status::Corruption("Internal key length too short"); } if (ikey_len > encoded.size()) { @@ -725,8 +728,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type, } } - size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); - Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz); + Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz_); if (!allow_concurrent) { // Extract prefix for insert with hint. 
@@ -776,6 +778,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type, assert(first_seqno_.load() >= earliest_seqno_.load()); } assert(post_process_info == nullptr); + // TODO(yuzhangyu): support updating newest UDT for when `allow_concurrent` + // is true. + MaybeUpdateNewestUDT(key_slice); UpdateFlushState(); } else { bool res = (hint == nullptr) @@ -1286,8 +1291,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, bool found_final_value = false; bool merge_in_progress = s->IsMergeInProgress(); bool may_contain = true; - size_t ts_sz = GetInternalKeyComparator().user_comparator()->timestamp_size(); - Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz); + Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz_); bool bloom_checked = false; if (bloom_filter_) { // when both memtable_whole_key_filtering and prefix_extractor_ are set, @@ -1672,4 +1676,22 @@ uint64_t MemTable::GetMinLogContainingPrepSection() { return min_prep_log_referenced_.load(); } +void MemTable::MaybeUpdateNewestUDT(const Slice& user_key) { + if (ts_sz_ == 0 || persist_user_defined_timestamps_) { + return; + } + const Comparator* ucmp = GetInternalKeyComparator().user_comparator(); + Slice udt = ExtractTimestampFromUserKey(user_key, ts_sz_); + if (newest_udt_.empty() || ucmp->CompareTimestamp(udt, newest_udt_) > 0) { + newest_udt_ = udt; + } +} + +const Slice& MemTable::GetNewestUDT() const { + // This path should not be invoked for MemTables that does not enable the UDT + // in Memtable only feature. + assert(ts_sz_ > 0 && !persist_user_defined_timestamps_); + return newest_udt_; +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/memtable.h b/db/memtable.h index a461d908b..bfe882b0e 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -353,6 +353,10 @@ class MemTable { return data_size_.load(std::memory_order_relaxed); } + size_t write_buffer_size() const { + return write_buffer_size_.load(std::memory_order_relaxed); + } + // Dynamically change the memtable's capacity. If set below the current usage, // the next key added will trigger a flush. Can only increase size when // memtable prefix bloom is disabled, since we can't easily allocate more @@ -527,6 +531,14 @@ class MemTable { } } + // Get the newest user-defined timestamp contained in this MemTable. Check + // `newest_udt_` for what newer means. This method should only be invoked for + // an MemTable that has enabled user-defined timestamp feature and set + // `persist_user_defined_timestamps` to false. The tracked newest UDT will be + // used by flush job in the background to help check the MemTable's + // eligibility for Flush. + const Slice& GetNewestUDT() const; + // Returns Corruption status if verification fails. static Status VerifyEntryChecksum(const char* entry, uint32_t protection_bytes_per_key, @@ -617,6 +629,19 @@ class MemTable { // Flush job info of the current memtable. std::unique_ptr flush_job_info_; + // Size in bytes for the user-defined timestamps. + size_t ts_sz_; + + // Whether to persist user-defined timestamps + bool persist_user_defined_timestamps_; + + // Newest user-defined timestamp contained in this MemTable. For ts1, and ts2 + // if Comparator::CompareTimestamp(ts1, ts2) > 0, ts1 is considered newer than + // ts2. We track this field for a MemTable if its column family has UDT + // feature enabled and the `persist_user_defined_timestamp` flag is false. + // Otherwise, this field just contains an empty Slice. 
+ Slice newest_udt_; + // Updates flush_state_ using ShouldFlushNow() void UpdateFlushState(); @@ -653,6 +678,8 @@ class MemTable { void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info, const Slice& key, const Slice& value, ValueType type, SequenceNumber s, char* checksum_ptr); + + void MaybeUpdateNewestUDT(const Slice& user_key); }; extern const char* EncodeKey(std::string* scratch, const Slice& target); diff --git a/db/memtable_list.h b/db/memtable_list.h index 1ad28a59e..e95493b6f 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -382,6 +382,25 @@ class MemTableList { return memlist.front()->GetID(); } + // DB mutex held. + // Gets the newest user-defined timestamp for the Memtables in ascending ID + // order, up to the `max_memtable_id`. Used by background flush job + // to check Memtables' eligibility for flush w.r.t retaining UDTs. + std::vector GetTablesNewestUDT(uint64_t max_memtable_id) { + std::vector newest_udts; + auto& memlist = current_->memlist_; + // Iterating through the memlist starting at the end, the vector + // ret is filled with memtables already sorted in increasing MemTable ID. + for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) { + MemTable* m = *it; + if (m->GetID() > max_memtable_id) { + break; + } + newest_udts.push_back(m->GetNewestUDT()); + } + return newest_udts; + } + void AssignAtomicFlushSeq(const SequenceNumber& seq) { const auto& memlist = current_->memlist_; // Scan the memtable list from new to old diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index c63952b12..dfa1dbfc7 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -43,6 +43,9 @@ class MemTableListTest : public testing::Test { // Open DB only with default column family ColumnFamilyOptions cf_options; std::vector cf_descs; + if (udt_enabled_) { + cf_options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + } cf_descs.emplace_back(kDefaultColumnFamilyName, cf_options); Status s = DB::Open(options, dbname, cf_descs, &handles, &db); EXPECT_OK(s); @@ -200,6 +203,9 @@ class MemTableListTest : public testing::Test { nullptr /* prep_tracker */, &mutex, file_meta_ptrs, committed_flush_jobs_info, to_delete, nullptr, &log_buffer); } + + protected: + bool udt_enabled_ = false; }; TEST_F(MemTableListTest, Empty) { @@ -868,7 +874,7 @@ TEST_F(MemTableListTest, FlushPendingTest) { to_delete.clear(); } -TEST_F(MemTableListTest, EmptyAtomicFlusTest) { +TEST_F(MemTableListTest, EmptyAtomicFlushTest) { autovector lists; autovector cf_ids; autovector options_list; @@ -880,7 +886,7 @@ TEST_F(MemTableListTest, EmptyAtomicFlusTest) { ASSERT_TRUE(to_delete.empty()); } -TEST_F(MemTableListTest, AtomicFlusTest) { +TEST_F(MemTableListTest, AtomicFlushTest) { const int num_cfs = 3; const int num_tables_per_cf = 2; SequenceNumber seq = 1; @@ -1028,6 +1034,86 @@ TEST_F(MemTableListTest, AtomicFlusTest) { } } +class MemTableListWithTimestampTest : public MemTableListTest { + public: + MemTableListWithTimestampTest() : MemTableListTest() {} + + void SetUp() override { udt_enabled_ = true; } +}; + +TEST_F(MemTableListWithTimestampTest, GetTableNewestUDT) { + const int num_tables = 3; + const int num_entries = 5; + SequenceNumber seq = 1; + + auto factory = std::make_shared(); + options.memtable_factory = factory; + options.persist_user_defined_timestamps = false; + ImmutableOptions ioptions(options); + const Comparator* ucmp = test::BytewiseComparatorWithU64TsWrapper(); + InternalKeyComparator cmp(ucmp); + WriteBufferManager 
wb(options.db_write_buffer_size); + + // Create MemTableList + int min_write_buffer_number_to_merge = 1; + int max_write_buffer_number_to_maintain = 4; + int64_t max_write_buffer_size_to_maintain = + 4 * static_cast(options.write_buffer_size); + MemTableList list(min_write_buffer_number_to_merge, + max_write_buffer_number_to_maintain, + max_write_buffer_size_to_maintain); + + // Create some MemTables + uint64_t memtable_id = 0; + std::vector tables; + MutableCFOptions mutable_cf_options(options); + uint64_t current_ts = 0; + autovector to_delete; + std::vector newest_udts; + + std::string key; + std::string write_ts; + for (int i = 0; i < num_tables; i++) { + MemTable* mem = new MemTable(cmp, ioptions, mutable_cf_options, &wb, + kMaxSequenceNumber, 0 /* column_family_id */); + mem->SetID(memtable_id++); + mem->Ref(); + + std::string value; + MergeContext merge_context; + + for (int j = 0; j < num_entries; j++) { + key = "key1"; + write_ts.clear(); + PutFixed64(&write_ts, current_ts); + key.append(write_ts); + ASSERT_OK(mem->Add(++seq, kTypeValue, key, std::to_string(i), + nullptr /* kv_prot_info */)); + current_ts++; + } + + tables.push_back(mem); + list.Add(tables.back(), &to_delete); + newest_udts.push_back(write_ts); + } + + ASSERT_EQ(num_tables, list.NumNotFlushed()); + ASSERT_TRUE(list.IsFlushPending()); + std::vector tables_newest_udts = list.GetTablesNewestUDT(num_tables); + ASSERT_EQ(newest_udts.size(), tables_newest_udts.size()); + for (size_t i = 0; i < tables_newest_udts.size(); i++) { + const Slice& table_newest_udt = tables_newest_udts[i]; + const Slice expected_newest_udt = newest_udts[i]; + ASSERT_EQ(expected_newest_udt, table_newest_udt); + } + + list.current()->Unref(&to_delete); + for (MemTable* m : to_delete) { + delete m; + } + to_delete.clear(); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/repair_test.cc b/db/repair_test.cc index 8cca48424..e8cc40aab 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -365,12 +365,15 @@ TEST_P(RepairTestWithTimestamp, UnflushedSst) { Options options = CurrentOptions(); options.env = env_; options.create_if_missing = true; - std::string min_ts = Timestamp(0, 0); - std::string write_ts = Timestamp(1, 0); - const size_t kTimestampSize = write_ts.size(); - TestComparator test_cmp(kTimestampSize); - options.comparator = &test_cmp; + std::string min_ts; + std::string write_ts; + PutFixed64(&min_ts, 0); + PutFixed64(&write_ts, 1); + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); options.persist_user_defined_timestamps = persist_udt; + if (!persist_udt) { + options.allow_concurrent_memtable_write = false; + } options.paranoid_file_checks = paranoid_file_checks; ColumnFamilyOptions cf_options(options); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 31bea00f3..5be134a9d 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -1155,10 +1155,20 @@ struct AdvancedColumnFamilyOptions { // while set this flag to be `false`: user keys in the newly generated SST // files are of the same format as the existing SST files. // + // Currently only user comparator that formats user-defined timesamps as + // uint64_t via using one of the RocksDB provided comparator + // `ComparatorWithU64TsImpl` are supported. + // // When setting this flag to `false`, users should also call // `DB::IncreaseFullHistoryTsLow` to set a cutoff timestamp for flush. 
RocksDB // refrains from flushing a memtable with data still above - // the cutoff timestamp with best effort. Users can do user-defined + // the cutoff timestamp with best effort. If this cutoff timestamp is not set, + // flushing continues normally. + // NOTE: in order for the cutoff timestamp to work properly, users of this + // feature need to ensure to write to a column family with globally + // non-decreasing user-defined timestamps. + // + // Users can do user-defined // multi-versioned read above the cutoff timestamp. When users try to read // below the cutoff timestamp, an error will be returned. // @@ -1169,6 +1179,10 @@ struct AdvancedColumnFamilyOptions { // downgrade or toggling on / off the user-defined timestamp feature on a // column family. // + // Note that setting this flag to false is not supported in combination with + // atomic flush, or concurrent memtable write enabled by + // `allow_concurrent_memtable_write`. + // // Default: true (user-defined timestamps are persisted) // Not dynamically changeable, change it requires db restart and // only compatible changes are allowed. From c24ef26ca7224261c734e0bb5911d7d2386c17f4 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 26 Jul 2023 20:16:32 -0700 Subject: [PATCH 005/386] Support switching on / off UDT together with in-Memtable-only feature (#11623) Summary: Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. To do this, this PR includes: 1) Log the `persist_user_defined_timestamps` option per column family in Manifest to facilitate detecting an attempt to enable / disable UDT. This entry is enforced to be logged in the same VersionEdit as the user comparator name entry. 2) User-defined timestamps related options are validated when re-opening a column family, including user comparator name and the `persist_user_defined_timestamps` flag. These type of settings and settings change are considered valid: a) no user comparator change and no effective `persist_user_defined_timestamp` flag change. b) switch user comparator to enable UDT provided the immediately effective `persist_user_defined_timestamps` flag is false. c) switch user comparator to disable UDT provided that the before-change `persist_user_defined_timestamps` is already false. 3) when an attempt to enable UDT is detected, we mark all its existing SST files as "having no UDT" by marking its `FileMetaData.user_defined_timestamps_persisted` flag to false and handle their file boundaries `FileMetaData.smallest`, `FileMetaData.largest` by padding a min timestamp. 4) while enabling / disabling UDT feature, timestamp size inconsistency in existing WAL logs are handled to make it compatible with the running user comparator. 
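For illustration only (not part of this PR): a minimal usage sketch of the settings changes described in 2) b) and 2) c) above, mirroring the `EnableDisableUDT` tests added below. The public comparator accessor `BytewiseComparatorWithU64Ts()` is an assumption here (the tests use the test-only wrapper `test::BytewiseComparatorWithU64TsWrapper()`), and the DB path is hypothetical.

```
#include <cassert>
#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

// Sketch: toggle the in-Memtable-only UDT feature on an existing DB.
void ToggleUdtSketch(const std::string& db_path) {
  Options opts;
  opts.create_if_missing = false;

  // 2) b): enable UDT by switching to a u64-timestamp comparator while
  // keeping timestamps in memtables only.
  opts.comparator = BytewiseComparatorWithU64Ts();  // assumed public accessor
  opts.persist_user_defined_timestamps = false;
  // Required for the in-Memtable-only mode, per AdvancedColumnFamilyOptions.
  opts.allow_concurrent_memtable_write = false;
  DB* db = nullptr;
  Status s = DB::Open(opts, db_path, &db);
  assert(s.ok());
  delete db;

  // 2) c): disable UDT again by switching back to the plain bytewise
  // comparator; allowed because the effective persist flag was already false.
  opts.comparator = BytewiseComparator();
  opts.persist_user_defined_timestamps = true;
  opts.allow_concurrent_memtable_write = true;
  s = DB::Open(opts, db_path, &db);
  assert(s.ok());
  delete db;
}
```
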
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11623 Test Plan: ``` make all check ./db_with_timestamp_basic_test --gtest-filter="*EnableDisableUDT*" ./db_wal_test --gtest_filter="*EnableDisableUDT*" ``` Reviewed By: ltamasi Differential Revision: D47636862 Pulled By: jowlyzhang fbshipit-source-id: dcd19f67292da3c3cc9584c09ad00331c9ab9322 --- db/db_impl/db_impl.cc | 2 + db/db_impl/db_impl_open.cc | 21 +-- db/db_wal_test.cc | 137 +++++++++--------- db/db_with_timestamp_basic_test.cc | 63 ++++++++ db/flush_job_test.cc | 1 + db/repair.cc | 3 + db/version_edit.cc | 25 ++++ db/version_edit.h | 14 ++ db/version_edit_handler.cc | 35 ++++- db/version_edit_handler.h | 1 + db/version_edit_test.cc | 8 +- db/version_set.cc | 2 + db/version_set_test.cc | 2 + include/rocksdb/advanced_options.h | 3 +- .../test/java/org/rocksdb/RocksDBTest.java | 2 +- .../new_features/enable_disable_udt.md | 1 + util/udt_util.cc | 79 ++++++++++ util/udt_util.h | 31 ++++ util/udt_util_test.cc | 116 +++++++++++++++ 19 files changed, 458 insertions(+), 88 deletions(-) create mode 100644 unreleased_history/new_features/enable_disable_udt.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 1e47e6dd2..4433c310e 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3191,6 +3191,8 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, edit.SetColumnFamily(new_id); edit.SetLogNumber(logfile_number_); edit.SetComparatorName(cf_options.comparator->Name()); + edit.SetPersistUserDefinedTimestamps( + cf_options.persist_user_defined_timestamps); // LogAndApply will both write the creation in MANIFEST and create // ColumnFamilyData object diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 988283381..3d41520c8 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1203,10 +1203,10 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, Status::Corruption("log record too small")); continue; } - // We create a new batch and initialize with a valid prot_info_ to store // the data checksums WriteBatch batch; + std::unique_ptr new_batch; status = WriteBatchInternal::SetContents(&batch, record); if (!status.ok()) { @@ -1215,26 +1215,29 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, const UnorderedMap& record_ts_sz = reader.GetRecordedTimestampSize(); - // TODO(yuzhangyu): update mode to kReconcileInconsistency when user - // comparator can be changed. status = HandleWriteBatchTimestampSizeDifference( &batch, running_ts_sz, record_ts_sz, - TimestampSizeConsistencyMode::kVerifyConsistency); + TimestampSizeConsistencyMode::kReconcileInconsistency, &new_batch); if (!status.ok()) { return status; } + + bool batch_updated = new_batch != nullptr; + WriteBatch* batch_to_use = batch_updated ? new_batch.get() : &batch; TEST_SYNC_POINT_CALLBACK( - "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", &batch); + "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", + batch_to_use); TEST_SYNC_POINT_CALLBACK( "DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum", &record_checksum); status = WriteBatchInternal::UpdateProtectionInfo( - &batch, 8 /* bytes_per_key */, &record_checksum); + batch_to_use, 8 /* bytes_per_key */, + batch_updated ? 
nullptr : &record_checksum); if (!status.ok()) { return status; } - SequenceNumber sequence = WriteBatchInternal::Sequence(&batch); + SequenceNumber sequence = WriteBatchInternal::Sequence(batch_to_use); if (immutable_db_options_.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { @@ -1255,7 +1258,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, // and returns true. if (!InvokeWalFilterIfNeededOnWalRecord(wal_number, fname, reporter, status, stop_replay_by_wal_filter, - batch)) { + *batch_to_use)) { continue; } @@ -1266,7 +1269,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, // That's why we set ignore missing column families to true bool has_valid_writes = false; status = WriteBatchInternal::InsertInto( - &batch, column_family_memtables_.get(), &flush_scheduler_, + batch_to_use, column_family_memtables_.get(), &flush_scheduler_, &trim_history_scheduler_, true, wal_number, this, false /* concurrent_memtable_writes */, next_sequence, &has_valid_writes, seq_per_batch_, batch_per_txn_); diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 915ebe0b0..72b6f7c7b 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -318,30 +318,25 @@ class DBWALTestWithTimestamp DBWALTestWithTimestamp() : DBBasicTestWithTimestampBase("db_wal_test_with_timestamp") {} - void SetUp() override { - persist_udt_ = test::ShouldPersistUDT(GetParam()); - DBBasicTestWithTimestampBase::SetUp(); - } - - Status CreateAndReopenWithCFWithTs(const std::vector& cfs, - Options& ts_options, - bool avoid_flush_during_recovery = false) { + Status CreateAndReopenWithTs(const std::vector& cfs, + const Options& ts_options, bool persist_udt, + bool avoid_flush_during_recovery = false) { Options default_options = CurrentOptions(); default_options.allow_concurrent_memtable_write = - persist_udt_ ? true : false; + persist_udt ? true : false; DestroyAndReopen(default_options); CreateColumnFamilies(cfs, ts_options); - return ReopenColumnFamiliesWithTs(cfs, ts_options, + return ReopenColumnFamiliesWithTs(cfs, ts_options, persist_udt, avoid_flush_during_recovery); } Status ReopenColumnFamiliesWithTs(const std::vector& cfs, - Options ts_options, + Options ts_options, bool persist_udt, bool avoid_flush_during_recovery = false) { Options default_options = CurrentOptions(); default_options.create_if_missing = false; default_options.allow_concurrent_memtable_write = - persist_udt_ ? true : false; + persist_udt ? true : false; default_options.avoid_flush_during_recovery = avoid_flush_during_recovery; ts_options.create_if_missing = false; @@ -369,9 +364,6 @@ class DBWALTestWithTimestamp ASSERT_EQ(expected_value, actual_value); ASSERT_EQ(expected_ts, actual_ts); } - - protected: - bool persist_udt_; }; TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { @@ -388,20 +380,21 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { // stripped when the `persist_user_defined_timestamps` flag is false, so that // all written timestamps are available for testing user-defined time travel // read. 
- ts_options.persist_user_defined_timestamps = persist_udt_; + bool persist_udt = test::ShouldPersistUDT(GetParam()); + ts_options.persist_user_defined_timestamps = persist_udt; bool avoid_flush_during_recovery = true; ReadOptions read_opts; do { Slice ts_slice = ts1; read_opts.timestamp = &ts_slice; - ASSERT_OK(CreateAndReopenWithCFWithTs({"pikachu"}, ts_options, - avoid_flush_during_recovery)); + ASSERT_OK(CreateAndReopenWithTs({"pikachu"}, ts_options, persist_udt, + avoid_flush_during_recovery)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 0U); ASSERT_OK(Put(1, "foo", ts1, "v1")); ASSERT_OK(Put(1, "baz", ts1, "v5")); - ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, + ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, persist_udt, avoid_flush_during_recovery)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 0U); // Do a timestamped read with ts1 after second reopen. @@ -415,14 +408,19 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { ASSERT_OK(Put(1, "bar", ts2, "v2")); ASSERT_OK(Put(1, "foo", ts2, "v3")); - ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, + ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, persist_udt, avoid_flush_during_recovery)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 0U); std::string ts3; PutFixed64(&ts3, 3); ASSERT_OK(Put(1, "foo", ts3, "v4")); + // All the key value pairs available for read: + // "foo" -> [(ts1, "v1"), (ts2, "v3"), (ts3, "v4")] + // "bar" -> [(ts2, "v2")] + // "baz" -> [(ts1, "v5")] // Do a timestamped read with ts1 after third reopen. + // read_opts.timestamp is set to ts1 for below reads CheckGet(read_opts, 1, "foo", "v1", ts1); std::string value; ASSERT_TRUE(db_->Get(read_opts, handles_[1], "bar", &value).IsNotFound()); @@ -430,60 +428,20 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { // Do a timestamped read with ts2 after third reopen. ts_slice = ts2; + // read_opts.timestamp is set to ts2 for below reads. CheckGet(read_opts, 1, "foo", "v3", ts2); CheckGet(read_opts, 1, "bar", "v2", ts2); CheckGet(read_opts, 1, "baz", "v5", ts1); // Do a timestamped read with ts3 after third reopen. ts_slice = ts3; + // read_opts.timestamp is set to ts3 for below reads. CheckGet(read_opts, 1, "foo", "v4", ts3); CheckGet(read_opts, 1, "bar", "v2", ts2); CheckGet(read_opts, 1, "baz", "v5", ts1); } while (ChangeWalOptions()); } -class TestTsSzComparator : public Comparator { - public: - explicit TestTsSzComparator(size_t ts_sz) : Comparator(ts_sz) {} - - int Compare(const ROCKSDB_NAMESPACE::Slice& /*a*/, - const ROCKSDB_NAMESPACE::Slice& /*b*/) const override { - return 0; - } - const char* Name() const override { return "TestTsSzComparator.u64ts"; } - void FindShortestSeparator( - std::string* /*start*/, - const ROCKSDB_NAMESPACE::Slice& /*limit*/) const override {} - void FindShortSuccessor(std::string* /*key*/) const override {} -}; - -TEST_P(DBWALTestWithTimestamp, RecoverInconsistentTimestamp) { - // Set up the option that enables user defined timestmp size. 
- std::string ts; - PutFixed16(&ts, 1); - TestTsSzComparator test_cmp(2); - Options ts_options; - ts_options.create_if_missing = true; - ts_options.comparator = &test_cmp; - ts_options.persist_user_defined_timestamps = persist_udt_; - - ASSERT_OK(CreateAndReopenWithCFWithTs({"pikachu"}, ts_options)); - ASSERT_OK(Put(1, "foo", ts, "v1")); - ASSERT_OK(Put(1, "baz", ts, "v5")); - - // In real use cases, switching to a different user comparator is prohibited - // by a sanity check during DB open that does a user comparator name - // comparison. This test mocked and bypassed that sanity check because the - // before and after user comparator are both named "TestTsSzComparator.u64ts". - // This is to test the user-defined timestamp recovery logic for WAL files - // have the intended consistency check. - // `HandleWriteBatchTimestampSizeDifference` in udt_util.h has more details. - TestTsSzComparator diff_test_cmp(3); - ts_options.comparator = &diff_test_cmp; - ASSERT_TRUE( - ReopenColumnFamiliesWithTs({"pikachu"}, ts_options).IsInvalidArgument()); -} - TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { // Set up the option that enables user defined timestamp size. std::string min_ts; @@ -493,18 +451,19 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { Options ts_options; ts_options.create_if_missing = true; ts_options.comparator = test::BytewiseComparatorWithU64TsWrapper(); - ts_options.persist_user_defined_timestamps = persist_udt_; + bool persist_udt = test::ShouldPersistUDT(GetParam()); + ts_options.persist_user_defined_timestamps = persist_udt; std::string smallest_ukey_without_ts = "baz"; std::string largest_ukey_without_ts = "foo"; - ASSERT_OK(CreateAndReopenWithCFWithTs({"pikachu"}, ts_options)); + ASSERT_OK(CreateAndReopenWithTs({"pikachu"}, ts_options, persist_udt)); // No flush, no sst files, because of no data. ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 0U); ASSERT_OK(Put(1, largest_ukey_without_ts, write_ts, "v1")); ASSERT_OK(Put(1, smallest_ukey_without_ts, write_ts, "v5")); - ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options)); + ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, ts_options, persist_udt)); // Memtable recovered from WAL flushed because `avoid_flush_during_recovery` // defaults to false, created one L0 file. ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "pikachu"), 1U); @@ -515,7 +474,7 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { // L0 only has one SST file. 
ASSERT_EQ(level_to_files[0].size(), 1); auto meta = level_to_files[0][0]; - if (persist_udt_) { + if (persist_udt) { ASSERT_EQ(smallest_ukey_without_ts + write_ts, meta.smallest.user_key()); ASSERT_EQ(largest_ukey_without_ts + write_ts, meta.largest.user_key()); } else { @@ -526,11 +485,55 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { // Param 0: test mode for the user-defined timestamp feature INSTANTIATE_TEST_CASE_P( - DBWALTestWithTimestamp, DBWALTestWithTimestamp, + P, DBWALTestWithTimestamp, ::testing::Values( test::UserDefinedTimestampTestMode::kStripUserDefinedTimestamp, test::UserDefinedTimestampTestMode::kNormal)); +TEST_F(DBWALTestWithTimestamp, EnableDisableUDT) { + Options options; + options.create_if_missing = true; + options.comparator = BytewiseComparator(); + bool avoid_flush_during_recovery = true; + ASSERT_OK(CreateAndReopenWithTs({"pikachu"}, options, true /* persist_udt */, + avoid_flush_during_recovery)); + + ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "foo", "v1")); + ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "baz", "v5")); + + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + options.persist_user_defined_timestamps = false; + // Test handle timestamp size inconsistency in WAL when enabling user-defined + // timestamps. + ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, options, + false /* persist_udt */, + avoid_flush_during_recovery)); + + std::string ts; + PutFixed64(&ts, 0); + Slice ts_slice = ts; + ReadOptions read_opts; + read_opts.timestamp = &ts_slice; + // Pre-existing entries are treated as if they have the min timestamp. + CheckGet(read_opts, 1, "foo", "v1", ts); + CheckGet(read_opts, 1, "baz", "v5", ts); + ts.clear(); + PutFixed64(&ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "foo", ts, "v2")); + ASSERT_OK(db_->Put(WriteOptions(), handles_[1], "baz", ts, "v6")); + CheckGet(read_opts, 1, "foo", "v2", ts); + CheckGet(read_opts, 1, "baz", "v6", ts); + + options.comparator = BytewiseComparator(); + // Open the column family again with the UDT feature disabled. Test handle + // timestamp size inconsistency in WAL when disabling user-defined timestamps + ASSERT_OK(ReopenColumnFamiliesWithTs({"pikachu"}, options, + true /* persist_udt */, + avoid_flush_during_recovery)); + ASSERT_EQ("v2", Get(1, "foo")); + ASSERT_EQ("v6", Get(1, "baz")); +} + TEST_F(DBWALTest, RecoverWithTableHandle) { do { Options options = CurrentOptions(); diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 9b8ca31d3..202c4c345 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -3346,6 +3346,69 @@ INSTANTIATE_TEST_CASE_P( test::UserDefinedTimestampTestMode::kStripUserDefinedTimestamp, test::UserDefinedTimestampTestMode::kNormal)); +TEST_F(DBBasicTestWithTimestamp, EnableDisableUDT) { + Options options = CurrentOptions(); + options.env = env_; + // Create a column family without user-defined timestamps. + options.comparator = BytewiseComparator(); + options.persist_user_defined_timestamps = true; + DestroyAndReopen(options); + + // Create one SST file, its user keys have no user-defined timestamps. + ASSERT_OK(db_->Put(WriteOptions(), "foo", "val1")); + ASSERT_OK(Flush(0)); + Close(); + + // Reopen the existing column family and enable user-defined timestamps + // feature for it. 
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + options.persist_user_defined_timestamps = false; + options.allow_concurrent_memtable_write = false; + Reopen(options); + + std::string value; + ASSERT_TRUE(db_->Get(ReadOptions(), "foo", &value).IsInvalidArgument()); + std::string read_ts; + PutFixed64(&read_ts, 0); + ReadOptions ropts; + Slice read_ts_slice = read_ts; + ropts.timestamp = &read_ts_slice; + std::string key_ts; + // Entries in pre-existing SST files are treated as if they have minimum + // user-defined timestamps. + ASSERT_OK(db_->Get(ropts, "foo", &value, &key_ts)); + ASSERT_EQ("val1", value); + ASSERT_EQ(read_ts, key_ts); + + // Do timestamped read / write. + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val2")); + read_ts.clear(); + PutFixed64(&read_ts, 1); + ASSERT_OK(db_->Get(ropts, "foo", &value, &key_ts)); + ASSERT_EQ("val2", value); + ASSERT_EQ(write_ts, key_ts); + // The user keys in this SST file don't have user-defined timestamps either, + // because `persist_user_defined_timestamps` flag is set to false. + ASSERT_OK(Flush(0)); + Close(); + + // Reopen the existing column family while disabling user-defined timestamps. + options.comparator = BytewiseComparator(); + Reopen(options); + + ASSERT_TRUE(db_->Get(ropts, "foo", &value).IsInvalidArgument()); + ASSERT_OK(db_->Get(ReadOptions(), "foo", &value)); + ASSERT_EQ("val2", value); + + // Continue to write / read the column family without user-defined timestamps. + ASSERT_OK(db_->Put(WriteOptions(), "foo", "val3")); + ASSERT_OK(db_->Get(ReadOptions(), "foo", &value)); + ASSERT_EQ("val3", value); + Close(); +} + TEST_F(DBBasicTestWithTimestamp, GCPreserveRangeTombstoneWhenNoOrSmallFullHistoryLow) { Options options = CurrentOptions(); diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 2e6c4d426..9fd9c13fa 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -70,6 +70,7 @@ class FlushJobTestBase : public testing::Test { new_cf.AddColumnFamily(column_family_names_[i]); new_cf.SetColumnFamily(cf_id++); new_cf.SetComparatorName(ucmp_->Name()); + new_cf.SetPersistUserDefinedTimestamps(persist_udt_); new_cf.SetLogNumber(0); new_cf.SetNextFile(2); new_cf.SetLastSequence(last_seq++); diff --git a/db/repair.cc b/db/repair.cc index 58ada3aeb..4b28ec2cd 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -157,6 +157,7 @@ class Repairer { VersionEdit edit; edit.SetComparatorName(opts.comparator->Name()); + edit.SetPersistUserDefinedTimestamps(opts.persist_user_defined_timestamps); edit.SetLogNumber(0); edit.SetColumnFamily(cf_id); ColumnFamilyData* cfd; @@ -720,6 +721,8 @@ class Repairer { // recovered epoch numbers VersionEdit edit; edit.SetComparatorName(cfd->user_comparator()->Name()); + edit.SetPersistUserDefinedTimestamps( + cfd->ioptions()->persist_user_defined_timestamps); edit.SetLogNumber(0); edit.SetNextFile(next_file_number_); edit.SetColumnFamily(cfd->GetID()); diff --git a/db/version_edit.cc b/db/version_edit.cc index f5783eacd..6459c2ff8 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -100,6 +100,7 @@ bool VersionEdit::EncodeTo(std::string* dst, PutLengthPrefixedSlice(dst, db_id_); } if (has_comparator_) { + assert(has_persist_user_defined_timestamps_); PutVarint32(dst, kComparator); PutLengthPrefixedSlice(dst, comparator_); } @@ -308,6 +309,15 @@ bool VersionEdit::EncodeTo(std::string* dst, PutVarint32(dst, kFullHistoryTsLow); PutLengthPrefixedSlice(dst, full_history_ts_low_); } + + if 
(HasPersistUserDefinedTimestamps()) { + // persist_user_defined_timestamps flag should be logged in the same + // VersionEdit as the user comparator name. + assert(has_comparator_); + PutVarint32(dst, kPersistUserDefinedTimestamps); + char p = static_cast(persist_user_defined_timestamps_); + PutLengthPrefixedSlice(dst, Slice(&p, 1)); + } return true; } @@ -777,6 +787,17 @@ Status VersionEdit::DecodeFrom(const Slice& src) { } break; + case kPersistUserDefinedTimestamps: + if (!GetLengthPrefixedSlice(&input, &str)) { + msg = "persist_user_defined_timestamps"; + } else if (str.size() != 1) { + msg = "persist_user_defined_timestamps field wrong size"; + } else { + persist_user_defined_timestamps_ = (str[0] == 1); + has_persist_user_defined_timestamps_ = true; + } + break; + default: if (tag & kTagSafeIgnoreMask) { // Tag from future which can be safely ignored. @@ -819,6 +840,10 @@ std::string VersionEdit::DebugString(bool hex_key) const { r.append("\n Comparator: "); r.append(comparator_); } + if (has_persist_user_defined_timestamps_) { + r.append("\n PersistUserDefinedTimestamps: "); + r.append(persist_user_defined_timestamps_ ? "true" : "false"); + } if (has_log_number_) { r.append("\n LogNumber: "); AppendNumberTo(&r, log_number_); diff --git a/db/version_edit.h b/db/version_edit.h index cedccb3a2..a13d8e65f 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -71,6 +71,7 @@ enum Tag : uint32_t { kFullHistoryTsLow, kWalAddition2, kWalDeletion2, + kPersistUserDefinedTimestamps, }; enum NewFileCustomTag : uint32_t { @@ -397,6 +398,17 @@ class VersionEdit { bool HasComparatorName() const { return has_comparator_; } const std::string& GetComparatorName() const { return comparator_; } + void SetPersistUserDefinedTimestamps(bool persist_user_defined_timestamps) { + has_persist_user_defined_timestamps_ = true; + persist_user_defined_timestamps_ = persist_user_defined_timestamps; + } + bool HasPersistUserDefinedTimestamps() const { + return has_persist_user_defined_timestamps_; + } + bool GetPersistUserDefinedTimestamps() const { + return persist_user_defined_timestamps_; + } + void SetLogNumber(uint64_t num) { has_log_number_ = true; log_number_ = num; @@ -697,6 +709,7 @@ class VersionEdit { bool has_max_column_family_ = false; bool has_min_log_number_to_keep_ = false; bool has_last_sequence_ = false; + bool has_persist_user_defined_timestamps_ = false; // Compaction cursors for round-robin compaction policy CompactCursors compact_cursors_; @@ -724,6 +737,7 @@ class VersionEdit { uint32_t remaining_entries_ = 0; std::string full_history_ts_low_; + bool persist_user_defined_timestamps_ = true; }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/version_edit_handler.cc b/db/version_edit_handler.cc index 732723996..7f8e30390 100644 --- a/db/version_edit_handler.cc +++ b/db/version_edit_handler.cc @@ -17,6 +17,7 @@ #include "db/version_edit.h" #include "logging/logging.h" #include "monitoring/persistent_stats_history.h" +#include "util/udt_util.h" namespace ROCKSDB_NAMESPACE { @@ -613,15 +614,21 @@ Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, version_edit_params_.SetLogNumber(edit.log_number_); } } - if (edit.has_comparator_ && - edit.comparator_ != cfd->user_comparator()->Name()) { - if (!cf_to_cmp_names_) { - s = Status::InvalidArgument( - cfd->user_comparator()->Name(), - "does not match existing comparator " + edit.comparator_); - } else { + if (edit.has_comparator_) { + bool mark_sst_files_has_no_udt = false; + // If `persist_user_defined_timestamps` flag is 
recorded in manifest, it + // is guaranteed to be in the same VersionEdit as comparator. Otherwise, + // it's not recorded and it should have default value true. + s = ValidateUserDefinedTimestampsOptions( + cfd->user_comparator(), edit.comparator_, + cfd->ioptions()->persist_user_defined_timestamps, + edit.persist_user_defined_timestamps_, &mark_sst_files_has_no_udt); + if (!s.ok() && cf_to_cmp_names_) { cf_to_cmp_names_->emplace(cfd->GetID(), edit.comparator_); } + if (mark_sst_files_has_no_udt) { + cfds_to_mark_no_udt_.insert(cfd->GetID()); + } } if (edit.HasFullHistoryTsLow()) { const std::string& new_ts = edit.GetFullHistoryTsLow(); @@ -673,10 +680,17 @@ Status VersionEditHandler::MaybeHandleFileBoundariesForNewFiles( VersionEdit::NewFiles& new_files = edit.GetMutableNewFiles(); assert(!new_files.empty()); + // If true, enabling user-defined timestamp is detected for this column + // family. All its existing SST files need to have the file boundaries handled + // and their `persist_user_defined_timestamps` flag set to false regardless of + // its existing value. + bool mark_existing_ssts_with_no_udt = + cfds_to_mark_no_udt_.find(cfd->GetID()) != cfds_to_mark_no_udt_.end(); bool file_boundaries_need_handling = false; for (auto& new_file : new_files) { FileMetaData& meta = new_file.second; - if (meta.user_defined_timestamps_persisted) { + if (meta.user_defined_timestamps_persisted && + !mark_existing_ssts_with_no_udt) { // `FileMetaData.user_defined_timestamps_persisted` field is the value of // the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` // at the time when the SST file was created. As a result, all added SST @@ -689,6 +703,11 @@ Status VersionEditHandler::MaybeHandleFileBoundariesForNewFiles( break; } file_boundaries_need_handling = true; + assert(!meta.user_defined_timestamps_persisted || + mark_existing_ssts_with_no_udt); + if (mark_existing_ssts_with_no_udt) { + meta.user_defined_timestamps_persisted = false; + } std::string smallest_buf; std::string largest_buf; PadInternalKeyWithMinTimestamp(&smallest_buf, meta.smallest.Encode(), diff --git a/db/version_edit_handler.h b/db/version_edit_handler.h index 54454cf70..dd55a4de9 100644 --- a/db/version_edit_handler.h +++ b/db/version_edit_handler.h @@ -202,6 +202,7 @@ class VersionEditHandler : public VersionEditHandlerBase { bool initialized_; std::unique_ptr> cf_to_cmp_names_; EpochNumberRequirement epoch_number_requirement_; + std::unordered_set cfds_to_mark_no_udt_; private: Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index f3473b476..c47389901 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -58,6 +58,7 @@ TEST_F(VersionEditTest, EncodeDecode) { } edit.SetComparatorName("foo"); + edit.SetPersistUserDefinedTimestamps(true); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); @@ -95,6 +96,7 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); + edit.SetPersistUserDefinedTimestamps(false); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); @@ -125,6 +127,7 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { ASSERT_FALSE(new_files[1].second.user_defined_timestamps_persisted); ASSERT_TRUE(new_files[2].second.user_defined_timestamps_persisted); ASSERT_TRUE(new_files[3].second.user_defined_timestamps_persisted); + ASSERT_FALSE(parsed.GetPersistUserDefinedTimestamps()); } 
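Editor's aside (not part of the patch): a minimal, hypothetical round-trip sketch of the new manifest field, modeled on the `EncodeDecode` test above. The internal header path, the exact `EncodeTo()` call, and the `.u64ts` comparator name (which follows the suffix convention checked in util/udt_util.cc below) are assumptions taken from this diff.

```
#include <cassert>
#include <string>

#include "db/version_edit.h"  // internal RocksDB header, per this patch

void PersistFlagRoundTripSketch() {
  ROCKSDB_NAMESPACE::VersionEdit edit;
  // The flag is enforced to be logged in the same VersionEdit as the user
  // comparator name; EncodeTo() asserts on that pairing (see above).
  edit.SetComparatorName("leveldb.BytewiseComparator.u64ts");
  edit.SetPersistUserDefinedTimestamps(false);

  std::string record;
  bool encoded = edit.EncodeTo(&record);
  assert(encoded);
  (void)encoded;

  ROCKSDB_NAMESPACE::VersionEdit parsed;
  assert(parsed.DecodeFrom(record).ok());
  assert(parsed.HasPersistUserDefinedTimestamps());
  assert(!parsed.GetPersistUserDefinedTimestamps());
}
```
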
TEST_F(VersionEditTest, EncodeDecodeNewFile4HandleFileBoundary) { @@ -195,6 +198,7 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); + edit.SetPersistUserDefinedTimestamps(true); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); @@ -230,6 +234,7 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); ASSERT_EQ(1u, parsed.GetDeletedFiles().size()); + ASSERT_TRUE(parsed.GetPersistUserDefinedTimestamps()); } TEST_F(VersionEditTest, NewFile4NotSupportedField) { @@ -240,9 +245,10 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) { kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, 300 /* epoch_number */, kUnknownFileChecksum, - kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, true); + kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0, 0, false); edit.SetComparatorName("foo"); + edit.SetPersistUserDefinedTimestamps(false); edit.SetLogNumber(kBig + 100); edit.SetNextFile(kBig + 200); edit.SetLastSequence(kBig + 1000); diff --git a/db/version_set.cc b/db/version_set.cc index e95e98f79..32dd4b8d9 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -6448,6 +6448,8 @@ Status VersionSet::WriteCurrentStateToManifest( } edit.SetComparatorName( cfd->internal_comparator().user_comparator()->Name()); + edit.SetPersistUserDefinedTimestamps( + cfd->ioptions()->persist_user_defined_timestamps); std::string record; if (!edit.EncodeTo(&record)) { return Status::Corruption("Unable to Encode VersionEdit:" + diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 1d5971c59..135b2d64f 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1349,6 +1349,8 @@ class VersionSetTestBase { new_cf.SetColumnFamily(new_id); new_cf.SetLogNumber(0); new_cf.SetComparatorName(cf_options.comparator->Name()); + new_cf.SetPersistUserDefinedTimestamps( + cf_options.persist_user_defined_timestamps); Status s; mutex_.Lock(); s = versions_->LogAndApply(/*column_family_data=*/nullptr, diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 5be134a9d..ab394977d 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -1176,8 +1176,7 @@ struct AdvancedColumnFamilyOptions { // persisted to WAL even if this flag is set to `false`. The benefit of this // is that user-defined timestamps can be recovered with the caveat that users // should flush all memtables so there is no active WAL files before doing a - // downgrade or toggling on / off the user-defined timestamp feature on a - // column family. + // downgrade. 
// // Note that setting this flag to false is not supported in combination with // atomic flush, or concurrent memtable write enabled by diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 3da65a848..3f6ebc71e 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1434,7 +1434,7 @@ public void getLiveFiles() throws RocksDBException { try (final RocksDB db = RocksDB.open(options, dbPath)) { final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); assertThat(livefiles).isNotNull(); - assertThat(livefiles.manifestFileSize).isEqualTo(66); + assertThat(livefiles.manifestFileSize).isEqualTo(70); assertThat(livefiles.files.size()).isEqualTo(3); assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005"); diff --git a/unreleased_history/new_features/enable_disable_udt.md b/unreleased_history/new_features/enable_disable_udt.md new file mode 100644 index 000000000..d4a7ce850 --- /dev/null +++ b/unreleased_history/new_features/enable_disable_udt.md @@ -0,0 +1 @@ +Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. \ No newline at end of file diff --git a/util/udt_util.cc b/util/udt_util.cc index 39422fa96..9380f4560 100644 --- a/util/udt_util.cc +++ b/util/udt_util.cc @@ -100,6 +100,40 @@ Status CheckWriteBatchTimestampSizeConsistency( } return Status::OK(); } + +enum class ToggleUDT { + kUnchanged, + kEnableUDT, + kDisableUDT, + kInvalidChange, +}; + +ToggleUDT CompareComparator(const Comparator* new_comparator, + const std::string& old_comparator_name) { + static const char* kUDTSuffix = ".u64ts"; + static const Slice kSuffixSlice = kUDTSuffix; + static const size_t kSuffixSize = 6; + size_t ts_sz = new_comparator->timestamp_size(); + (void)ts_sz; + Slice new_ucmp_name(new_comparator->Name()); + Slice old_ucmp_name(old_comparator_name); + if (new_ucmp_name.compare(old_ucmp_name) == 0) { + return ToggleUDT::kUnchanged; + } + if (new_ucmp_name.size() == old_ucmp_name.size() + kSuffixSize && + new_ucmp_name.starts_with(old_ucmp_name) && + new_ucmp_name.ends_with(kSuffixSlice)) { + assert(ts_sz == 8); + return ToggleUDT::kEnableUDT; + } + if (old_ucmp_name.size() == new_ucmp_name.size() + kSuffixSize && + old_ucmp_name.starts_with(new_ucmp_name) && + old_ucmp_name.ends_with(kSuffixSlice)) { + assert(ts_sz == 0); + return ToggleUDT::kDisableUDT; + } + return ToggleUDT::kInvalidChange; +} } // namespace TimestampRecoveryHandler::TimestampRecoveryHandler( @@ -261,4 +295,49 @@ Status HandleWriteBatchTimestampSizeDifference( } return Status::OK(); } + +Status ValidateUserDefinedTimestampsOptions( + const Comparator* new_comparator, const std::string& old_comparator_name, + bool new_persist_udt, bool old_persist_udt, + bool* mark_sst_files_has_no_udt) { + size_t ts_sz = new_comparator->timestamp_size(); + ToggleUDT res = CompareComparator(new_comparator, old_comparator_name); + switch (res) { + case ToggleUDT::kUnchanged: + if (old_persist_udt == new_persist_udt) { + return Status::OK(); + } + if (ts_sz == 0) { + return Status::OK(); + } + return Status::InvalidArgument( + "Cannot toggle the persist_user_defined_timestamps flag for a column " + "family with user-defined timestamps feature enabled."); + case ToggleUDT::kEnableUDT: + if (!new_persist_udt) { + *mark_sst_files_has_no_udt = true; + return Status::OK(); + } 
+ return Status::InvalidArgument( + "Cannot open a column family and enable user-defined timestamps " + "feature without setting persist_user_defined_timestamps flag to " + "false."); + case ToggleUDT::kDisableUDT: + if (!old_persist_udt) { + return Status::OK(); + } + return Status::InvalidArgument( + "Cannot open a column family and disable user-defined timestamps " + "feature if its existing persist_user_defined_timestamps flag is not " + "false."); + case ToggleUDT::kInvalidChange: + return Status::InvalidArgument( + new_comparator->Name(), + "does not match existing comparator " + old_comparator_name); + default: + break; + } + return Status::InvalidArgument( + "Unsupported user defined timestamps settings change."); +} } // namespace ROCKSDB_NAMESPACE diff --git a/util/udt_util.h b/util/udt_util.h index fc980a04b..4bc837739 100644 --- a/util/udt_util.h +++ b/util/udt_util.h @@ -215,4 +215,35 @@ Status HandleWriteBatchTimestampSizeDifference( const UnorderedMap& record_ts_sz, TimestampSizeConsistencyMode check_mode, std::unique_ptr* new_batch = nullptr); + +// This util function is used when opening an existing column family and +// processing its VersionEdit. It does a sanity check for the column family's +// old user comparator and the persist_user_defined_timestamps flag as recorded +// in the VersionEdit, against its new settings from the column family's +// ImmutableCFOptions. +// +// Valid settings change include: +// 1) no user comparator change and no effective persist_user_defined_timestamp +// flag change. +// 2) switch user comparator to enable user-defined timestamps feature provided +// the immediately effective persist_user_defined_timestamps flag is false. +// 3) switch user comparator to disable user-defined timestamps feature provided +// that the before-change persist_user_defined_timestamps is already false. +// +// Switch user comparator to disable/enable UDT is only sanity checked by a user +// comparator name comparison. The full check includes enforcing the new user +// comparator ranks user keys exactly the same as the old user comparator and +// only add / remove the user-defined timestamp comparison. We don't have ways +// to strictly enforce this so currently only the RocksDB builtin comparator +// wrapper `ComparatorWithU64TsImpl` is supported to enable / disable +// user-defined timestamps. It formats user-defined timestamps as uint64_t. +// +// When the settings indicate a legit change to enable user-defined timestamps +// feature on a column family, `mark_sst_files_has_no_udt` will be set to true +// to indicate marking all existing SST files has no user-defined timestamps +// when re-writing the manifest. 
+Status ValidateUserDefinedTimestampsOptions( + const Comparator* new_comparator, const std::string& old_comparator_name, + bool new_persist_udt, bool old_persist_udt, + bool* mark_sst_files_has_no_udt); } // namespace ROCKSDB_NAMESPACE diff --git a/util/udt_util_test.cc b/util/udt_util_test.cc index 63f35992a..9fcbc9bc7 100644 --- a/util/udt_util_test.cc +++ b/util/udt_util_test.cc @@ -321,6 +321,122 @@ TEST_F(HandleTimestampSizeDifferenceTest, UnrecoverableInconsistency) { TimestampSizeConsistencyMode::kReconcileInconsistency) .IsInvalidArgument()); } + +TEST(ValidateUserDefinedTimestampsOptionsTest, EnableUserDefinedTimestamps) { + bool mark_sst_files = false; + const Comparator* new_comparator = test::BytewiseComparatorWithU64TsWrapper(); + const Comparator* old_comparator = BytewiseComparator(); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + false /*new_persist_udt*/, true /*old_persist_udt*/, &mark_sst_files)); + ASSERT_TRUE(mark_sst_files); + + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + false /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_TRUE(mark_sst_files); +} + +TEST(ValidateUserDefinedTimestampsOptionsTest, + EnableUserDefinedTimestampsNewPersistUDTFlagIncorrect) { + bool mark_sst_files = false; + const Comparator* new_comparator = test::BytewiseComparatorWithU64TsWrapper(); + const Comparator* old_comparator = BytewiseComparator(); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + true /*new_persist_udt*/, true /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + true /*new_persist_udt*/, false /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); +} + +TEST(ValidateUserDefinedTimestampsOptionsTest, DisableUserDefinedTimestamps) { + bool mark_sst_files = false; + const Comparator* new_comparator = BytewiseComparator(); + const Comparator* old_comparator = test::BytewiseComparatorWithU64TsWrapper(); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + false /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + true /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); +} + +TEST(ValidateUserDefinedTimestampsOptionsTest, + DisableUserDefinedTimestampsOldPersistUDTFlagIncorrect) { + bool mark_sst_files = false; + const Comparator* new_comparator = BytewiseComparator(); + const Comparator* old_comparator = test::BytewiseComparatorWithU64TsWrapper(); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + false /*new_persist_udt*/, true /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + true /*new_persist_udt*/, true /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); +} + +TEST(ValidateUserDefinedTimestampsOptionsTest, UserComparatorUnchanged) { + bool mark_sst_files = false; + const Comparator* ucmp_without_ts = BytewiseComparator(); + const Comparator* ucmp_with_ts = test::BytewiseComparatorWithU64TsWrapper(); 
+ ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_without_ts, std::string(ucmp_without_ts->Name()), + false /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_without_ts, std::string(ucmp_without_ts->Name()), + true /*new_persist_udt*/, true /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_without_ts, std::string(ucmp_without_ts->Name()), + true /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_without_ts, std::string(ucmp_without_ts->Name()), + false /*new_persist_udt*/, true /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_with_ts, std::string(ucmp_with_ts->Name()), true /*new_persist_udt*/, + true /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + ASSERT_OK(ValidateUserDefinedTimestampsOptions( + ucmp_with_ts, std::string(ucmp_with_ts->Name()), + false /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); + ASSERT_FALSE(mark_sst_files); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + ucmp_with_ts, std::string(ucmp_with_ts->Name()), + true /*new_persist_udt*/, false /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + ucmp_with_ts, std::string(ucmp_with_ts->Name()), + false /*new_persist_udt*/, true /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); +} + +TEST(ValidateUserDefinedTimestampsOptionsTest, InvalidUserComparatorChange) { + bool mark_sst_files = false; + const Comparator* new_comparator = BytewiseComparator(); + const Comparator* old_comparator = ReverseBytewiseComparator(); + ASSERT_TRUE(ValidateUserDefinedTimestampsOptions( + new_comparator, std::string(old_comparator->Name()), + false /*new_persist_udt*/, true /*old_persist_udt*/, + &mark_sst_files) + .IsInvalidArgument()); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 63a5125a5220d953bf504daf33694f038403cc7c Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Thu, 27 Jul 2023 12:02:03 -0700 Subject: [PATCH 006/386] Fix use_after_free bug when underlying FS enables kFSBuffer (#11645) Summary: Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest Since it's an experimental feature, added a hack for now to fix the bug. Planning to make public API change to remove const from the callback as it doesn't make sense to use const. 
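To make the ownership issue concrete outside of RocksDB's own types, here is a minimal sketch. The `MiniReadRequest` struct and both callbacks are hypothetical stand-ins, not the real `FSReadRequest` API; the point is only that the completion callback has to move the FS-provided buffer into the caller's request rather than copy the result view alone, which is what the `std::move(req_tmp.fs_scratch)` in the diff below accomplishes.

```cpp
// Hypothetical sketch (not the real FSReadRequest): the result view points
// into a buffer owned by the request's fs_scratch member, so a callback that
// copies only the view dangles once the temporary request is destroyed;
// moving fs_scratch transfers ownership and keeps the view valid.
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>

struct MiniReadRequest {
  const char* result = nullptr;  // view into whichever buffer holds the data
  size_t result_len = 0;
  std::unique_ptr<char[]> fs_scratch;  // FS-owned buffer, freed on destruction
};

// Unsafe: copies only the view; out->result dangles after tmp is destroyed.
void UnsafeComplete(const MiniReadRequest& tmp, MiniReadRequest* out) {
  out->result = tmp.result;
  out->result_len = tmp.result_len;
}

// Safe: also takes ownership of the buffer backing the view.
void SafeComplete(MiniReadRequest& tmp, MiniReadRequest* out) {
  out->result = tmp.result;
  out->result_len = tmp.result_len;
  out->fs_scratch = std::move(tmp.fs_scratch);
}

int main() {
  MiniReadRequest caller_req;
  {
    MiniReadRequest fs_req;  // filled in by the "file system"
    fs_req.fs_scratch.reset(new char[6]);
    std::memcpy(fs_req.fs_scratch.get(), "hello", 6);
    fs_req.result = fs_req.fs_scratch.get();
    fs_req.result_len = 5;
    SafeComplete(fs_req, &caller_req);  // UnsafeComplete would leave a dangling view
  }  // fs_req destroyed; the moved buffer now lives on in caller_req
  std::cout << std::string(caller_req.result, caller_req.result_len) << "\n";
  return 0;
}
```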
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11645 Test Plan: tested locally Reviewed By: ltamasi Differential Revision: D47819907 Pulled By: akankshamahajan15 fbshipit-source-id: 1faf5ef795bf27e2b3a60960374d91274931df8d --- unreleased_history/bug_fixes/fsbuffer_bug_fix.md | 1 + util/async_file_reader.cc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 unreleased_history/bug_fixes/fsbuffer_bug_fix.md diff --git a/unreleased_history/bug_fixes/fsbuffer_bug_fix.md b/unreleased_history/bug_fixes/fsbuffer_bug_fix.md new file mode 100644 index 000000000..bec91bc4f --- /dev/null +++ b/unreleased_history/bug_fixes/fsbuffer_bug_fix.md @@ -0,0 +1 @@ +Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature. diff --git a/util/async_file_reader.cc b/util/async_file_reader.cc index 080c1ae96..9ce13b99f 100644 --- a/util/async_file_reader.cc +++ b/util/async_file_reader.cc @@ -26,6 +26,11 @@ bool AsyncFileReader::MultiReadAsyncImpl(ReadAwaiter* awaiter) { FSReadRequest* read_req = static_cast(cb_arg); read_req->status = req.status; read_req->result = req.result; + if (req.fs_scratch != nullptr) { + // TODO akanksha: Revisit to remove the const in the callback. + FSReadRequest& req_tmp = const_cast(req); + read_req->fs_scratch = std::move(req_tmp.fs_scratch); + } }, &awaiter->read_reqs_[i], &awaiter->io_handle_[i], &awaiter->del_fn_[i], /*aligned_buf=*/nullptr); From 5dd8c114bb3316686ef8fb99d1e456f4cc193766 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 27 Jul 2023 15:31:22 -0700 Subject: [PATCH 007/386] Add a UDT comparator for ReverseBytewiseComparator to object library (#11647) Summary: Add a built-in comparator that supports uint64_t style user-defined timestamps for ReverseBytewiseComparator. 
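Since this patch exposes the new comparator through the object registry, callers other than the new test wrapper would retrieve it by its registered id. A minimal sketch of that lookup, mirroring the wrapper added to `test_util/testutil.cc` below; the header includes and the assert-based error handling are assumptions for illustration, not part of this change.

```cpp
#include <cassert>
#include <cstdint>

#include "rocksdb/comparator.h"
#include "rocksdb/convenience.h"  // ConfigOptions

using ROCKSDB_NAMESPACE::Comparator;
using ROCKSDB_NAMESPACE::ConfigOptions;
using ROCKSDB_NAMESPACE::Status;

// Look up the reverse-bytewise comparator with uint64_t user-defined
// timestamps by the id registered in RegisterBuiltinComparators().
const Comparator* GetReverseBytewiseU64TsComparator() {
  ConfigOptions config_options;
  const Comparator* ucmp = nullptr;
  Status s = Comparator::CreateFromString(
      config_options, "rocksdb.ReverseBytewiseComparator.u64ts", &ucmp);
  assert(s.ok());
  assert(ucmp != nullptr && ucmp->timestamp_size() == sizeof(uint64_t));
  return ucmp;
}
```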
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11647 Test Plan: Added a test wrapper for retrieving this comparator from registry and used it in this test: `./udt_util_test` Reviewed By: ltamasi Differential Revision: D47848303 Pulled By: jowlyzhang fbshipit-source-id: 5af5534a8c2d9195997d0308c8e194c1c797548c --- test_util/testutil.cc | 10 ++++++++++ test_util/testutil.h | 3 +++ util/comparator.cc | 30 +++++++++++++++++++++++------- util/udt_util_test.cc | 5 +++-- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/test_util/testutil.cc b/test_util/testutil.cc index d958cb0cd..b128b797a 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -150,6 +150,16 @@ const Comparator* BytewiseComparatorWithU64TsWrapper() { return user_comparator; } +const Comparator* ReverseBytewiseComparatorWithU64TsWrapper() { + ConfigOptions config_options; + const Comparator* user_comparator = nullptr; + Status s = Comparator::CreateFromString( + config_options, "rocksdb.ReverseBytewiseComparator.u64ts", + &user_comparator); + s.PermitUncheckedError(); + return user_comparator; +} + void CorruptKeyType(InternalKey* ikey) { std::string keystr = ikey->Encode().ToString(); keystr[keystr.size() - 8] = kTypeLogData; diff --git a/test_util/testutil.h b/test_util/testutil.h index c40fcdcb0..eca1ff794 100644 --- a/test_util/testutil.h +++ b/test_util/testutil.h @@ -132,6 +132,9 @@ extern const Comparator* Uint64Comparator(); // A wrapper api for getting the ComparatorWithU64Ts extern const Comparator* BytewiseComparatorWithU64TsWrapper(); +// A wrapper api for getting the ComparatorWithU64Ts +extern const Comparator* ReverseBytewiseComparatorWithU64TsWrapper(); + class StringSink : public FSWritableFile { public: std::string contents_; diff --git a/util/comparator.cc b/util/comparator.cc index 19fd47387..d0ff1f7aa 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -316,24 +316,37 @@ const Comparator* BytewiseComparatorWithU64Ts() { return &comp_with_u64_ts; } +const Comparator* ReverseBytewiseComparatorWithU64Ts() { + STATIC_AVOID_DESTRUCTION( + ComparatorWithU64TsImpl, comp_with_u64_ts); + return &comp_with_u64_ts; +} + static int RegisterBuiltinComparators(ObjectLibrary& library, const std::string& /*arg*/) { library.AddFactory( BytewiseComparatorImpl::kClassName(), [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return BytewiseComparator(); }); + std::unique_ptr* /*guard*/, + std::string* /*errmsg*/) { return BytewiseComparator(); }); library.AddFactory( ReverseBytewiseComparatorImpl::kClassName(), [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return ReverseBytewiseComparator(); }); + std::unique_ptr* /*guard*/, + std::string* /*errmsg*/) { return ReverseBytewiseComparator(); }); library.AddFactory( ComparatorWithU64TsImpl::kClassName(), [](const std::string& /*uri*/, - std::unique_ptr* /*guard */, - std::string* /* errmsg */) { return BytewiseComparatorWithU64Ts(); }); - return 3; + std::unique_ptr* /*guard*/, + std::string* /*errmsg*/) { return BytewiseComparatorWithU64Ts(); }); + library.AddFactory( + ComparatorWithU64TsImpl::kClassName(), + [](const std::string& /*uri*/, + std::unique_ptr* /*guard*/, + std::string* /*errmsg*/) { + return ReverseBytewiseComparatorWithU64Ts(); + }); + return 4; } Status Comparator::CreateFromString(const ConfigOptions& config_options, @@ -357,6 +370,9 @@ Status Comparator::CreateFromString(const ConfigOptions& config_options, } else if (id == 
ComparatorWithU64TsImpl::kClassName()) { *result = BytewiseComparatorWithU64Ts(); + } else if (id == ComparatorWithU64TsImpl< + ReverseBytewiseComparatorImpl>::kClassName()) { + *result = ReverseBytewiseComparatorWithU64Ts(); } else if (value.empty()) { // No Id and no options. Clear the object *result = nullptr; diff --git a/util/udt_util_test.cc b/util/udt_util_test.cc index 9fcbc9bc7..47e1edf34 100644 --- a/util/udt_util_test.cc +++ b/util/udt_util_test.cc @@ -356,8 +356,9 @@ TEST(ValidateUserDefinedTimestampsOptionsTest, TEST(ValidateUserDefinedTimestampsOptionsTest, DisableUserDefinedTimestamps) { bool mark_sst_files = false; - const Comparator* new_comparator = BytewiseComparator(); - const Comparator* old_comparator = test::BytewiseComparatorWithU64TsWrapper(); + const Comparator* new_comparator = ReverseBytewiseComparator(); + const Comparator* old_comparator = + test::ReverseBytewiseComparatorWithU64TsWrapper(); ASSERT_OK(ValidateUserDefinedTimestampsOptions( new_comparator, std::string(old_comparator->Name()), false /*new_persist_udt*/, false /*old_persist_udt*/, &mark_sst_files)); From 6a0f63763383e969d7bde6fd2b47c0d0084bcb4a Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 28 Jul 2023 09:47:31 -0700 Subject: [PATCH 008/386] Compare the number of input keys and processed keys for compactions (#11571) Summary: ... to improve data integrity validation during compaction. A new option `compaction_verify_record_count` is introduced for this verification and is enabled by default. One exception when the verification is not done is when a compaction filter returns kRemoveAndSkipUntil which can cause CompactionIterator to seek until some key and hence not able to keep track of the number of keys processed. For expected number of input keys, we sum over the number of total keys - number of range tombstones across compaction input files (`CompactionJob::UpdateCompactionStats()`). Table properties are consulted if `FileMetaData` is not initialized for some input file. Since table properties for all input files were also constructed during `DBImpl::NotifyOnCompactionBegin()`, `Compaction::GetTableProperties()` is introduced to reduce duplicated code. For actual number of keys processed, each subcompaction will record its number of keys processed to `sub_compact->compaction_job_stats.num_input_records` and aggregated when all subcompactions finish (`CompactionJob::AggregateCompactionStats()`). In the case when some subcompaction encountered kRemoveAndSkipUntil from compaction filter and does not have accurate count, it propagates this information through `sub_compact->compaction_job_stats.has_num_input_records`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11571 Test Plan: * Add a new unit test `DBCompactionTest.VerifyRecordCount` for the corruption case. * All other unit tests for non-corrupted case. 
* Ran crash test for a few hours: `python3 ./tools/db_crashtest.py whitebox --simple` Reviewed By: ajkr Differential Revision: D47131965 Pulled By: cbi42 fbshipit-source-id: cc8e94565dd526c4347e9d3843ecf32f6727af92 --- db/builder.cc | 4 +- db/compaction/compaction.cc | 29 +++++ db/compaction/compaction.h | 19 ++- db/compaction/compaction_iterator.cc | 13 +- db/compaction/compaction_iterator.h | 23 +++- db/compaction/compaction_iterator_test.cc | 4 +- db/compaction/compaction_job.cc | 121 ++++++++++++++---- db/compaction/compaction_job.h | 19 ++- db/compaction/compaction_job_test.cc | 3 +- db/db_compaction_test.cc | 37 ++++++ db/db_impl/db_impl.h | 2 +- db/db_impl/db_impl_compaction_flush.cc | 25 +--- db/flush_job.cc | 1 + db/version_edit.h | 3 +- include/rocksdb/compaction_job_stats.h | 3 + include/rocksdb/options.h | 20 ++- options/db_options.cc | 7 + options/db_options.h | 1 + options/options_helper.cc | 2 + options/options_settable_test.cc | 1 + table/mock_table.cc | 8 +- .../compaction_verify_input_count.md | 1 + util/compaction_job_stats_impl.cc | 2 + 23 files changed, 270 insertions(+), 78 deletions(-) create mode 100644 unreleased_history/new_features/compaction_verify_input_count.md diff --git a/db/builder.cc b/db/builder.cc index 84d2396c3..a3a6bc47e 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -203,6 +203,7 @@ Status BuildTable( blob_file_builder.get(), ioptions.allow_data_in_errors, ioptions.enforce_single_del_contracts, /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, + true /* must_count_input_entries */, /*compaction=*/nullptr, compaction_filter.get(), /*shutting_down=*/nullptr, db_options.info_log, full_history_ts_low); @@ -286,8 +287,9 @@ Status BuildTable( TEST_SYNC_POINT("BuildTable:BeforeFinishBuildTable"); const bool empty = builder->IsEmpty(); if (num_input_entries != nullptr) { + assert(c_iter.HasNumInputEntryScanned()); *num_input_entries = - c_iter.num_input_entry_scanned() + num_unfragmented_tombstones; + c_iter.NumInputEntryScanned() + num_unfragmented_tombstones; } if (!s.ok() || empty) { builder->Abandon(); diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index ceed9d104..e28257d65 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -13,6 +13,7 @@ #include #include "db/column_family.h" +#include "logging/logging.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/sst_partitioner.h" #include "test_util/sync_point.h" @@ -203,6 +204,34 @@ bool Compaction::IsFullCompaction( return num_files_in_compaction == total_num_files; } +const TablePropertiesCollection& Compaction::GetTableProperties() { + if (!input_table_properties_initialized_) { + const ReadOptions read_options(Env::IOActivity::kCompaction); + for (size_t i = 0; i < num_input_levels(); ++i) { + for (const FileMetaData* fmd : *(this->inputs(i))) { + std::shared_ptr tp; + std::string file_name = + TableFileName(immutable_options_.cf_paths, fmd->fd.GetNumber(), + fmd->fd.GetPathId()); + Status s = input_version_->GetTableProperties(read_options, &tp, fmd, + &file_name); + if (s.ok()) { + table_properties_[file_name] = tp; + } else { + ROCKS_LOG_ERROR(immutable_options_.info_log, + "Unable to load table properties for file %" PRIu64 + " --- %s\n", + fmd->fd.GetNumber(), s.ToString().c_str()); + } + } + } + + input_table_properties_initialized_ = true; + }; + + return table_properties_; +} + Compaction::Compaction( VersionStorageInfo* vstorage, const ImmutableOptions& _immutable_options, const MutableCFOptions& _mutable_cf_options, diff 
--git a/db/compaction/compaction.h b/db/compaction/compaction.h index 1bd406bc9..fcb0f3003 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -326,12 +326,16 @@ class Compaction { int output_level, VersionStorageInfo* vstorage, const std::vector& inputs); - TablePropertiesCollection GetOutputTableProperties() const { - return output_table_properties_; - } - - void SetOutputTableProperties(TablePropertiesCollection tp) { - output_table_properties_ = std::move(tp); + // If called before a compaction finishes, will return + // table properties of all compaction input files. + // If called after a compaction finished, will return + // table properties of all compaction input and output files. + const TablePropertiesCollection& GetTableProperties(); + + void SetOutputTableProperties( + const std::string& file_name, + const std::shared_ptr& tp) { + table_properties_[file_name] = tp; } Slice GetSmallestUserKey() const { return smallest_user_key_; } @@ -518,8 +522,9 @@ class Compaction { // Does input compression match the output compression? bool InputCompressionMatchesOutput() const; + bool input_table_properties_initialized_ = false; // table properties of output files - TablePropertiesCollection output_table_properties_; + TablePropertiesCollection table_properties_; // smallest user keys in compaction // includes timestamp if user-defined timestamp is enabled. diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index 5be7b565a..1c3ca5e1e 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -31,7 +31,8 @@ CompactionIterator::CompactionIterator( BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, bool enforce_single_del_contracts, const std::atomic& manual_compaction_canceled, - const Compaction* compaction, const CompactionFilter* compaction_filter, + bool must_count_input_entries, const Compaction* compaction, + const CompactionFilter* compaction_filter, const std::atomic* shutting_down, const std::shared_ptr info_log, const std::string* full_history_ts_low, @@ -45,8 +46,9 @@ CompactionIterator::CompactionIterator( manual_compaction_canceled, std::unique_ptr( compaction ? 
new RealCompaction(compaction) : nullptr), - compaction_filter, shutting_down, info_log, full_history_ts_low, - preserve_time_min_seqno, preclude_last_level_min_seqno) {} + must_count_input_entries, compaction_filter, shutting_down, info_log, + full_history_ts_low, preserve_time_min_seqno, + preclude_last_level_min_seqno) {} CompactionIterator::CompactionIterator( InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, @@ -58,15 +60,14 @@ CompactionIterator::CompactionIterator( BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, bool enforce_single_del_contracts, const std::atomic& manual_compaction_canceled, - std::unique_ptr compaction, + std::unique_ptr compaction, bool must_count_input_entries, const CompactionFilter* compaction_filter, const std::atomic* shutting_down, const std::shared_ptr info_log, const std::string* full_history_ts_low, const SequenceNumber preserve_time_min_seqno, const SequenceNumber preclude_last_level_min_seqno) - : input_(input, cmp, - !compaction || compaction->DoesInputReferenceBlobFiles()), + : input_(input, cmp, must_count_input_entries), cmp_(cmp), merge_helper_(merge_helper), snapshots_(snapshots), diff --git a/db/compaction/compaction_iterator.h b/db/compaction/compaction_iterator.h index ea2dc062e..15193b587 100644 --- a/db/compaction/compaction_iterator.h +++ b/db/compaction/compaction_iterator.h @@ -38,15 +38,18 @@ class SequenceIterWrapper : public InternalIterator { bool Valid() const override { return inner_iter_->Valid(); } Status status() const override { return inner_iter_->status(); } void Next() override { - num_itered_++; + if (!inner_iter_->IsDeleteRangeSentinelKey()) { + num_itered_++; + } inner_iter_->Next(); } void Seek(const Slice& target) override { if (!need_count_entries_) { + has_num_itered_ = false; inner_iter_->Seek(target); } else { - // For flush cases, we need to count total number of entries, so we - // do Next() rather than Seek(). + // Need to count total number of entries, + // so we do Next() rather than Seek(). while (inner_iter_->Valid() && icmp_.Compare(inner_iter_->key(), target) < 0) { Next(); @@ -62,7 +65,8 @@ class SequenceIterWrapper : public InternalIterator { void SeekForPrev(const Slice& /* target */) override { assert(false); } void SeekToLast() override { assert(false); } - uint64_t num_itered() const { return num_itered_; } + uint64_t NumItered() const { return num_itered_; } + bool HasNumItered() const { return has_num_itered_; } bool IsDeleteRangeSentinelKey() const override { assert(Valid()); return inner_iter_->IsDeleteRangeSentinelKey(); @@ -73,6 +77,7 @@ class SequenceIterWrapper : public InternalIterator { InternalIterator* inner_iter_; // not owned uint64_t num_itered_ = 0; bool need_count_entries_; + bool has_num_itered_ = true; }; class CompactionIterator { @@ -189,6 +194,10 @@ class CompactionIterator { const Compaction* compaction_; }; + // @param must_count_input_entries if true, `NumInputEntryScanned()` will + // return the number of input keys scanned. If false, `NumInputEntryScanned()` + // will return this number if no Seek was called on `input`. User should call + // `HasNumInputEntryScanned()` first in this case. 
CompactionIterator( InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper, SequenceNumber last_sequence, std::vector* snapshots, @@ -199,7 +208,7 @@ class CompactionIterator { BlobFileBuilder* blob_file_builder, bool allow_data_in_errors, bool enforce_single_del_contracts, const std::atomic& manual_compaction_canceled, - const Compaction* compaction = nullptr, + bool must_count_input_entries, const Compaction* compaction = nullptr, const CompactionFilter* compaction_filter = nullptr, const std::atomic* shutting_down = nullptr, const std::shared_ptr info_log = nullptr, @@ -219,6 +228,7 @@ class CompactionIterator { bool enforce_single_del_contracts, const std::atomic& manual_compaction_canceled, std::unique_ptr compaction, + bool must_count_input_entries, const CompactionFilter* compaction_filter = nullptr, const std::atomic* shutting_down = nullptr, const std::shared_ptr info_log = nullptr, @@ -253,7 +263,8 @@ class CompactionIterator { return current_user_key_; } const CompactionIterationStats& iter_stats() const { return iter_stats_; } - uint64_t num_input_entry_scanned() const { return input_.num_itered(); } + bool HasNumInputEntryScanned() const { return input_.HasNumItered(); } + uint64_t NumInputEntryScanned() const { return input_.NumItered(); } // If the current key should be placed on penultimate level, only valid if // per_key_placement is supported bool output_to_penultimate_level() const { diff --git a/db/compaction/compaction_iterator_test.cc b/db/compaction/compaction_iterator_test.cc index 81362d792..20428a586 100644 --- a/db/compaction/compaction_iterator_test.cc +++ b/db/compaction/compaction_iterator_test.cc @@ -293,8 +293,8 @@ class CompactionIteratorTest : public testing::TestWithParam { nullptr /* blob_file_builder */, true /*allow_data_in_errors*/, true /*enforce_single_del_contracts*/, /*manual_compaction_canceled=*/kManualCompactionCanceledFalse_, - std::move(compaction), filter, &shutting_down_, /*info_log=*/nullptr, - full_history_ts_low)); + std::move(compaction), /*must_count_input_entries=*/false, filter, + &shutting_down_, /*info_log=*/nullptr, full_history_ts_low)); } void AddSnapshot(SequenceNumber snapshot, diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index d609e0154..8ea806816 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -796,15 +796,46 @@ Status CompactionJob::Run() { auto fn = TableFileName(state.compaction->immutable_options()->cf_paths, output.meta.fd.GetNumber(), output.meta.fd.GetPathId()); - tp[fn] = output.table_properties; + compact_->compaction->SetOutputTableProperties(fn, + output.table_properties); } } - compact_->compaction->SetOutputTableProperties(std::move(tp)); - // Finish up all book-keeping to unify the subcompaction results + // Finish up all bookkeeping to unify the subcompaction results. compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_); - UpdateCompactionStats(); - + uint64_t num_input_range_del = 0; + bool ok = UpdateCompactionStats(&num_input_range_del); + // (Sub)compactions returned ok, do sanity check on the number of input keys. + if (status.ok() && ok && compaction_job_stats_->has_num_input_records) { + size_t ts_sz = compact_->compaction->column_family_data() + ->user_comparator() + ->timestamp_size(); + // When trim_ts_ is non-empty, CompactionIterator takes + // HistoryTrimmingIterator as input iterator and sees a trimmed view of + // input keys. 
So the number of keys it processed is not suitable for + // verification here. + // TODO: support verification when trim_ts_ is non-empty. + if (!(ts_sz > 0 && !trim_ts_.empty()) && + db_options_.compaction_verify_record_count) { + assert(compaction_stats_.stats.num_input_records > 0); + // TODO: verify the number of range deletion entries. + uint64_t expected = + compaction_stats_.stats.num_input_records - num_input_range_del; + uint64_t actual = compaction_job_stats_->num_input_records; + if (expected != actual) { + std::string msg = + "Total number of input records: " + std::to_string(expected) + + ", but processed " + std::to_string(actual) + " records."; + ROCKS_LOG_WARN( + db_options_.info_log, "[%s] [JOB %d] Compaction %s", + compact_->compaction->column_family_data()->GetName().c_str(), + job_context_->job_id, msg.c_str()); + status = Status::Corruption( + "Compaction number of input keys does not match number of keys " + "processed."); + } + } + } RecordCompactionIOStats(); LogFlush(db_options_.info_log); TEST_SYNC_POINT("CompactionJob::Run():End"); @@ -1252,6 +1283,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { /*expect_valid_internal_key=*/true, range_del_agg.get(), blob_file_builder.get(), db_options_.allow_data_in_errors, db_options_.enforce_single_del_contracts, manual_compaction_canceled_, + sub_compact->compaction + ->DoesInputReferenceBlobFiles() /* must_count_input_entries */, sub_compact->compaction, compaction_filter, shutting_down_, db_options_.info_log, full_history_ts_low, preserve_time_min_seqno_, preclude_last_level_min_seqno_); @@ -1316,8 +1349,25 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { if (c_iter->status().IsManualCompactionPaused()) { break; } + +#ifndef NDEBUG + bool stop = false; + TEST_SYNC_POINT_CALLBACK("CompactionJob::ProcessKeyValueCompaction()::stop", + static_cast(&stop)); + if (stop) { + break; + } +#endif // NDEBUG } + // This number may not be accurate when CompactionIterator was created + // with `must_count_input_entries=false`. 
+ assert(!sub_compact->compaction->DoesInputReferenceBlobFiles() || + c_iter->HasNumInputEntryScanned()); + sub_compact->compaction_job_stats.has_num_input_records = + c_iter->HasNumInputEntryScanned(); + sub_compact->compaction_job_stats.num_input_records = + c_iter->NumInputEntryScanned(); sub_compact->compaction_job_stats.num_blobs_read = c_iter_stats.num_blobs_read; sub_compact->compaction_job_stats.total_blob_bytes_read = @@ -1903,24 +1953,53 @@ void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) { } } // namespace - -void CompactionJob::UpdateCompactionStats() { +bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) { assert(compact_); Compaction* compaction = compact_->compaction; compaction_stats_.stats.num_input_files_in_non_output_levels = 0; compaction_stats_.stats.num_input_files_in_output_level = 0; + + bool has_error = false; + const ReadOptions read_options(Env::IOActivity::kCompaction); + const auto& input_table_properties = compaction->GetTableProperties(); for (int input_level = 0; input_level < static_cast(compaction->num_input_levels()); ++input_level) { + size_t num_input_files = compaction->num_input_files(input_level); + uint64_t* bytes_read; if (compaction->level(input_level) != compaction->output_level()) { - UpdateCompactionInputStatsHelper( - &compaction_stats_.stats.num_input_files_in_non_output_levels, - &compaction_stats_.stats.bytes_read_non_output_levels, input_level); + compaction_stats_.stats.num_input_files_in_non_output_levels += + static_cast(num_input_files); + bytes_read = &compaction_stats_.stats.bytes_read_non_output_levels; } else { - UpdateCompactionInputStatsHelper( - &compaction_stats_.stats.num_input_files_in_output_level, - &compaction_stats_.stats.bytes_read_output_level, input_level); + compaction_stats_.stats.num_input_files_in_output_level += + static_cast(num_input_files); + bytes_read = &compaction_stats_.stats.bytes_read_output_level; + } + for (size_t i = 0; i < num_input_files; ++i) { + const FileMetaData* file_meta = compaction->input(input_level, i); + *bytes_read += file_meta->fd.GetFileSize(); + uint64_t file_input_entries = file_meta->num_entries; + uint64_t file_num_range_del = file_meta->num_range_deletions; + if (file_input_entries == 0) { + uint64_t file_number = file_meta->fd.GetNumber(); + // Try getting info from table property + std::string fn = + TableFileName(compaction->immutable_options()->cf_paths, + file_number, file_meta->fd.GetPathId()); + const auto& tp = input_table_properties.find(fn); + if (tp != input_table_properties.end()) { + file_input_entries = tp->second->num_entries; + file_num_range_del = tp->second->num_range_deletions; + } else { + has_error = true; + } + } + compaction_stats_.stats.num_input_records += file_input_entries; + if (num_input_range_del) { + *num_input_range_del += file_num_range_del; + } } } @@ -1930,21 +2009,7 @@ void CompactionJob::UpdateCompactionStats() { compaction_stats_.stats.num_dropped_records = compaction_stats_.DroppedRecords(); -} - -void CompactionJob::UpdateCompactionInputStatsHelper(int* num_files, - uint64_t* bytes_read, - int input_level) { - const Compaction* compaction = compact_->compaction; - auto num_input_files = compaction->num_input_files(input_level); - *num_files += static_cast(num_input_files); - - for (size_t i = 0; i < num_input_files; ++i) { - const auto* file_meta = compaction->input(input_level, i); - *bytes_read += file_meta->fd.GetFileSize(); - compaction_stats_.stats.num_input_records += - 
static_cast(file_meta->num_entries); - } + return !has_error; } void CompactionJob::UpdateCompactionJobStats( diff --git a/db/compaction/compaction_job.h b/db/compaction/compaction_job.h index a930c15f1..926f4a8f9 100644 --- a/db/compaction/compaction_job.h +++ b/db/compaction/compaction_job.h @@ -192,7 +192,21 @@ class CompactionJob { IOStatus io_status() const { return io_status_; } protected: - void UpdateCompactionStats(); + // Update the following stats in compaction_stats_.stats + // - num_input_files_in_non_output_levels + // - num_input_files_in_output_level + // - bytes_read_non_output_levels + // - bytes_read_output_level + // - num_input_records + // - bytes_read_blob + // - num_dropped_records + // + // @param num_input_range_del if non-null, will be set to the number of range + // deletion entries in this compaction input. + // + // Returns true iff compaction_stats_.stats.num_input_records and + // num_input_range_del are calculated successfully. + bool UpdateCompactionStats(uint64_t* num_input_range_del = nullptr); void LogCompaction(); virtual void RecordCompactionIOStats(); void CleanupCompaction(); @@ -267,9 +281,6 @@ class CompactionJob { void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats, CompactionJobStats* compaction_job_stats = nullptr); - void UpdateCompactionInputStatsHelper(int* num_files, uint64_t* bytes_read, - int input_level); - void NotifyOnSubcompactionBegin(SubcompactionState* sub_compact); void NotifyOnSubcompactionCompleted(SubcompactionState* sub_compact); diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index f7fc28c15..8f91cc04c 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -655,11 +655,12 @@ class CompactionJobTestBase : public testing::Test { ASSERT_TRUE(full_history_ts_low_.empty() || ucmp_->timestamp_size() == full_history_ts_low_.size()); const std::atomic kManualCompactionCanceledFalse{false}; + JobContext job_context(1, false /* create_superversion */); CompactionJob compaction_job( 0, &compaction, db_options_, mutable_db_options_, env_options_, versions_.get(), &shutting_down_, &log_buffer, nullptr, nullptr, nullptr, nullptr, &mutex_, &error_handler_, snapshots, - earliest_write_conflict_snapshot, snapshot_checker, nullptr, + earliest_write_conflict_snapshot, snapshot_checker, &job_context, table_cache_, &event_logger, false, false, dbname_, &compaction_job_stats_, Env::Priority::USER, nullptr /* IOTracer */, /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 00a33669b..3e565108a 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -9807,6 +9807,43 @@ TEST_F(DBCompactionTest, NumberOfSubcompactions) { } } +TEST_F(DBCompactionTest, VerifyRecordCount) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + options.level0_file_num_compaction_trigger = 3; + options.compaction_verify_record_count = true; + DestroyAndReopen(options); + Random rnd(301); + + // Create 2 overlapping L0 files + for (int i = 1; i < 20; i += 2) { + ASSERT_OK(Put(Key(i), rnd.RandomString(100))); + } + ASSERT_OK(Flush()); + + for (int i = 0; i < 20; i += 2) { + ASSERT_OK(Put(Key(i), rnd.RandomString(100))); + } + ASSERT_OK(Flush()); + + // Only iterator through 10 keys and force compaction to finish. 
+ int num_iter = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "CompactionJob::ProcessKeyValueCompaction()::stop", [&](void* stop_ptr) { + num_iter++; + if (num_iter == 10) { + *(bool*)stop_ptr = true; + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Status s = db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_TRUE(s.IsCorruption()); + const char* expect = + "Compaction number of input keys does not match number of keys " + "processed."; + ASSERT_TRUE(std::strstr(s.getState(), expect)); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 27f539182..0c654035b 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2200,7 +2200,7 @@ class DBImpl : public DB { void BuildCompactionJobInfo(const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, - const int job_id, const Version* current, + const int job_id, CompactionJobInfo* compaction_job_info) const; // Reserve the next 'num' file numbers for to-be-ingested external SST files, // and return the current file_number in 'next_file_number'. diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 4e0372e69..9cde56061 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1546,7 +1546,7 @@ Status DBImpl::CompactFilesImpl( if (compaction_job_info != nullptr) { BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats, - job_context->job_id, version, compaction_job_info); + job_context->job_id, compaction_job_info); } if (status.ok()) { @@ -1643,21 +1643,18 @@ void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c, } c->SetNotifyOnCompactionCompleted(); - Version* current = cfd->current(); - current->Ref(); // release lock while notifying events mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex"); { CompactionJobInfo info{}; - BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, current, &info); + BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, &info); for (auto listener : immutable_db_options_.listeners) { listener->OnCompactionBegin(this, info); } info.status.PermitUncheckedError(); } mutex_.Lock(); - current->Unref(); } void DBImpl::NotifyOnCompactionCompleted( @@ -1675,21 +1672,17 @@ void DBImpl::NotifyOnCompactionCompleted( return; } - Version* current = cfd->current(); - current->Ref(); // release lock while notifying events mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex"); { CompactionJobInfo info{}; - BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, current, - &info); + BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, &info); for (auto listener : immutable_db_options_.listeners) { listener->OnCompactionCompleted(this, info); } } mutex_.Lock(); - current->Unref(); // no need to signal bg_cv_ as it will be signaled at the end of the // flush process. 
} @@ -3923,7 +3916,7 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) { void DBImpl::BuildCompactionJobInfo( const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, const int job_id, - const Version* current, CompactionJobInfo* compaction_job_info) const { + CompactionJobInfo* compaction_job_info) const { assert(compaction_job_info != nullptr); compaction_job_info->cf_id = cfd->GetID(); compaction_job_info->cf_name = cfd->GetName(); @@ -3933,7 +3926,7 @@ void DBImpl::BuildCompactionJobInfo( compaction_job_info->base_input_level = c->start_level(); compaction_job_info->output_level = c->output_level(); compaction_job_info->stats = compaction_job_stats; - compaction_job_info->table_properties = c->GetOutputTableProperties(); + compaction_job_info->table_properties = c->GetTableProperties(); compaction_job_info->compaction_reason = c->compaction_reason(); compaction_job_info->compression = c->output_compression(); @@ -3947,15 +3940,9 @@ void DBImpl::BuildCompactionJobInfo( compaction_job_info->input_files.push_back(fn); compaction_job_info->input_file_infos.push_back(CompactionFileInfo{ static_cast(i), file_number, fmd->oldest_blob_file_number}); - if (compaction_job_info->table_properties.count(fn) == 0) { - std::shared_ptr tp; - auto s = current->GetTableProperties(read_options, &tp, fmd, &fn); - if (s.ok()) { - compaction_job_info->table_properties[fn] = tp; - } - } } } + for (const auto& newf : c->edit()->GetNewFiles()) { const FileMetaData& meta = newf.second; const FileDescriptor& desc = meta.fd; diff --git a/db/flush_job.cc b/db/flush_job.cc index bfdd9a059..b989cc8e3 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -490,6 +490,7 @@ Status FlushJob::MemPurge() { nullptr, ioptions->allow_data_in_errors, ioptions->enforce_single_del_contracts, /*manual_compaction_canceled=*/kManualCompactionCanceledFalse, + false /* must_count_input_entries */, /*compaction=*/nullptr, compaction_filter.get(), /*shutting_down=*/nullptr, ioptions->info_log, full_history_ts_low); diff --git a/db/version_edit.h b/db/version_edit.h index a13d8e65f..e6d54d31d 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -193,7 +193,8 @@ struct FileMetaData { uint64_t compensated_file_size = 0; // These values can mutate, but they can only be read or written from // single-threaded LogAndApply thread - uint64_t num_entries = 0; // the number of entries. + uint64_t num_entries = + 0; // The number of entries, including deletions and range deletions. // The number of deletion entries, including range deletions. uint64_t num_deletions = 0; uint64_t raw_key_size = 0; // total uncompressed key size. diff --git a/include/rocksdb/compaction_job_stats.h b/include/rocksdb/compaction_job_stats.h index 5ff8eccc8..7e8153044 100644 --- a/include/rocksdb/compaction_job_stats.h +++ b/include/rocksdb/compaction_job_stats.h @@ -24,6 +24,9 @@ struct CompactionJobStats { // the elapsed CPU time of this compaction in microseconds. uint64_t cpu_micros; + // Used internally indicating whether a subcompaction's + // `num_input_records` is accurate. + bool has_num_input_records; // the number of compaction input records. 
uint64_t num_input_records; // the number of blobs read from blob files diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 6cf91a491..53e534164 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -490,11 +490,29 @@ struct DBOptions { // If true, during memtable flush, RocksDB will validate total entries // read in flush, and compare with counter inserted into it. + // // The option is here to turn the feature off in case this new validation - // feature has a bug. + // feature has a bug. The option may be removed in the future once the + // feature is stable. + // // Default: true bool flush_verify_memtable_count = true; + // If true, during compaction, RocksDB will count the number of entries + // read and compare it against the number of entries in the compaction + // input files. This is intended to add protection against corruption + // during compaction. Note that + // - this verification is not done for compactions during which a compaction + // filter returns kRemoveAndSkipUntil, and + // - the number of range deletions is not verified. + // + // The option is here to turn the feature off in case this new validation + // feature has a bug. The option may be removed in the future once the + // feature is stable. + // + // Default: true + bool compaction_verify_record_count = true; + // If true, the log numbers and sizes of the synced WALs are tracked // in MANIFEST. During DB recovery, if a synced WAL is missing // from disk, or the WAL's size does not match the recorded size in diff --git a/options/db_options.cc b/options/db_options.cc index d81e72833..f009c1a59 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -222,6 +222,10 @@ static std::unordered_map {offsetof(struct ImmutableDBOptions, flush_verify_memtable_count), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"compaction_verify_record_count", + {offsetof(struct ImmutableDBOptions, compaction_verify_record_count), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, {"track_and_verify_wals_in_manifest", {offsetof(struct ImmutableDBOptions, track_and_verify_wals_in_manifest), @@ -679,6 +683,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) error_if_exists(options.error_if_exists), paranoid_checks(options.paranoid_checks), flush_verify_memtable_count(options.flush_verify_memtable_count), + compaction_verify_record_count(options.compaction_verify_record_count), track_and_verify_wals_in_manifest( options.track_and_verify_wals_in_manifest), verify_sst_unique_id_in_manifest( @@ -771,6 +776,8 @@ void ImmutableDBOptions::Dump(Logger* log) const { paranoid_checks); ROCKS_LOG_HEADER(log, " Options.flush_verify_memtable_count: %d", flush_verify_memtable_count); + ROCKS_LOG_HEADER(log, " Options.compaction_verify_record_count: %d", + compaction_verify_record_count); ROCKS_LOG_HEADER(log, " " "Options.track_and_verify_wals_in_manifest: %d", diff --git a/options/db_options.h b/options/db_options.h index 2a9d98b25..d00a06718 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -25,6 +25,7 @@ struct ImmutableDBOptions { bool error_if_exists; bool paranoid_checks; bool flush_verify_memtable_count; + bool compaction_verify_record_count; bool track_and_verify_wals_in_manifest; bool verify_sst_unique_id_in_manifest; Env* env; diff --git a/options/options_helper.cc b/options/options_helper.cc index abe5053d2..d221f9705 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc 
@@ -60,6 +60,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.paranoid_checks = immutable_db_options.paranoid_checks; options.flush_verify_memtable_count = immutable_db_options.flush_verify_memtable_count; + options.compaction_verify_record_count = + immutable_db_options.compaction_verify_record_count; options.track_and_verify_wals_in_manifest = immutable_db_options.track_and_verify_wals_in_manifest; options.verify_sst_unique_id_in_manifest = diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 19cb6310f..8b69e6079 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -308,6 +308,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "writable_file_max_buffer_size=1048576;" "paranoid_checks=true;" "flush_verify_memtable_count=true;" + "compaction_verify_record_count=true;" "track_and_verify_wals_in_manifest=true;" "verify_sst_unique_id_in_manifest=true;" "is_fd_close_on_exec=false;" diff --git a/table/mock_table.cc b/table/mock_table.cc index c251ea108..d6229ef60 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -230,7 +230,13 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key, std::shared_ptr MockTableReader::GetTableProperties() const { - return std::shared_ptr(new TableProperties()); + TableProperties* tp = new TableProperties(); + tp->num_entries = table_.size(); + tp->num_range_deletions = 0; + tp->raw_key_size = 1; + tp->raw_value_size = 1; + + return std::shared_ptr(tp); } MockTableFactory::MockTableFactory() diff --git a/unreleased_history/new_features/compaction_verify_input_count.md b/unreleased_history/new_features/compaction_verify_input_count.md new file mode 100644 index 000000000..32cfe0910 --- /dev/null +++ b/unreleased_history/new_features/compaction_verify_input_count.md @@ -0,0 +1 @@ +* RocksDB will compare the number of input keys to the number of keys processed after each compaction. Compaction will fail and report Corruption status if the verification fails. Option `compaction_verify_record_count` is introduced for this purpose and is enabled by default. diff --git a/util/compaction_job_stats_impl.cc b/util/compaction_job_stats_impl.cc index 587a26f24..cdb591f23 100644 --- a/util/compaction_job_stats_impl.cc +++ b/util/compaction_job_stats_impl.cc @@ -12,6 +12,7 @@ void CompactionJobStats::Reset() { elapsed_micros = 0; cpu_micros = 0; + has_num_input_records = true; num_input_records = 0; num_blobs_read = 0; num_input_files = 0; @@ -55,6 +56,7 @@ void CompactionJobStats::Add(const CompactionJobStats& stats) { elapsed_micros += stats.elapsed_micros; cpu_micros += stats.cpu_micros; + has_num_input_records &= stats.has_num_input_records; num_input_records += stats.num_input_records; num_blobs_read += stats.num_blobs_read; num_input_files += stats.num_input_files; From c205a217e6e84c620f8b65d65003f272bcbb6caf Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 28 Jul 2023 14:57:07 -0700 Subject: [PATCH 009/386] Strip leading and trailing whitespace for unreleased_history entries (#11652) Summary: Some trailing whitespace has leaked into HISTORY.md entries. This can lead to unexpected changes when modifying HISTORY.md with hygienic editors (e.g. for a patch release). This change should protect against future cases. 
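The effect of the updated awk filter can be checked in isolation. A small illustration with made-up entry lines; the awk program is the one from the release.sh diff below, only the sample input is hypothetical.

```sh
# Leading/trailing whitespace is stripped and a missing "* " prefix is added
# to the first line of an entry.
printf '  Fixed a bug with trailing spaces.   \n   * Second line.  \n' |
awk '/./ { gsub(/^[ \t]+/, ""); gsub(/[ \t]+$/, "");
           if (notfirstline || $1 == "*") print;
           else print "* " $0;
           notfirstline=1; }'
# Output:
# * Fixed a bug with trailing spaces.
# * Second line.
```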
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11652 Test Plan: manual Reviewed By: akankshamahajan15 Differential Revision: D47882814 Pulled By: pdillinger fbshipit-source-id: 148c3746d3b298cb6e1f655f0416d46619969086 --- HISTORY.md | 4 ++-- unreleased_history/release.sh | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index f4dce5cd8..41d4b8453 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -6,7 +6,7 @@ * Add FSReadRequest::fs_scratch which is a data buffer allocated and provided by underlying FileSystem to RocksDB during reads, when FS wants to provide its own buffer with data instead of using RocksDB provided FSReadRequest::scratch. This can help in cpu optimization by avoiding copy from file system's buffer to RocksDB buffer. More details on how to use/enable it in file_system.h. Right now its supported only for MultiReads(async + sync) with non direct io. * Start logging non-zero user-defined timestamp sizes in WAL to signal user key format in subsequent records and use it during recovery. This change will break recovery from WAL files written by early versions that contain user-defined timestamps. The workaround is to ensure there are no WAL files to recover (i.e. by flushing before close) before upgrade. * Added new property "rocksdb.obsolete-sst-files-size-property" that reports the size of SST files that have become obsolete but have not yet been deleted or scheduled for deletion -* Start to record the value of the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` in the Manifest and table properties for a SST file when it is created. And use the recorded flag when creating a table reader for the SST file. This flag is only explicitly record if it's false. +* Start to record the value of the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` in the Manifest and table properties for a SST file when it is created. And use the recorded flag when creating a table reader for the SST file. This flag is only explicitly record if it's false. * Add a new option OptimisticTransactionDBOptions::shared_lock_buckets that enables sharing mutexes for validating transactions between DB instances, for better balancing memory efficiency and validation contention across DB instances. Different column families and DBs also now use different hash seeds in this validation, so that the same set of key names will not contend across DBs or column families. * Add a new ticker `rocksdb.files.marked.trash.deleted` to track the number of trash files deleted by background thread from the trash queue. * Add an API NewTieredVolatileCache() in include/rocksdb/cache.h to allocate an instance of a block cache with a primary block cache tier and a compressed secondary cache tier. A cache of this type distributes memory reservations against the block cache, such as WriteBufferManager, table reader memory etc., proportionally across both the primary and compressed secondary cache. @@ -30,7 +30,7 @@ For Leveled Compaction users, `CompactRange()` with `bottommost_level_compaction ### Bug Fixes * Reduced cases of illegally using Env::Default() during static destruction by never destroying the internal PosixEnv itself (except for builds checking for memory leaks). (#11538) * Fix extra prefetching during seek in async_io when BlockBasedTableOptions.num_file_reads_for_auto_readahead is 1 leading to extra reads than required. 
-* Fix a bug where compactions that are qualified to be run as 2 subcompactions were only run as one subcompaction. +* Fix a bug where compactions that are qualified to be run as 2 subcompactions were only run as one subcompaction. * Fix a use-after-move bug in block.cc. ## 8.3.0 (05/19/2023) diff --git a/unreleased_history/release.sh b/unreleased_history/release.sh index 91bfed3ea..07f3a92f9 100755 --- a/unreleased_history/release.sh +++ b/unreleased_history/release.sh @@ -34,8 +34,12 @@ awk '/#define ROCKSDB_MAJOR/ { major = $3 } echo " (`date +%x`)" >> HISTORY.new function process_file () { - # use awk to correct extra or missing newlines, missing '* ' on first line - awk '/./ { if (notfirstline || $1 == "*") print; + # use awk to correct + # * extra or missing newlines + # * leading or trailing whitespace + # * missing '* ' on first line + awk '/./ { gsub(/^[ \t]+/, ""); gsub(/[ \t]+$/, ""); + if (notfirstline || $1 == "*") print; else print "* " $0; notfirstline=1; }' < $1 >> HISTORY.new echo git rm $1 From b3c54186ab289f40897895351f86522555e81e6e Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 28 Jul 2023 16:25:29 -0700 Subject: [PATCH 010/386] Allow TryAgain in db_stress with optimistic txn, and refactoring (#11653) Summary: In rare cases, optimistic transaction commit returns TryAgain. This change tolerates that intentional behavior in db_stress, up to a small limit in a row. This way, we don't miss a possible regression with excessive TryAgain, and trying again (rolling back the transaction) should have a well renewed chance of success as the writes will be associated with fresh sequence numbers. Also, some of the APIs were not clear about Transaction semantics, so I have clarified: * (Best I can tell....) Destroying a Transaction is safe without calling Rollback() (or at least should be). I don't know why it's a common pattern in our test code and examples to rollback before unconditional destruction. Stress test updated not to call Rollback unnecessarily (to test safe destruction). * Despite essentially doing what is asked, simply trying Commit() again when it returns TryAgain does not have a chance of success, because of the transaction being bound to the DB state at the time of operations before Commit. Similar logic applies to Busy AFAIK. Commit() API comments updated, and expanded unit test in optimistic_transaction_test. Also also, because I can't stop myself, I refactored a good portion of the transaction handling code in db_stress. * Avoid existing and new copy-paste for most transaction interactions with a new ExecuteTransaction (higher-order) function. * Use unique_ptr (nicely complements removing unnecessary Rollbacks) * Abstract out a pattern for safely calling std::terminate() and use it in more places. (The TryAgain errors we saw did not have stack traces because of "terminate called recursively".) Intended follow-up: resurrect use of `FLAGS_rollback_one_in` but also include non-trivial cases Pull Request resolved: https://github.com/facebook/rocksdb/pull/11653 Test Plan: this is the test :) Also, temporarily bypassed the new retry logic and boosted the chance of hitting TryAgain. Quickly reproduced the TryAgain error. Then re-enabled the new retry logic, and was not able to hit the error after running for tens of minutes, even with the boosted chances. 
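For reference, the retry shape that `ExecuteTransaction` introduces (see the db_stress diff below) can be written as a small standalone helper. This is a sketch, not a RocksDB API: it assumes an already-open `OptimisticTransactionDB* txn_db` and a caller-supplied `ops` functor that performs the transaction's reads and writes, and the bound of five attempts mirrors the stress test rather than any API requirement.

```cpp
#include <functional>
#include <memory>

#include "rocksdb/options.h"
#include "rocksdb/utilities/optimistic_transaction_db.h"
#include "rocksdb/utilities/transaction.h"

using ROCKSDB_NAMESPACE::OptimisticTransactionDB;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::Transaction;
using ROCKSDB_NAMESPACE::WriteOptions;

// Run `ops` inside an optimistic transaction and commit, rolling back and
// re-running the operations a bounded number of times when Commit() returns
// TryAgain. Simply calling Commit() again would not help, since the
// transaction stays bound to the DB state seen by the original operations.
Status RunOptimisticTxn(OptimisticTransactionDB* txn_db,
                        const WriteOptions& write_opts,
                        const std::function<Status(Transaction&)>& ops,
                        int max_tries = 5) {
  std::unique_ptr<Transaction> txn(txn_db->BeginTransaction(write_opts));
  Status s;
  for (int tries = 1; tries <= max_tries; ++tries) {
    s = ops(*txn);
    if (s.ok()) {
      s = txn->Commit();
    }
    if (!s.IsTryAgain() || tries == max_tries) {
      break;  // success, a hard error, or out of retries
    }
    s = txn->Rollback();  // clear the transaction's buffered state, then retry
    if (!s.ok()) {
      break;
    }
  }
  return s;  // destroying txn without an explicit Rollback() is safe
}
```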
Reviewed By: cbi42 Differential Revision: D47882995 Pulled By: pdillinger fbshipit-source-id: 21eadb1525423340dbf28d17cf166b9583311a0d --- db_stress_tool/db_stress_shared_state.h | 7 ++ db_stress_tool/db_stress_test_base.cc | 78 +++++++++++-------- db_stress_tool/db_stress_test_base.h | 13 ++-- db_stress_tool/multi_ops_txns_stress.cc | 37 +++------ db_stress_tool/no_batched_ops_stress.cc | 75 +++++++----------- include/rocksdb/utilities/transaction.h | 6 +- .../optimistic_transaction_test.cc | 41 +++++++--- 7 files changed, 132 insertions(+), 125 deletions(-) diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index 0137f0b2e..604e8c631 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -342,6 +342,13 @@ class SharedState { uint64_t GetStartTimestamp() const { return start_timestamp_; } + void SafeTerminate() { + // Grab mutex so that we don't call terminate while another thread is + // attempting to print a stack trace due to the first one + MutexLock l(&mu_); + std::terminate(); + } + private: static void IgnoreReadErrorCallback(void*) { ignore_read_error = true; } diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 2c62049c3..ff004ae0f 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -504,14 +504,9 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, s = db_->Merge(write_opts, cfh, key, v); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Merge(cfh, key, v); - if (s.ok()) { - s = CommitTxn(txn); - } - } + s = ExecuteTransaction( + write_opts, /*thread=*/nullptr, + [&](Transaction& txn) { return txn.Merge(cfh, key, v); }); } } else if (FLAGS_use_put_entity_one_in > 0) { s = db_->PutEntity(write_opts, cfh, key, @@ -524,14 +519,9 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, s = db_->Put(write_opts, cfh, key, v); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Put(cfh, key, v); - if (s.ok()) { - s = CommitTxn(txn); - } - } + s = ExecuteTransaction( + write_opts, /*thread=*/nullptr, + [&](Transaction& txn) { return txn.Put(cfh, key, v); }); } } @@ -629,14 +619,15 @@ void StressTest::ProcessRecoveredPreparedTxnsHelper(Transaction* txn, } } -Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) { +Status StressTest::NewTxn(WriteOptions& write_opts, + std::unique_ptr* out_txn) { if (!FLAGS_use_txn) { return Status::InvalidArgument("NewTxn when FLAGS_use_txn is not set"); } write_opts.disableWAL = FLAGS_disable_wal; static std::atomic txn_id = {0}; if (FLAGS_use_optimistic_txn) { - *txn = optimistic_txn_db_->BeginTransaction(write_opts); + out_txn->reset(optimistic_txn_db_->BeginTransaction(write_opts)); return Status::OK(); } else { TransactionOptions txn_options; @@ -644,31 +635,31 @@ Status StressTest::NewTxn(WriteOptions& write_opts, Transaction** txn) { FLAGS_use_only_the_last_commit_time_batch_for_recovery; txn_options.lock_timeout = 600000; // 10 min txn_options.deadlock_detect = true; - *txn = txn_db_->BeginTransaction(write_opts, txn_options); + out_txn->reset(txn_db_->BeginTransaction(write_opts, txn_options)); auto istr = std::to_string(txn_id.fetch_add(1)); - Status s = (*txn)->SetName("xid" + istr); + Status s = (*out_txn)->SetName("xid" + istr); return s; } } -Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) { +Status 
StressTest::CommitTxn(Transaction& txn, ThreadState* thread) { if (!FLAGS_use_txn) { return Status::InvalidArgument("CommitTxn when FLAGS_use_txn is not set"); } Status s = Status::OK(); if (FLAGS_use_optimistic_txn) { assert(optimistic_txn_db_); - s = txn->Commit(); + s = txn.Commit(); } else { assert(txn_db_); - s = txn->Prepare(); + s = txn.Prepare(); std::shared_ptr timestamped_snapshot; if (s.ok()) { if (thread && FLAGS_create_timestamped_snapshot_one_in && thread->rand.OneIn(FLAGS_create_timestamped_snapshot_one_in)) { uint64_t ts = db_stress_env->NowNanos(); - s = txn->CommitAndTryCreateSnapshot(/*notifier=*/nullptr, ts, - ×tamped_snapshot); + s = txn.CommitAndTryCreateSnapshot(/*notifier=*/nullptr, ts, + ×tamped_snapshot); std::pair> res; if (thread->tid == 0) { @@ -686,7 +677,7 @@ Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) { } } } else { - s = txn->Commit(); + s = txn.Commit(); } } if (thread && FLAGS_create_timestamped_snapshot_one_in > 0 && @@ -696,18 +687,37 @@ Status StressTest::CommitTxn(Transaction* txn, ThreadState* thread) { txn_db_->ReleaseTimestampedSnapshotsOlderThan(now - time_diff); } } - delete txn; return s; } -Status StressTest::RollbackTxn(Transaction* txn) { - if (!FLAGS_use_txn) { - return Status::InvalidArgument( - "RollbackTxn when FLAGS_use_txn is not" - " set"); +Status StressTest::ExecuteTransaction( + WriteOptions& write_opts, ThreadState* thread, + std::function&& ops) { + std::unique_ptr txn; + Status s = NewTxn(write_opts, &txn); + if (s.ok()) { + for (int tries = 1;; ++tries) { + s = ops(*txn); + if (s.ok()) { + s = CommitTxn(*txn, thread); + if (s.ok()) { + break; + } + } + // Optimistic txn might return TryAgain, in which case rollback + // and try again. But that shouldn't happen too many times in a row. 
+ if (!s.IsTryAgain() || !FLAGS_use_optimistic_txn) { + break; + } + if (tries >= 5) { + break; + } + s = txn->Rollback(); + if (!s.ok()) { + break; + } + } } - Status s = txn->Rollback(); - delete txn; return s; } diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index 29159a494..dc235fcdf 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -64,11 +64,14 @@ class StressTest { virtual void ProcessRecoveredPreparedTxnsHelper(Transaction* txn, SharedState* shared); - Status NewTxn(WriteOptions& write_opts, Transaction** txn); - - Status CommitTxn(Transaction* txn, ThreadState* thread = nullptr); - - Status RollbackTxn(Transaction* txn); + // ExecuteTransaction is recommended instead + Status NewTxn(WriteOptions& write_opts, + std::unique_ptr* out_txn); + Status CommitTxn(Transaction& txn, ThreadState* thread = nullptr); + + // Creates a transaction, executes `ops`, and tries to commit + Status ExecuteTransaction(WriteOptions& write_opts, ThreadState* thread, + std::function&& ops); virtual void MaybeClearOneColumnFamily(ThreadState* /* thread */) {} diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc index 89b061004..23850da5c 100644 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ b/db_stress_tool/multi_ops_txns_stress.cc @@ -560,7 +560,7 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t new_a) { std::string old_pk = Record::EncodePrimaryKey(old_a); std::string new_pk = Record::EncodePrimaryKey(new_a); - Transaction* txn = nullptr; + std::unique_ptr txn; WriteOptions wopts; Status s = NewTxn(wopts, &txn); if (!s.ok()) { @@ -572,7 +572,7 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, assert(txn); txn->SetSnapshotOnNextOperation(/*notifier=*/nullptr); - const Defer cleanup([new_a, &s, thread, txn, this]() { + const Defer cleanup([new_a, &s, thread, this]() { if (s.ok()) { // Two gets, one for existing pk, one for locking potential new pk. thread->stats.AddGets(/*ngets=*/2, /*nfounds=*/1); @@ -594,7 +594,6 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, } auto& key_gen = key_gen_for_a_[thread->tid]; key_gen->UndoAllocation(new_a); - RollbackTxn(txn).PermitUncheckedError(); }); ReadOptions ropts; @@ -671,7 +670,6 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, auto& key_gen = key_gen_for_a_.at(thread->tid); if (s.ok()) { - delete txn; key_gen->Replace(old_a, old_a_pos, new_a); } return s; @@ -681,7 +679,7 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, uint32_t old_c, uint32_t old_c_pos, uint32_t new_c) { - Transaction* txn = nullptr; + std::unique_ptr txn; WriteOptions wopts; Status s = NewTxn(wopts, &txn); if (!s.ok()) { @@ -694,7 +692,7 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, Iterator* it = nullptr; long iterations = 0; - const Defer cleanup([new_c, &s, thread, &it, txn, this, &iterations]() { + const Defer cleanup([new_c, &s, thread, &it, this, &iterations]() { delete it; if (s.ok()) { thread->stats.AddIterations(iterations); @@ -719,7 +717,6 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, } auto& key_gen = key_gen_for_c_[thread->tid]; key_gen->UndoAllocation(new_c); - RollbackTxn(txn).PermitUncheckedError(); }); // TODO (yanqin) try SetSnapshotOnNextOperation(). 
We currently need to take @@ -868,7 +865,6 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, s = CommitAndCreateTimestampedSnapshotIfNeeded(thread, *txn); if (s.ok()) { - delete txn; auto& key_gen = key_gen_for_c_.at(thread->tid); key_gen->Replace(old_c, old_c_pos, new_c); } @@ -880,7 +876,7 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, uint32_t a, uint32_t b_delta) { std::string pk_str = Record::EncodePrimaryKey(a); - Transaction* txn = nullptr; + std::unique_ptr txn; WriteOptions wopts; Status s = NewTxn(wopts, &txn); if (!s.ok()) { @@ -891,7 +887,7 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread, txn, this]() { + const Defer cleanup([&s, thread]() { if (s.ok()) { thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); thread->stats.AddBytesForWrites( @@ -908,7 +904,6 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, } else { thread->stats.AddErrors(1); } - RollbackTxn(txn).PermitUncheckedError(); }); ReadOptions ropts; ropts.rate_limiter_priority = @@ -952,9 +947,6 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, s = CommitAndCreateTimestampedSnapshotIfNeeded(thread, *txn); - if (s.ok()) { - delete txn; - } return s; } @@ -964,7 +956,7 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, // pk may or may not exist PinnableSlice value; - Transaction* txn = nullptr; + std::unique_ptr txn; WriteOptions wopts; Status s = NewTxn(wopts, &txn); if (!s.ok()) { @@ -975,7 +967,7 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread, txn, this]() { + const Defer cleanup([&s, thread]() { if (s.ok()) { thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); return; @@ -984,7 +976,6 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, } else { thread->stats.AddErrors(1); } - RollbackTxn(txn).PermitUncheckedError(); }); std::shared_ptr snapshot; @@ -1001,9 +992,6 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, if (s.ok()) { s = txn->Commit(); } - if (s.ok()) { - delete txn; - } return s; } @@ -1011,7 +999,7 @@ Status MultiOpsTxnsStressTest::RangeScanTxn(ThreadState* thread, ReadOptions ropts, uint32_t c) { std::string sk = Record::EncodeSecondaryKey(c); - Transaction* txn = nullptr; + std::unique_ptr txn; WriteOptions wopts; Status s = NewTxn(wopts, &txn); if (!s.ok()) { @@ -1022,13 +1010,12 @@ Status MultiOpsTxnsStressTest::RangeScanTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread, txn, this]() { + const Defer cleanup([&s, thread]() { if (s.ok()) { thread->stats.AddIterations(1); return; } thread->stats.AddErrors(1); - RollbackTxn(txn).PermitUncheckedError(); }); std::shared_ptr snapshot; @@ -1056,10 +1043,6 @@ Status MultiOpsTxnsStressTest::RangeScanTxn(ThreadState* thread, s = iter->status(); } - if (s.ok()) { - delete txn; - } - return s; } diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index de03e9795..31aac13ee 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -442,7 +442,7 @@ class NonBatchedOpsStressTest : public StressTest { if (!s.ok()) { fprintf(stderr, "dropping column family error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } s = db_->CreateColumnFamily(ColumnFamilyOptions(options_), new_name, 
&column_families_[cf]); @@ -451,7 +451,7 @@ class NonBatchedOpsStressTest : public StressTest { if (!s.ok()) { fprintf(stderr, "creating column family error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } thread->shared->UnlockColumnFamily(cf); } @@ -603,7 +603,7 @@ class NonBatchedOpsStressTest : public StressTest { // Create a transaction in order to write some data. The purpose is to // exercise WriteBatchWithIndex::MultiGetFromBatchAndDB. The transaction // will be rolled back once MultiGet returns. - Transaction* txn = nullptr; + std::unique_ptr txn; if (use_txn) { WriteOptions wo; if (FLAGS_rate_limit_auto_wal_flush) { @@ -612,7 +612,7 @@ class NonBatchedOpsStressTest : public StressTest { Status s = NewTxn(wo, &txn); if (!s.ok()) { fprintf(stderr, "NewTxn: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } for (size_t i = 0; i < num_keys; ++i) { @@ -662,7 +662,7 @@ class NonBatchedOpsStressTest : public StressTest { } if (!s.ok()) { fprintf(stderr, "Transaction put: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } else { ryw_expected_values.push_back(std::nullopt); @@ -865,9 +865,6 @@ class NonBatchedOpsStressTest : public StressTest { if (readoptionscopy.snapshot) { db_->ReleaseSnapshot(readoptionscopy.snapshot); } - if (use_txn) { - RollbackTxn(txn); - } return statuses; } @@ -1278,14 +1275,9 @@ class NonBatchedOpsStressTest : public StressTest { s = db_->Merge(write_opts, cfh, k, write_ts, v); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Merge(cfh, k, v); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } + s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { + return txn.Merge(cfh, k, v); + }); } } else if (FLAGS_use_put_entity_one_in > 0 && (value_base % FLAGS_use_put_entity_one_in) == 0) { @@ -1299,14 +1291,9 @@ class NonBatchedOpsStressTest : public StressTest { s = db_->Put(write_opts, cfh, k, write_ts, v); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Put(cfh, k, v); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } + s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { + return txn.Put(cfh, k, v); + }); } } @@ -1319,11 +1306,11 @@ class NonBatchedOpsStressTest : public StressTest { } else if (!is_db_stopped_ || s.severity() < Status::Severity::kFatalError) { fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } else { fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } @@ -1364,14 +1351,9 @@ class NonBatchedOpsStressTest : public StressTest { s = db_->Delete(write_opts, cfh, key, write_ts); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->Delete(cfh, key); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } + s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { + return txn.Delete(cfh, key); + }); } pending_expected_value.Commit(); @@ -1384,11 +1366,11 @@ class NonBatchedOpsStressTest : public StressTest { } else if (!is_db_stopped_ || s.severity() < Status::Severity::kFatalError) { fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } else { fprintf(stderr, "delete error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } } else { @@ 
-1401,14 +1383,9 @@ class NonBatchedOpsStressTest : public StressTest { s = db_->SingleDelete(write_opts, cfh, key, write_ts); } } else { - Transaction* txn; - s = NewTxn(write_opts, &txn); - if (s.ok()) { - s = txn->SingleDelete(cfh, key); - if (s.ok()) { - s = CommitTxn(txn, thread); - } - } + s = ExecuteTransaction(write_opts, thread, [&](Transaction& txn) { + return txn.SingleDelete(cfh, key); + }); } pending_expected_value.Commit(); thread->stats.AddSingleDeletes(1); @@ -1420,11 +1397,11 @@ class NonBatchedOpsStressTest : public StressTest { } else if (!is_db_stopped_ || s.severity() < Status::Severity::kFatalError) { fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } else { fprintf(stderr, "single delete error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } } @@ -1481,11 +1458,11 @@ class NonBatchedOpsStressTest : public StressTest { } else if (!is_db_stopped_ || s.severity() < Status::Severity::kFatalError) { fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } else { fprintf(stderr, "delete range error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } } for (PendingExpectedValue& pending_expected_value : @@ -1567,7 +1544,7 @@ class NonBatchedOpsStressTest : public StressTest { } if (!s.ok()) { fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); - std::terminate(); + thread->shared->SafeTerminate(); } for (size_t i = 0; i < pending_expected_values.size(); ++i) { diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 947fcec55..4ac47ec04 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -140,6 +140,9 @@ class Transaction { Transaction(const Transaction&) = delete; void operator=(const Transaction&) = delete; + // The transaction is safely discarded on destruction, though must be + // discarded before the DB is closed or destroyed. (Calling Rollback() + // is not necessary before destruction.) virtual ~Transaction() {} // If a transaction has a snapshot set, the transaction will ensure that @@ -227,7 +230,8 @@ class Transaction { // Status::Busy() may be returned if the transaction could not guarantee // that there are no write conflicts. Status::TryAgain() may be returned // if the memtable history size is not large enough - // (See max_write_buffer_size_to_maintain). + // (see max_write_buffer_size_to_maintain). In either case, a Rollback() + // or new transaction is required to expect a different result. 
// // If this transaction was created by a TransactionDB(), Status::Expired() // may be returned if this transaction has lived for longer than diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 46d51956f..04e443a74 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -322,17 +322,11 @@ TEST_P(OptimisticTransactionTest, FlushTest) { delete txn; } -TEST_P(OptimisticTransactionTest, FlushTest2) { - WriteOptions write_options; - ReadOptions read_options, snapshot_read_options; +namespace { +void FlushTest2PopulateTxn(Transaction* txn) { + ReadOptions snapshot_read_options; std::string value; - ASSERT_OK(txn_db->Put(write_options, Slice("foo"), Slice("bar"))); - ASSERT_OK(txn_db->Put(write_options, Slice("foo2"), Slice("bar"))); - - Transaction* txn = txn_db->BeginTransaction(write_options); - ASSERT_NE(txn, nullptr); - snapshot_read_options.snapshot = txn->GetSnapshot(); ASSERT_OK(txn->GetForUpdate(snapshot_read_options, "foo", &value)); @@ -342,6 +336,21 @@ TEST_P(OptimisticTransactionTest, FlushTest2) { ASSERT_OK(txn->GetForUpdate(snapshot_read_options, "foo", &value)); ASSERT_EQ(value, "bar2"); +} +} // namespace + +TEST_P(OptimisticTransactionTest, FlushTest2) { + WriteOptions write_options; + ReadOptions read_options; + std::string value; + + ASSERT_OK(txn_db->Put(write_options, Slice("foo"), Slice("bar"))); + ASSERT_OK(txn_db->Put(write_options, Slice("foo2"), Slice("bar"))); + + Transaction* txn = txn_db->BeginTransaction(write_options); + ASSERT_NE(txn, nullptr); + + FlushTest2PopulateTxn(txn); // Put a random key so we have a MemTable to flush ASSERT_OK(txn_db->Put(write_options, "dummy", "dummy")); @@ -367,9 +376,23 @@ TEST_P(OptimisticTransactionTest, FlushTest2) { // txn should not commit since MemTableList History is not large enough ASSERT_TRUE(s.IsTryAgain()); + // simply trying Commit again doesn't help + s = txn->Commit(); + ASSERT_TRUE(s.IsTryAgain()); + ASSERT_OK(txn_db->Get(read_options, "foo", &value)); ASSERT_EQ(value, "bar"); + // But rolling back and redoing does + ASSERT_OK(txn->Rollback()); + + FlushTest2PopulateTxn(txn); + + ASSERT_OK(txn->Commit()); + + ASSERT_OK(txn_db->Get(read_options, "foo", &value)); + ASSERT_EQ(value, "bar2"); + delete txn; } From 7a1b0207e611a7d569253a407f541f5496f41467 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Sun, 30 Jul 2023 16:40:01 -0700 Subject: [PATCH 011/386] format_version=6 and context-aware block checksums (#9058) Summary: ## Context checksum All RocksDB checksums currently use 32 bits of checking power, which should be 1 in 4 billion false negative (FN) probability (failing to detect corruption). This is true for random corruptions, and in some cases small corruptions are guaranteed to be detected. But some possible corruptions, such as in storage metadata rather than storage payload data, would have a much higher FN rate. For example: * Data larger than one SST block is replaced by data from elsewhere in the same or another SST file. Especially with block_align=true, the probability of exact block size match is probably around 1 in 100, making the FN probability around that same. Without `block_align=true` the probability of same block start location is probably around 1 in 10,000, for FN probability around 1 in a million. To solve this problem in new format_version=6, we add "context awareness" to block checksum checks. 
The stored and expected checksum value is modified based on the block's position in the file and which file it is in. The modifications are cleverly chosen so that, for example * blocks within about 4GB of each other are guaranteed to use different context * blocks that are offset by exactly some multiple of 4GiB are guaranteed to use different context * files generated by the same process are guaranteed to use different context for the same offsets, until wrap-around after 2^32 - 1 files Thus, with format_version=6, if a valid SST block and checksum is misplaced, its checksum FN probability should be essentially ideal, 1 in 4B. ## Footer checksum This change also adds checksum protection to the SST footer (with format_version=6), for the first time without relying on whole file checksum. To prevent a corruption of the format_version in the footer (e.g. 6 -> 5) to defeat the footer checksum, we change much of the footer data format including an "extended magic number" in format_version 6 that would be interpreted as empty index and metaindex block handles in older footer versions. We also change the encoding of handles to free up space for other new data in footer. ## More detail: making space in footer In order to keep footer the same size in format_version=6 (avoid change to IO patterns), we have to free up some space for new data. We do this two ways: * Metaindex block handle is encoded down to 4 bytes (from 10) by assuming it immediately precedes the footer, and by assuming it is < 4GB. * Index block handle is moved into metaindex. (I don't know why it was in footer to begin with.) ## Performance In case of small performance penalty, I've made a "pay as you go" optimization to compensate: replace `MutableCFOptions` in BlockBasedTableBuilder::Rep with the only field used in that structure after construction: `prefix_extractor`. This makes the PR an overall performance improvement (results below). Nevertheless I'm seeing essentially no difference going from fv=5 to fv=6, even including that improvement for both. That's based on extreme case table write performance testing, many files with many blocks. This is relatively checksum intensive (small blocks) and salt generation intensive (small files). ``` (for I in `seq 1 100`; do TEST_TMPDIR=/dev/shm/dbbench2 ./db_bench -benchmarks=fillseq -memtablerep=vector -disable_wal=1 -allow_concurrent_memtable_write=false -num=3000000 -compaction_style=2 -fifo_compaction_max_table_files_size_mb=10000 -fifo_compaction_allow_compaction=0 -write_buffer_size=100000 -compression_type=none -block_size=1000; done) 2>&1 | grep micros/op | tee out awk '{ tot += $5; n += 1; } END { print int(1.0 * tot / n) }' < out ``` Each value below is ops/s averaged over 100 runs, run simultaneously with competing configuration for load fairness Before -> after (both fv=5): 483530 -> 483673 (negligible) Re-run 1: 480733 -> 485427 (1.0% faster) Re-run 2: 483821 -> 484541 (0.1% faster) Before (fv=5) -> after (fv=6): 482006 -> 485100 (0.6% faster) Re-run 1: 482212 -> 485075 (0.6% faster) Re-run 2: 483590 -> 484073 (0.1% faster) After fv=5 -> after fv=6: 483878 -> 485542 (0.3% faster) Re-run 1: 485331 -> 483385 (0.4% slower) Re-run 2: 485283 -> 483435 (0.4% slower) Re-run 3: 483647 -> 486109 (0.5% faster) Pull Request resolved: https://github.com/facebook/rocksdb/pull/9058 Test Plan: unit tests included (table_test, db_properties_test, salt in env_test). General DB tests and crash test updated to test new format_version. 
Also temporarily updated the default format version to 6 and saw some test failures. Almost all were due to an inadvertent additional read in VerifyChecksum to verify the index block checksum, though it's arguably a bug that VerifyChecksum does not appear to (re-)verify the index block checksum, just assuming it was verified in opening the index reader (probably *usually* true but probably not always true). Some other concerns about VerifyChecksum are left in FIXME comments. The only remaining test failure on change of default (in block_fetcher_test) now has a comment about how to upgrade the test. The format compatibility test does not need updating because we have not updated the default format_version. Reviewed By: ajkr, mrambacher Differential Revision: D33100915 Pulled By: pdillinger fbshipit-source-id: 8679e3e572fa580181a737fd6d113ed53c5422ee --- db/db_impl/db_impl.cc | 1 + db/db_properties_test.cc | 3 + env/env_test.cc | 2 +- include/rocksdb/table.h | 8 +- .../block_based/block_based_table_builder.cc | 71 +++++-- table/block_based/block_based_table_reader.cc | 25 ++- table/block_based/block_based_table_reader.h | 7 + .../block_based_table_reader_sync_and_async.h | 5 +- table/block_based/index_reader_common.cc | 2 +- table/block_based/reader_common.cc | 18 +- table/block_based/reader_common.h | 9 +- table/block_fetcher.cc | 6 +- table/block_fetcher_test.cc | 3 + table/cuckoo/cuckoo_table_builder.cc | 8 +- table/format.cc | 176 ++++++++++++++++-- table/format.h | 73 +++++++- table/meta_blocks.cc | 10 +- table/meta_blocks.h | 1 + table/plain/plain_table_builder.cc | 15 +- table/table_test.cc | 60 ++++-- test_util/testutil.cc | 3 + tools/db_crashtest.py | 2 +- tools/sst_dump_test.cc | 1 - .../new_features/1_context_checksum.md | 1 + 24 files changed, 422 insertions(+), 88 deletions(-) create mode 100644 unreleased_history/new_features/1_context_checksum.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 4433c310e..5dee77853 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5824,6 +5824,7 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, return s; } } + // FIXME? What does it mean if read_options.verify_checksums == false? // TODO: simplify using GetRefedColumnFamilySet? std::vector cfd_list; diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index d689686c3..a7faa6414 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -2348,6 +2348,9 @@ TEST_F(DBPropertiesTest, TableMetaIndexKeys) { EXPECT_EQ("rocksdb.hashindex.prefixes", PopMetaIndexKey(meta_iter.get())); } + if (bbto->format_version >= 6) { + EXPECT_EQ("rocksdb.index", PopMetaIndexKey(meta_iter.get())); + } } EXPECT_EQ("rocksdb.properties", PopMetaIndexKey(meta_iter.get())); EXPECT_EQ("NOT_FOUND", PopMetaIndexKey(meta_iter.get())); diff --git a/env/env_test.cc b/env/env_test.cc index 4462b95b8..fb23bae13 100644 --- a/env/env_test.cc +++ b/env/env_test.cc @@ -3054,7 +3054,7 @@ TEST_F(EnvTest, PortGenerateRfcUuid) { VerifyRfcUuids(t.ids); } -// Test the atomic, linear generation of GenerateRawUuid +// Test the atomic, linear generation of GenerateRawUniqueId TEST_F(EnvTest, GenerateRawUniqueId) { struct MyStressTest : public NoDuplicateMiniStressTest { diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 6e8f60577..d19a95fa8 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -47,7 +47,10 @@ struct EnvOptions; // Types of checksums to use for checking integrity of logical blocks within // files. 
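(For reference, opting a table into the new format is a single option. A minimal sketch, assuming otherwise-default options and not a recommended configuration:

```cpp
// Sketch: build Options that write format_version=6 tables
// (context-aware block checksums + checksum-protected footer).
#include "rocksdb/options.h"
#include "rocksdb/table.h"

ROCKSDB_NAMESPACE::Options MakeOptionsWithFormatV6() {
  ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
  bbto.format_version = 6;  // new in this change; older RocksDB cannot read it
  ROCKSDB_NAMESPACE::Options options;
  options.table_factory.reset(
      ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(bbto));
  return options;
}
```
)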
All checksums currently use 32 bits of checking power (1 in 4B -// chance of failing to detect random corruption). +// chance of failing to detect random corruption). Traditionally, the actual +// checking power can be far from ideal if the corruption is due to misplaced +// data (e.g. physical blocks out of order in a file, or from another file), +// which is fixed in format_version=6 (see below). enum ChecksumType : char { kNoChecksum = 0x0, kCRC32c = 0x1, @@ -512,6 +515,9 @@ struct BlockBasedTableOptions { // 5 -- Can be read by RocksDB's versions since 6.6.0. Full and partitioned // filters use a generally faster and more accurate Bloom filter // implementation, with a different schema. + // 6 -- Modified the file footer and checksum matching so that SST data + // misplaced within or between files is as likely to fail checksum + // verification as random corruption. Also checksum-protects SST footer. uint32_t format_version = 5; // Store index blocks on disk in compressed format. Changing this option to diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index d3c70536d..26e071abb 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -263,7 +263,9 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector struct BlockBasedTableBuilder::Rep { const ImmutableOptions ioptions; - const MutableCFOptions moptions; + // BEGIN from MutableCFOptions + std::shared_ptr prefix_extractor; + // END from MutableCFOptions const BlockBasedTableOptions table_options; const InternalKeyComparator& internal_comparator; // Size in bytes for the user-defined timestamps. @@ -361,6 +363,9 @@ struct BlockBasedTableBuilder::Rep { // all blocks after data blocks till the end of the SST file. uint64_t tail_size; + // See class Footer + uint32_t base_context_checksum; + uint64_t get_offset() { return offset.load(std::memory_order_relaxed); } void set_offset(uint64_t o) { offset.store(o, std::memory_order_relaxed); } @@ -389,6 +394,12 @@ struct BlockBasedTableBuilder::Rep { // to false, and this is ensured by io_status_mutex, so no special memory // order for io_status_ok is required. 
if (io_status_ok.load(std::memory_order_relaxed)) { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED // Avoid unnecessary lock acquisition + auto ios = CopyIOStatus(); + ios.PermitUncheckedError(); + // Assume no races in unit tests + assert(ios.ok()); +#endif // ROCKSDB_ASSERT_STATUS_CHECKED return IOStatus::OK(); } else { return CopyIOStatus(); @@ -429,7 +440,7 @@ struct BlockBasedTableBuilder::Rep { Rep(const BlockBasedTableOptions& table_opt, const TableBuilderOptions& tbo, WritableFileWriter* f) : ioptions(tbo.ioptions), - moptions(tbo.moptions), + prefix_extractor(tbo.moptions.prefix_extractor), table_options(table_opt), internal_comparator(tbo.internal_comparator), ts_sz(tbo.internal_comparator.user_comparator()->timestamp_size()), @@ -456,7 +467,7 @@ struct BlockBasedTableBuilder::Rep { BlockBasedTableOptions::kDataBlockBinarySearch /* index_type */, 0.75 /* data_block_hash_table_util_ratio */, ts_sz, persist_user_defined_timestamps), - internal_prefix_transform(tbo.moptions.prefix_extractor.get()), + internal_prefix_transform(prefix_extractor.get()), compression_type(tbo.compression_type), sample_for_compression(tbo.moptions.sample_for_compression), compressible_input_data_bytes(0), @@ -557,7 +568,7 @@ struct BlockBasedTableBuilder::Rep { } filter_builder.reset(CreateFilterBlockBuilder( - ioptions, moptions, filter_context, + ioptions, tbo.moptions, filter_context, use_delta_encoding_for_index_values, p_index_builder_, ts_sz, persist_user_defined_timestamps)); } @@ -573,7 +584,7 @@ struct BlockBasedTableBuilder::Rep { table_properties_collectors.emplace_back( new BlockBasedTablePropertiesCollector( table_options.index_type, table_options.whole_key_filtering, - moptions.prefix_extractor != nullptr)); + prefix_extractor != nullptr)); if (ts_sz > 0 && persist_user_defined_timestamps) { table_properties_collectors.emplace_back( new TimestampTablePropertiesCollector( @@ -597,6 +608,17 @@ struct BlockBasedTableBuilder::Rep { if (!ReifyDbHostIdProperty(ioptions.env, &props.db_host_id).ok()) { ROCKS_LOG_INFO(ioptions.logger, "db_host_id property will not be set"); } + + if (FormatVersionUsesContextChecksum(table_options.format_version)) { + // Must be non-zero and semi- or quasi-random + // TODO: ideally guaranteed different for related files (e.g. 
use file + // number and db_session, for benefit of SstFileWriter) + do { + base_context_checksum = Random::GetTLSInstance()->Next(); + } while (UNLIKELY(base_context_checksum == 0)); + } else { + base_context_checksum = 0; + } } Rep(const Rep&) = delete; @@ -1285,7 +1307,8 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock( bool is_data_block = block_type == BlockType::kData; // Old, misleading name of this function: WriteRawBlock StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS); - handle->set_offset(r->get_offset()); + const uint64_t offset = r->get_offset(); + handle->set_offset(offset); handle->set_size(block_contents.size()); assert(status().ok()); assert(io_status().ok()); @@ -1307,6 +1330,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock( uint32_t checksum = ComputeBuiltinChecksumWithLastByte( r->table_options.checksum, block_contents.data(), block_contents.size(), /*last_byte*/ comp_type); + checksum += ChecksumModifierForContext(r->base_context_checksum, offset); if (block_type == BlockType::kFilter) { Status s = r->filter_builder->MaybePostVerifyFilter(block_contents); @@ -1600,6 +1624,11 @@ void BlockBasedTableBuilder::WriteIndexBlock( // The last index_block_handle will be for the partition index block } } + // If success and need to record in metaindex rather than footer... + if (!FormatVersionUsesIndexHandleInFooter( + rep_->table_options.format_version)) { + meta_index_builder->Add(kIndexBlockName, *index_block_handle); + } } void BlockBasedTableBuilder::WritePropertiesBlock( @@ -1625,9 +1654,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock( rep_->props.compression_options = CompressionOptionsToString(rep_->compression_opts); rep_->props.prefix_extractor_name = - rep_->moptions.prefix_extractor != nullptr - ? rep_->moptions.prefix_extractor->AsString() - : "nullptr"; + rep_->prefix_extractor ? 
rep_->prefix_extractor->AsString() : "nullptr"; std::string property_collectors_names = "["; for (size_t i = 0; i < rep_->ioptions.table_properties_collector_factories.size(); ++i) { @@ -1746,16 +1773,20 @@ void BlockBasedTableBuilder::WriteRangeDelBlock( void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, BlockHandle& index_block_handle) { + assert(ok()); Rep* r = rep_; // this is guaranteed by BlockBasedTableBuilder's constructor assert(r->table_options.checksum == kCRC32c || r->table_options.format_version != 0); - assert(ok()); - FooterBuilder footer; - footer.Build(kBlockBasedTableMagicNumber, r->table_options.format_version, - r->get_offset(), r->table_options.checksum, - metaindex_block_handle, index_block_handle); + Status s = footer.Build(kBlockBasedTableMagicNumber, + r->table_options.format_version, r->get_offset(), + r->table_options.checksum, metaindex_block_handle, + index_block_handle, r->base_context_checksum); + if (!s.ok()) { + r->SetStatus(s); + return; + } IOStatus ios = r->file->Append(footer.GetSlice()); if (ios.ok()) { r->set_offset(r->get_offset() + footer.GetSlice().size()); @@ -1970,10 +2001,14 @@ Status BlockBasedTableBuilder::Finish() { WriteFooter(metaindex_block_handle, index_block_handle); } r->state = Rep::State::kClosed; - r->SetStatus(r->CopyIOStatus()); - Status ret_status = r->CopyStatus(); - assert(!ret_status.ok() || io_status().ok()); r->tail_size = r->offset - r->props.tail_start_offset; + + Status ret_status = r->CopyStatus(); + IOStatus ios = r->GetIOStatus(); + if (!ios.ok() && ret_status.ok()) { + // Let io_status supersede ok status (otherwise status takes precedennce) + ret_status = ios; + } return ret_status; } @@ -1983,8 +2018,10 @@ void BlockBasedTableBuilder::Abandon() { StopParallelCompression(); } rep_->state = Rep::State::kClosed; +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED // Avoid unnecessary lock acquisition rep_->CopyStatus().PermitUncheckedError(); rep_->CopyIOStatus().PermitUncheckedError(); +#endif // ROCKSDB_ASSERT_STATUS_CHECKED } uint64_t BlockBasedTableBuilder::NumEntries() const { diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 35c6f46f6..1fc477842 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -771,6 +771,7 @@ Status BlockBasedTable::Open( if (!s.ok()) { return s; } + rep->verify_checksum_set_on_open = ro.verify_checksums; s = new_table->PrefetchIndexAndFilterBlocks( ro, prefetch_buffer.get(), metaindex_iter.get(), new_table.get(), prefetch_all, table_options, level, file_size, @@ -2454,6 +2455,10 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( return BlockType::kHashIndexMetadata; } + if (meta_block_name == kIndexBlockName) { + return BlockType::kIndex; + } + if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) { // Obsolete but possible in old files return BlockType::kInvalid; @@ -2474,6 +2479,9 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks( BlockHandle handle; Slice input = index_iter->value(); s = handle.DecodeFrom(&input); + if (!s.ok()) { + break; + } BlockContents contents; const Slice meta_block_name = index_iter->key(); if (meta_block_name == kPropertiesBlockName) { @@ -2484,7 +2492,13 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks( nullptr /* prefetch_buffer */, rep_->footer, rep_->ioptions, &table_properties, nullptr /* memory_allocator */); + } else if (rep_->verify_checksum_set_on_open && + meta_block_name == kIndexBlockName) 
{ + // WART: For now, to maintain similar I/O behavior as before + // format_version=6, we skip verifying index block checksum--but only + // if it was checked on open. } else { + // FIXME? Need to verify checksums of index and filter partitions? s = BlockFetcher( rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer, read_options, handle, &contents, rep_->ioptions, @@ -2542,6 +2556,15 @@ Status BlockBasedTable::CreateIndexReader( InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin, BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader) { + if (FormatVersionUsesIndexHandleInFooter(rep_->footer.format_version())) { + rep_->index_handle = rep_->footer.index_handle(); + } else { + Status s = FindMetaBlock(meta_iter, kIndexBlockName, &rep_->index_handle); + if (!s.ok()) { + return s; + } + } + switch (rep_->index_type) { case BlockBasedTableOptions::kTwoLevelIndexSearch: { return PartitionIndexReader::Create(this, ro, prefetch_buffer, use_cache, @@ -2709,7 +2732,7 @@ bool BlockBasedTable::TEST_FilterBlockInCache() const { bool BlockBasedTable::TEST_IndexBlockInCache() const { assert(rep_ != nullptr); - return TEST_BlockInCache(rep_->footer.index_handle()); + return TEST_BlockInCache(rep_->index_handle); } Status BlockBasedTable::GetKVPairsFromDataBlocks( diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index d0686a4bc..0b5fe1cb8 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -594,6 +594,7 @@ struct BlockBasedTable::Rep { BlockHandle compression_dict_handle; std::shared_ptr table_properties; + BlockHandle index_handle; BlockBasedTableOptions::IndexType index_type; bool whole_key_filtering; bool prefix_filtering; @@ -637,6 +638,12 @@ struct BlockBasedTable::Rep { bool index_key_includes_seq = true; bool index_value_is_full = true; + // Whether block checksums in metadata blocks were verified on open. + // This is only to mostly maintain current dubious behavior of VerifyChecksum + // with respect to index blocks, but only when the checksum was previously + // verified. + bool verify_checksum_set_on_open = false; + const bool immortal_table; // Whether the user key contains user-defined timestamps. If this is false and // the running user comparator has a non-zero timestamp size, a min timestamp diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index eb4bf5bed..43af02fad 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -222,9 +222,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks) // begin address of each read request, we need to add the offset // in each read request. Checksum is stored in the block trailer, // beyond the payload size. 
- s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset, - handle.size(), rep_->file->file_name(), - handle.offset()); + s = VerifyBlockChecksum(footer, data + req_offset, handle.size(), + rep_->file->file_name(), handle.offset()); TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); } } else if (!use_shared_buffer) { diff --git a/table/block_based/index_reader_common.cc b/table/block_based/index_reader_common.cc index 828200299..e68be2a10 100644 --- a/table/block_based/index_reader_common.cc +++ b/table/block_based/index_reader_common.cc @@ -26,7 +26,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( assert(rep != nullptr); const Status s = table->RetrieveBlock( - prefetch_buffer, read_options, rep->footer.index_handle(), + prefetch_buffer, read_options, rep->index_handle, UncompressionDict::GetEmptyDict(), &index_block->As(), get_context, lookup_context, /* for_compaction */ false, use_cache, /* async_read */ false); diff --git a/table/block_based/reader_common.cc b/table/block_based/reader_common.cc index 0ff43e9b4..7d0c97c71 100644 --- a/table/block_based/reader_common.cc +++ b/table/block_based/reader_common.cc @@ -23,10 +23,14 @@ void ForceReleaseCachedEntry(void* arg, void* h) { } // WART: this is specific to block-based table -Status VerifyBlockChecksum(ChecksumType type, const char* data, +Status VerifyBlockChecksum(const Footer& footer, const char* data, size_t block_size, const std::string& file_name, uint64_t offset) { PERF_TIMER_GUARD(block_checksum_time); + + assert(footer.GetBlockTrailerSize() == 5); + ChecksumType type = footer.checksum_type(); + // After block_size bytes is compression type (1 byte), which is part of // the checksummed section. size_t len = block_size + 1; @@ -34,6 +38,13 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data, uint32_t stored = DecodeFixed32(data + len); uint32_t computed = ComputeBuiltinChecksum(type, data, len); + + // Unapply context to 'stored' rather than apply to 'computed, for people + // who might look for reference crc value in error message + uint32_t modifier = + ChecksumModifierForContext(footer.base_context_checksum(), offset); + stored -= modifier; + if (stored == computed) { return Status::OK(); } else { @@ -43,8 +54,9 @@ Status VerifyBlockChecksum(ChecksumType type, const char* data, computed = crc32c::Unmask(computed); } return Status::Corruption( - "block checksum mismatch: stored = " + std::to_string(stored) + - ", computed = " + std::to_string(computed) + + "block checksum mismatch: stored" + + std::string(modifier ? "(context removed)" : "") + " = " + + std::to_string(stored) + ", computed = " + std::to_string(computed) + ", type = " + std::to_string(type) + " in " + file_name + " offset " + std::to_string(offset) + " size " + std::to_string(block_size)); } diff --git a/table/block_based/reader_common.h b/table/block_based/reader_common.h index 102802afe..08c2a756b 100644 --- a/table/block_based/reader_common.h +++ b/table/block_based/reader_common.h @@ -12,6 +12,8 @@ #include "rocksdb/table.h" namespace ROCKSDB_NAMESPACE { +class Footer; + // Release the cached entry and decrement its ref count. extern void ForceReleaseCachedEntry(void* arg, void* h); @@ -22,12 +24,13 @@ inline MemoryAllocator* GetMemoryAllocator( : nullptr; } -// Assumes block has a trailer as in format.h. file_name and offset provided -// for generating a diagnostic message in returned status. +// Assumes block has a trailer past `data + block_size` as in format.h. 
+// `file_name` provided for generating diagnostic message in returned status. +// `offset` might be required for proper verification (also used for message). // // Returns Status::OK() on checksum match, or Status::Corruption() on checksum // mismatch. -extern Status VerifyBlockChecksum(ChecksumType type, const char* data, +extern Status VerifyBlockChecksum(const Footer& footer, const char* data, size_t block_size, const std::string& file_name, uint64_t offset); diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index b2fa6d4b5..412ac4bde 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -33,9 +33,9 @@ inline void BlockFetcher::ProcessTrailerIfPresent() { if (footer_.GetBlockTrailerSize() > 0) { assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize); if (read_options_.verify_checksums) { - io_status_ = status_to_io_status(VerifyBlockChecksum( - footer_.checksum_type(), slice_.data(), block_size_, - file_->file_name(), handle_.offset())); + io_status_ = status_to_io_status( + VerifyBlockChecksum(footer_, slice_.data(), block_size_, + file_->file_name(), handle_.offset())); RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT); if (!io_status_.ok()) { assert(io_status_.IsCorruption()); diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index c2f6552cc..18109811d 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -107,6 +107,9 @@ class BlockFetcherTest : public testing::Test { Footer footer; ReadFooter(file.get(), &footer); const BlockHandle& index_handle = footer.index_handle(); + // FIXME: index handle will need to come from metaindex for + // format_version >= 6 when that becomes the default + ASSERT_FALSE(index_handle.IsNull()); CompressionType compression_type; FetchBlock(file.get(), index_handle, BlockType::kIndex, diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 7ca72365f..0cf6834af 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -403,8 +403,12 @@ Status CuckooTableBuilder::Finish() { } FooterBuilder footer; - footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, offset, - kNoChecksum, meta_index_block_handle); + Status s = footer.Build(kCuckooTableMagicNumber, /* format_version */ 1, + offset, kNoChecksum, meta_index_block_handle); + if (!s.ok()) { + status_ = s; + return status_; + } io_status_ = file_->Append(footer.GetSlice()); status_ = io_status_; return status_; diff --git a/table/format.cc b/table/format.cc index dc077bd45..8825384f0 100644 --- a/table/format.cc +++ b/table/format.cc @@ -10,6 +10,7 @@ #include "table/format.h" #include +#include #include #include "block_fetcher.h" @@ -18,12 +19,14 @@ #include "monitoring/perf_context_imp.h" #include "monitoring/statistics_impl.h" #include "options/options_helper.h" +#include "port/likely.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "table/persistent_cache_helper.h" +#include "unique_id_impl.h" #include "util/cast_util.h" #include "util/coding.h" #include "util/compression.h" @@ -195,25 +198,41 @@ inline uint8_t BlockTrailerSizeForMagicNumber(uint64_t magic_number) { // -> format_version >= 1 // checksum type (char, 1 byte) // * Part2 +// -> format_version <= 5 // metaindex handle (varint64 offset, varint64 size) // index handle (varint64 offset, varint64 size) // for part2 size = 2 * 
BlockHandle::kMaxEncodedLength = 40 +// - This padding is unchecked/ignored +// -> format_version >= 6 +// extended magic number (4 bytes) = 0x3e 0x00 0x7a 0x00 +// - Also surely invalid (size 0) handles if interpreted as older version +// - (Helps ensure a corrupted format_version doesn't get us far with no +// footer checksum.) +// footer_checksum (uint32LE, 4 bytes) +// - Checksum of above checksum type of whole footer, with this field +// set to all zeros. +// base_context_checksum (uint32LE, 4 bytes) +// metaindex block size (uint32LE, 4 bytes) +// - Assumed to be immediately before footer, < 4GB +// (24 bytes, reserved for future use) +// - Brings part2 size also to 40 bytes +// - Checked that last eight bytes == 0, so reserved for a future +// incompatible feature (but under format_version=6) // * Part3 // -> format_version == 0 (inferred from legacy magic number) // legacy magic number (8 bytes) // -> format_version >= 1 (inferred from NOT legacy magic number) // format_version (uint32LE, 4 bytes), also called "footer version" // newer magic number (8 bytes) - +const std::array kExtendedMagic{{0x3e, 0x00, 0x7a, 0x00}}; constexpr size_t kFooterPart2Size = 2 * BlockHandle::kMaxEncodedLength; } // namespace -void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, - uint64_t footer_offset, ChecksumType checksum_type, - const BlockHandle& metaindex_handle, - const BlockHandle& index_handle) { - (void)footer_offset; // Future use - +Status FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, + uint64_t footer_offset, ChecksumType checksum_type, + const BlockHandle& metaindex_handle, + const BlockHandle& index_handle, + uint32_t base_context_checksum) { assert(magic_number != Footer::kNullTableMagicNumber); assert(IsSupportedFormatVersion(format_version)); @@ -249,19 +268,71 @@ void FooterBuilder::Build(uint64_t magic_number, uint32_t format_version, assert(cur + 8 == slice_.data() + slice_.size()); } - { + if (format_version >= 6) { + if (BlockTrailerSizeForMagicNumber(magic_number) != 0) { + // base context checksum required for table formats with block checksums + assert(base_context_checksum != 0); + assert(ChecksumModifierForContext(base_context_checksum, 0) != 0); + } else { + // base context checksum not used + assert(base_context_checksum == 0); + assert(ChecksumModifierForContext(base_context_checksum, 0) == 0); + } + + // Start populating Part 2 + char* cur = data_.data() + /* part 1 size */ 1; + // Set extended magic of part2 + std::copy(kExtendedMagic.begin(), kExtendedMagic.end(), cur); + cur += kExtendedMagic.size(); + // Fill checksum data with zeros (for later computing checksum) + char* checksum_data = cur; + EncodeFixed32(cur, 0); + cur += 4; + // Save base context checksum + EncodeFixed32(cur, base_context_checksum); + cur += 4; + // Compute and save metaindex size + uint32_t metaindex_size = static_cast(metaindex_handle.size()); + if (metaindex_size != metaindex_handle.size()) { + return Status::NotSupported("Metaindex block size > 4GB"); + } + // Metaindex must be adjacent to footer + assert(metaindex_size == 0 || + metaindex_handle.offset() + metaindex_handle.size() == + footer_offset - BlockTrailerSizeForMagicNumber(magic_number)); + EncodeFixed32(cur, metaindex_size); + cur += 4; + + // Zero pad remainder (for future use) + std::fill_n(cur, 24U, char{0}); + assert(cur + 24 == part3); + + // Compute checksum, add context + uint32_t checksum = ComputeBuiltinChecksum( + checksum_type, data_.data(), Footer::kNewVersionsEncodedLength); 
+ checksum += + ChecksumModifierForContext(base_context_checksum, footer_offset); + // Store it + EncodeFixed32(checksum_data, checksum); + } else { + // Base context checksum not used + assert(!FormatVersionUsesContextChecksum(format_version)); + // Should be left empty + assert(base_context_checksum == 0); + assert(ChecksumModifierForContext(base_context_checksum, 0) == 0); + + // Populate all of part 2 char* cur = part2; cur = metaindex_handle.EncodeTo(cur); cur = index_handle.EncodeTo(cur); // Zero pad remainder std::fill(cur, part3, char{0}); } + return Status::OK(); } Status Footer::DecodeFrom(Slice input, uint64_t input_offset, uint64_t enforce_table_magic_number) { - (void)input_offset; // Future use - // Only decode to unused Footer assert(table_magic_number_ == kNullTableMagicNumber); assert(input != nullptr); @@ -284,6 +355,9 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset, block_trailer_size_ = BlockTrailerSizeForMagicNumber(magic); // Parse Part3 + const char* part3_ptr = magic_ptr; + uint32_t computed_checksum = 0; + uint64_t footer_offset = 0; if (legacy) { // The size is already asserted to be at least kMinEncodedLength // at the beginning of the function @@ -291,37 +365,101 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset, format_version_ = 0 /* legacy */; checksum_type_ = kCRC32c; } else { - const char* part3_ptr = magic_ptr - 4; + part3_ptr = magic_ptr - 4; format_version_ = DecodeFixed32(part3_ptr); - if (!IsSupportedFormatVersion(format_version_)) { + if (UNLIKELY(!IsSupportedFormatVersion(format_version_))) { return Status::Corruption("Corrupt or unsupported format_version: " + std::to_string(format_version_)); } // All known format versions >= 1 occupy exactly this many bytes. - if (input.size() < kNewVersionsEncodedLength) { + if (UNLIKELY(input.size() < kNewVersionsEncodedLength)) { return Status::Corruption("Input is too short to be an SST file"); } uint64_t adjustment = input.size() - kNewVersionsEncodedLength; input.remove_prefix(adjustment); + footer_offset = input_offset + adjustment; // Parse Part1 char chksum = input.data()[0]; checksum_type_ = lossless_cast(chksum); - if (!IsSupportedChecksumType(checksum_type())) { + if (UNLIKELY(!IsSupportedChecksumType(checksum_type()))) { return Status::Corruption("Corrupt or unsupported checksum type: " + std::to_string(lossless_cast(chksum))); } + // This is the most convenient place to compute the checksum + if (checksum_type_ != kNoChecksum && format_version_ >= 6) { + std::array copy_without_checksum; + std::copy_n(input.data(), kNewVersionsEncodedLength, + ©_without_checksum[0]); + EncodeFixed32(©_without_checksum[5], 0); // Clear embedded checksum + computed_checksum = + ComputeBuiltinChecksum(checksum_type(), copy_without_checksum.data(), + kNewVersionsEncodedLength); + } // Consume checksum type field input.remove_prefix(1); } // Parse Part2 - Status result = metaindex_handle_.DecodeFrom(&input); - if (result.ok()) { - result = index_handle_.DecodeFrom(&input); + if (format_version_ >= 6) { + Slice ext_magic(input.data(), 4); + if (UNLIKELY(ext_magic.compare(Slice(kExtendedMagic.data(), + kExtendedMagic.size())) != 0)) { + return Status::Corruption("Bad extended magic number: 0x" + + ext_magic.ToString(/*hex*/ true)); + } + input.remove_prefix(4); + uint32_t stored_checksum = 0, metaindex_size = 0; + bool success; + success = GetFixed32(&input, &stored_checksum); + assert(success); + success = GetFixed32(&input, &base_context_checksum_); + assert(success); + if 
(UNLIKELY(ChecksumModifierForContext(base_context_checksum_, 0) == 0)) { + return Status::Corruption("Invalid base context checksum"); + } + computed_checksum += + ChecksumModifierForContext(base_context_checksum_, footer_offset); + if (UNLIKELY(computed_checksum != stored_checksum)) { + return Status::Corruption("Footer at " + std::to_string(footer_offset) + + " checksum mismatch"); + } + success = GetFixed32(&input, &metaindex_size); + assert(success); + (void)success; + uint64_t metaindex_end = footer_offset - GetBlockTrailerSize(); + metaindex_handle_ = + BlockHandle(metaindex_end - metaindex_size, metaindex_size); + + // Mark unpopulated + index_handle_ = BlockHandle::NullBlockHandle(); + + // 16 bytes of unchecked reserved padding + input.remove_prefix(16U); + + // 8 bytes of checked reserved padding (expected to be zero unless using a + // future feature). + uint64_t reserved = 0; + success = GetFixed64(&input, &reserved); + assert(success); + if (UNLIKELY(reserved != 0)) { + return Status::NotSupported( + "File uses a future feature not supported in this version"); + } + // End of part 2 + assert(input.data() == part3_ptr); + } else { + // format_version_ < 6 + Status result = metaindex_handle_.DecodeFrom(&input); + if (result.ok()) { + result = index_handle_.DecodeFrom(&input); + } + if (!result.ok()) { + return result; + } + // Padding in part2 is ignored } - return result; - // Padding in part2 is ignored + return Status::OK(); } std::string Footer::ToString() const { diff --git a/table/format.h b/table/format.h index c375165bf..73675381e 100644 --- a/table/format.h +++ b/table/format.h @@ -111,6 +111,40 @@ struct IndexValue { std::string ToString(bool hex, bool have_first_key) const; }; +// Given a file's base_context_checksum and an offset of a block within that +// file, choose a 32-bit value that is as unique as possible. This value will +// be added to the standard checksum to get a checksum "with context," or can +// be subtracted to "remove" context. Returns zero (no modifier) if feature is +// disabled with base_context_checksum == 0. +inline uint32_t ChecksumModifierForContext(uint32_t base_context_checksum, + uint64_t offset) { + // To disable on base_context_checksum == 0, we could write + // `if (base_context_checksum == 0) return 0;` but benchmarking shows + // measurable performance penalty vs. this: compute the modifier + // unconditionally and use an "all or nothing" bit mask to enable + // or disable. + uint32_t all_or_nothing = uint32_t{0} - (base_context_checksum != 0); + + // Desired properties: + // (call this function f(b, o) where b = base and o = offset) + // 1. Fast + // 2. f(b1, o) == f(b2, o) iff b1 == b2 + // (Perfectly preserve base entropy) + // 3. f(b, o1) == f(b, o2) only if o1 == o2 or |o1-o2| >= 4 billion + // (Guaranteed uniqueness for nearby offsets) + // 3. f(b, o + j * 2**32) == f(b, o + k * 2**32) only if j == k + // (Upper bits matter, and *aligned* misplacement fails check) + // 4. f(b1, o) == f(b2, o + x) then preferably not + // f(b1, o + y) == f(b2, o + x + y) + // (Avoid linearly correlated matches) + // 5. f(b, o) == 0 depends on both b and o + // (No predictable overlap with non-context checksums) + uint32_t modifier = + base_context_checksum ^ (Lower32of64(offset) + Upper32of64(offset)); + + return modifier & all_or_nothing; +} + inline uint32_t GetCompressFormatForVersion(uint32_t format_version) { // As of format_version 2, we encode compressed block with // compress_format_version == 2. Before that, the version is 1. 
@@ -118,18 +152,27 @@ inline uint32_t GetCompressFormatForVersion(uint32_t format_version) { return format_version >= 2 ? 2 : 1; } -constexpr uint32_t kLatestFormatVersion = 5; +constexpr uint32_t kLatestFormatVersion = 6; inline bool IsSupportedFormatVersion(uint32_t version) { return version <= kLatestFormatVersion; } +// Same as having a unique id in footer. +inline bool FormatVersionUsesContextChecksum(uint32_t version) { + return version >= 6; +} + +inline bool FormatVersionUsesIndexHandleInFooter(uint32_t version) { + return version < 6; +} + // Footer encapsulates the fixed information stored at the tail end of every // SST file. In general, it should only include things that cannot go // elsewhere under the metaindex block. For example, checksum_type is // required for verifying metaindex block checksum (when applicable), but -// index block handle can easily go in metaindex block (possible future). -// See also FooterBuilder below. +// index block handle can easily go in metaindex block. See also FooterBuilder +// below. class Footer { public: // Create empty. Populate using DecodeFrom. @@ -137,7 +180,7 @@ class Footer { // Deserialize a footer (populate fields) from `input` and check for various // corruptions. `input_offset` is the offset within the target file of - // `input` buffer (future use). + // `input` buffer, which is needed for verifying format_version >= 6 footer. // If enforce_table_magic_number != 0, will return corruption if table magic // number is not equal to enforce_table_magic_number. Status DecodeFrom(Slice input, uint64_t input_offset, @@ -152,13 +195,17 @@ class Footer { // BBTO::format_version.) uint32_t format_version() const { return format_version_; } + // See ChecksumModifierForContext() + uint32_t base_context_checksum() const { return base_context_checksum_; } + // Block handle for metaindex block. const BlockHandle& metaindex_handle() const { return metaindex_handle_; } // Block handle for (top-level) index block. + // TODO? remove from this struct and only read on decode for legacy cases const BlockHandle& index_handle() const { return index_handle_; } - // Checksum type used in the file. + // Checksum type used in the file, including footer for format version >= 6. ChecksumType checksum_type() const { return static_cast(checksum_type_); } @@ -198,6 +245,7 @@ class Footer { uint64_t table_magic_number_ = kNullTableMagicNumber; uint32_t format_version_ = kInvalidFormatVersion; + uint32_t base_context_checksum_ = 0; BlockHandle metaindex_handle_; BlockHandle index_handle_; int checksum_type_ = kInvalidChecksumType; @@ -219,11 +267,16 @@ class FooterBuilder { // * footer_offset is the file offset where the footer will be written // (for future use). // * checksum_type is for formats using block checksums. - // * index_handle is optional for some kinds of SST files. - void Build(uint64_t table_magic_number, uint32_t format_version, - uint64_t footer_offset, ChecksumType checksum_type, - const BlockHandle& metaindex_handle, - const BlockHandle& index_handle = BlockHandle::NullBlockHandle()); + // * index_handle is optional for some SST kinds and (for caller convenience) + // ignored when format_version >= 6. (Must be added to metaindex in that + // case.) + // * unique_id must be specified if format_vesion >= 6 and SST uses block + // checksums with context. Otherwise, auto-generated if format_vesion >= 6. 
+ Status Build(uint64_t table_magic_number, uint32_t format_version, + uint64_t footer_offset, ChecksumType checksum_type, + const BlockHandle& metaindex_handle, + const BlockHandle& index_handle = BlockHandle::NullBlockHandle(), + uint32_t base_context_checksum = 0); // After Builder, get a Slice for the serialized Footer, backed by this // FooterBuilder. diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index cf756cfff..b82b5962f 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -27,6 +27,8 @@ namespace ROCKSDB_NAMESPACE { const std::string kPropertiesBlockName = "rocksdb.properties"; +// NB: only used with format_version >= 6 +const std::string kIndexBlockName = "rocksdb.index"; // Old property block name for backward compatibility const std::string kPropertiesBlockOldName = "rocksdb.stats"; const std::string kCompressionDictBlockName = "rocksdb.compression_dict"; @@ -395,8 +397,8 @@ Status ReadTablePropertiesHelper( // Modified version of BlockFetcher checksum verification // (See write_global_seqno comment above) if (s.ok() && footer.GetBlockTrailerSize() > 0) { - s = VerifyBlockChecksum(footer.checksum_type(), properties_block.data(), - block_size, file->file_name(), handle.offset()); + s = VerifyBlockChecksum(footer, properties_block.data(), block_size, + file->file_name(), handle.offset()); if (s.IsCorruption()) { if (new_table_properties->external_sst_file_global_seqno_offset != 0) { std::string tmp_buf(properties_block.data(), @@ -405,8 +407,8 @@ Status ReadTablePropertiesHelper( new_table_properties->external_sst_file_global_seqno_offset - handle.offset(); EncodeFixed64(&tmp_buf[static_cast(global_seqno_offset)], 0); - s = VerifyBlockChecksum(footer.checksum_type(), tmp_buf.data(), - block_size, file->file_name(), handle.offset()); + s = VerifyBlockChecksum(footer, tmp_buf.data(), block_size, + file->file_name(), handle.offset()); } } } diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 962a31638..1ed9cf27f 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -32,6 +32,7 @@ struct TableProperties; // Meta block names for metaindex extern const std::string kPropertiesBlockName; +extern const std::string kIndexBlockName; extern const std::string kPropertiesBlockOldName; extern const std::string kCompressionDictBlockName; extern const std::string kRangeDelBlockName; diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 126098a86..ffa811c3c 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -274,10 +274,11 @@ Status PlainTableBuilder::Finish() { // -- Write property block BlockHandle property_block_handle; - IOStatus s = WriteBlock(property_block_builder.Finish(), file_, &offset_, + io_status_ = WriteBlock(property_block_builder.Finish(), file_, &offset_, &property_block_handle); - if (!s.ok()) { - return static_cast(s); + if (!io_status_.ok()) { + status_ = io_status_; + return status_; } meta_index_builer.Add(kPropertiesBlockName, property_block_handle); @@ -293,8 +294,12 @@ Status PlainTableBuilder::Finish() { // Write Footer // no need to write out new footer if we're using default checksum FooterBuilder footer; - footer.Build(kPlainTableMagicNumber, /* format_version */ 0, offset_, - kNoChecksum, metaindex_block_handle); + Status s = footer.Build(kPlainTableMagicNumber, /* format_version */ 0, + offset_, kNoChecksum, metaindex_block_handle); + if (!s.ok()) { + status_ = s; + return status_; + } io_status_ = file_->Append(footer.GetSlice()); if (io_status_.ok()) 
{ offset_ += footer.GetSlice().size(); diff --git a/table/table_test.cc b/table/table_test.cc index 9610ba767..5b7e4682f 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -4472,11 +4472,12 @@ TEST(TableTest, FooterTests) { BlockHandle index(data_size + 5, index_size); BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size); uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5; + uint32_t base_context_checksum = 123456789; { // legacy block based FooterBuilder footer; - footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0, - footer_offset, kCRC32c, meta_index, index); + ASSERT_OK(footer.Build(kBlockBasedTableMagicNumber, /* format_version */ 0, + footer_offset, kCRC32c, meta_index, index)); Footer decoded_footer; ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); @@ -4486,6 +4487,7 @@ TEST(TableTest, FooterTests) { ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); ASSERT_EQ(decoded_footer.format_version(), 0U); + ASSERT_EQ(decoded_footer.base_context_checksum(), 0U); ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); // Ensure serialized with legacy magic ASSERT_EQ( @@ -4495,9 +4497,11 @@ TEST(TableTest, FooterTests) { // block based, various checksums, various versions for (auto t : GetSupportedChecksums()) { for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) { + uint32_t maybe_bcc = + FormatVersionUsesContextChecksum(fv) ? base_context_checksum : 0U; FooterBuilder footer; - footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t, - meta_index, index); + ASSERT_OK(footer.Build(kBlockBasedTableMagicNumber, fv, footer_offset, t, + meta_index, index, maybe_bcc)); Footer decoded_footer; ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), @@ -4506,18 +4510,44 @@ TEST(TableTest, FooterTests) { ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + if (FormatVersionUsesIndexHandleInFooter(fv)) { + ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); + ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + } ASSERT_EQ(decoded_footer.format_version(), fv); ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); + + if (FormatVersionUsesContextChecksum(fv)) { + ASSERT_EQ(decoded_footer.base_context_checksum(), + base_context_checksum); + + // Bad offset should fail footer checksum + decoded_footer = Footer(); + ASSERT_NOK( + decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset - 1)); + } else { + ASSERT_EQ(decoded_footer.base_context_checksum(), 0U); + } + + // Too big metaindex size should also fail encoding only in new footer + uint64_t big_metaindex_size = 0x100000007U; + uint64_t big_footer_offset = + data_size + big_metaindex_size + index_size + 3 * 5; + BlockHandle big_metaindex = + BlockHandle(data_size + index_size + 2 * 5, big_metaindex_size); + ASSERT_NE(footer + .Build(kBlockBasedTableMagicNumber, fv, big_footer_offset, + t, big_metaindex, index, maybe_bcc) + .ok(), + FormatVersionUsesContextChecksum(fv)); } } { // legacy plain table FooterBuilder footer; - footer.Build(kPlainTableMagicNumber, /* format_version */ 0, 
footer_offset, - kNoChecksum, meta_index); + ASSERT_OK(footer.Build(kPlainTableMagicNumber, /* format_version */ 0, + footer_offset, kNoChecksum, meta_index)); Footer decoded_footer; ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); @@ -4536,8 +4566,8 @@ TEST(TableTest, FooterTests) { { // xxhash plain table (not currently used) FooterBuilder footer; - footer.Build(kPlainTableMagicNumber, /* format_version */ 1, footer_offset, - kxxHash, meta_index); + ASSERT_OK(footer.Build(kPlainTableMagicNumber, /* format_version */ 1, + footer_offset, kxxHash, meta_index)); Footer decoded_footer; ASSERT_OK(decoded_footer.DecodeFrom(footer.GetSlice(), footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); @@ -5211,9 +5241,13 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { } } ASSERT_EQ(kPropertiesBlockName, key_at_max_offset); - // index handle is stored in footer rather than metaindex block, so need - // separate logic to verify it comes before properties block. - ASSERT_GT(max_offset, footer.index_handle().offset()); + if (FormatVersionUsesIndexHandleInFooter(footer.format_version())) { + // If index handle is stored in footer rather than metaindex block, + // need separate logic to verify it comes before properties block. + ASSERT_GT(max_offset, footer.index_handle().offset()); + } else { + ASSERT_TRUE(footer.index_handle().IsNull()); + } c.ResetTableReader(); } diff --git a/test_util/testutil.cc b/test_util/testutil.cc index b128b797a..1e771f4fd 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -39,7 +39,10 @@ namespace test { const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version; const std::set kFooterFormatVersionsToTest{ + // Non-legacy, before big footer changes 5U, + // After big footer changes + 6U, // In case any interesting future changes kDefaultFormatVersion, kLatestFormatVersion, diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 1a3821aa1..01b71136a 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -134,7 +134,7 @@ "verify_checksum": 1, "write_buffer_size": 4 * 1024 * 1024, "writepercent": 35, - "format_version": lambda: random.choice([2, 3, 4, 5, 5]), + "format_version": lambda: random.choice([2, 3, 4, 5, 6, 6]), "index_block_restart_interval": lambda: random.choice(range(1, 16)), "use_multiget": lambda: random.randint(0, 1), "use_get_entity": lambda: random.choice([0] * 7 + [1]), diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index 29d11d4da..481c4b722 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -468,4 +468,3 @@ int main(int argc, char** argv) { RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); } - diff --git a/unreleased_history/new_features/1_context_checksum.md b/unreleased_history/new_features/1_context_checksum.md new file mode 100644 index 000000000..303613cad --- /dev/null +++ b/unreleased_history/new_features/1_context_checksum.md @@ -0,0 +1 @@ +* Added enhanced data integrity checking on SST files with new format_version=6. Performance impact is very small or negligible. Previously if SST data was misplaced or re-arranged by the storage layer, it could pass block checksum with higher than 1 in 4 billion probability. With format_version=6, block checksums depend on what file they are in and location within the file. This way, misplaced SST data is no more likely to pass checksum verification than randomly corrupted data. 
Also in format_version=6, SST footers are checksum-protected. From bb8fcc00448cc841395663b6a7fc7ed571e86d0f Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Sun, 30 Jul 2023 17:30:01 -0700 Subject: [PATCH 012/386] db_stress: Reinstate Transaction::Rollback() calls before destruction (#11656) Summary: https://github.com/facebook/rocksdb/issues/11653 broke some crash tests. Apparently these Rollbacks are needed for pessimistic transaction cases. (I'm still not sure if the API makes any sense with regard to safe usage. It's certainly not documented. Will consider in follow-up PRs.) Pull Request resolved: https://github.com/facebook/rocksdb/pull/11656 Test Plan: manual crash test runs, crash_test_with_multiops_wc_txn and crash_test_with_multiops_wp_txn Reviewed By: cbi42 Differential Revision: D47906280 Pulled By: pdillinger fbshipit-source-id: d058a01b6dbb47a4f08d199e335364168304f81b --- db_stress_tool/multi_ops_txns_stress.cc | 15 ++++++++++----- db_stress_tool/no_batched_ops_stress.cc | 3 +++ include/rocksdb/utilities/transaction.h | 4 +--- utilities/transactions/pessimistic_transaction.h | 1 - utilities/transactions/write_prepared_txn.h | 1 - 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc index 23850da5c..1591a52e9 100644 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ b/db_stress_tool/multi_ops_txns_stress.cc @@ -572,7 +572,7 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, assert(txn); txn->SetSnapshotOnNextOperation(/*notifier=*/nullptr); - const Defer cleanup([new_a, &s, thread, this]() { + const Defer cleanup([new_a, &s, thread, this, &txn]() { if (s.ok()) { // Two gets, one for existing pk, one for locking potential new pk. thread->stats.AddGets(/*ngets=*/2, /*nfounds=*/1); @@ -594,6 +594,7 @@ Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread, } auto& key_gen = key_gen_for_a_[thread->tid]; key_gen->UndoAllocation(new_a); + txn->Rollback().PermitUncheckedError(); }); ReadOptions ropts; @@ -692,7 +693,7 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, Iterator* it = nullptr; long iterations = 0; - const Defer cleanup([new_c, &s, thread, &it, this, &iterations]() { + const Defer cleanup([new_c, &s, thread, &txn, &it, this, &iterations]() { delete it; if (s.ok()) { thread->stats.AddIterations(iterations); @@ -717,6 +718,7 @@ Status MultiOpsTxnsStressTest::SecondaryKeyUpdateTxn(ThreadState* thread, } auto& key_gen = key_gen_for_c_[thread->tid]; key_gen->UndoAllocation(new_c); + txn->Rollback().PermitUncheckedError(); }); // TODO (yanqin) try SetSnapshotOnNextOperation(). 
We currently need to take @@ -887,7 +889,7 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread]() { + const Defer cleanup([&s, thread, &txn]() { if (s.ok()) { thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); thread->stats.AddBytesForWrites( @@ -904,6 +906,7 @@ Status MultiOpsTxnsStressTest::UpdatePrimaryIndexValueTxn(ThreadState* thread, } else { thread->stats.AddErrors(1); } + txn->Rollback().PermitUncheckedError(); }); ReadOptions ropts; ropts.rate_limiter_priority = @@ -967,7 +970,7 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread]() { + const Defer cleanup([&s, thread, &txn]() { if (s.ok()) { thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/1); return; @@ -976,6 +979,7 @@ Status MultiOpsTxnsStressTest::PointLookupTxn(ThreadState* thread, } else { thread->stats.AddErrors(1); } + txn->Rollback().PermitUncheckedError(); }); std::shared_ptr snapshot; @@ -1010,12 +1014,13 @@ Status MultiOpsTxnsStressTest::RangeScanTxn(ThreadState* thread, assert(txn); - const Defer cleanup([&s, thread]() { + const Defer cleanup([&s, thread, &txn]() { if (s.ok()) { thread->stats.AddIterations(1); return; } thread->stats.AddErrors(1); + txn->Rollback().PermitUncheckedError(); }); std::shared_ptr snapshot; diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 31aac13ee..5d0ee2205 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -865,6 +865,9 @@ class NonBatchedOpsStressTest : public StressTest { if (readoptionscopy.snapshot) { db_->ReleaseSnapshot(readoptionscopy.snapshot); } + if (use_txn) { + txn->Rollback().PermitUncheckedError(); + } return statuses; } diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 4ac47ec04..3cdcc9bb2 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -140,9 +140,6 @@ class Transaction { Transaction(const Transaction&) = delete; void operator=(const Transaction&) = delete; - // The transaction is safely discarded on destruction, though must be - // discarded before the DB is closed or destroyed. (Calling Rollback() - // is not necessary before destruction.) virtual ~Transaction() {} // If a transaction has a snapshot set, the transaction will ensure that @@ -263,6 +260,7 @@ class Transaction { std::shared_ptr* snapshot = nullptr); // Discard all batched writes in this transaction. + // FIXME: what happens if this isn't called before destruction? 
virtual Status Rollback() = 0; // Records the state of the transaction for future calls to diff --git a/utilities/transactions/pessimistic_transaction.h b/utilities/transactions/pessimistic_transaction.h index ffff50974..dfec50d00 100644 --- a/utilities/transactions/pessimistic_transaction.h +++ b/utilities/transactions/pessimistic_transaction.h @@ -308,4 +308,3 @@ class WriteCommittedTxn : public PessimisticTransaction { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/transactions/write_prepared_txn.h b/utilities/transactions/write_prepared_txn.h index f621e37ab..3faf0c9b8 100644 --- a/utilities/transactions/write_prepared_txn.h +++ b/utilities/transactions/write_prepared_txn.h @@ -114,4 +114,3 @@ class WritePreparedTxn : public PessimisticTransaction { }; } // namespace ROCKSDB_NAMESPACE - From 9a2a6db2a9c5e628b38a5c8cceb90e1e5dbc39a4 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Tue, 1 Aug 2023 14:49:06 -0700 Subject: [PATCH 013/386] Use C++17 [[fallthrough]] in transaction_test.cc (#11663) Summary: (Copied from https://www.internalfb.com/diff/D46606060) This diff makes its files safe for use with -Wimplicit-fallthrough. Now that we're using C+20 there's no reason not to use this C++17 feature to make our code safer. It's currently possible to write code like this: ``` switch(x){ case 1: foo1(); case 2: foo2(); break; case 3: foo3(); } ``` But that's scary because we don't know whether the fallthrough from case 1 was intentional or not. The -Wimplicit-fallthrough flag will make this an error. The solution is to either fix the bug by inserting break or indicating intention by using [[fallthrough]]; (from C++17). ``` switch(x){ case 1: foo1(); [[fallthrough]]; // Solution if we intended to fallthrough break; // Solution if we did not intend to fallthrough case 2: foo2(); break; case 3: foo3(); } ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11663 Test Plan: Existing tests Reviewed By: jowlyzhang Differential Revision: D47961248 Pulled By: jaykorean fbshipit-source-id: 0d374c721bf1b328c14949dc5c17693da7311d03 --- utilities/transactions/transaction_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index ebe924fda..b46bac0d9 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -4990,7 +4990,7 @@ TEST_P(TransactionTest, DeleteRangeSupportTest) { } break; case WRITE_PREPARED: - // Intentional fall-through + FALLTHROUGH_INTENDED; case WRITE_UNPREPARED: if (skip_concurrency_control && skip_duplicate_key_check) { ASSERT_OK(s); From 946d1009bc1bd94809d30486e6de514438a3d8a3 Mon Sep 17 00:00:00 2001 From: amatveev-cf <120733177+amatveev-cf@users.noreply.github.com> Date: Wed, 2 Aug 2023 10:53:40 -0700 Subject: [PATCH 014/386] Expand Statistics support in the C API (#11263) Summary: Adds a few missing features to the C API: 1) Statistics level 2) Getting individual values instead of a serialized string Pull Request resolved: https://github.com/facebook/rocksdb/pull/11263 Test Plan: unit tests Reviewed By: ajkr Differential Revision: D47309963 Pulled By: hx235 fbshipit-source-id: 84df59db4045fc0fb3ea4aec451bc5c2afd2a248 --- db/c.cc | 114 +++++++++++++++++++++++++++++++++++++++++--- db/c_test.c | 85 ++++++++++++++++++++++++++++++--- include/rocksdb/c.h | 49 ++++++++++++++++++- 3 files changed, 235 insertions(+), 13 deletions(-) diff --git a/db/c.cc b/db/c.cc index 2ffa714ce..42ddc5217 100644 --- a/db/c.cc 
+++ b/db/c.cc @@ -77,6 +77,7 @@ using ROCKSDB_NAMESPACE::EnvOptions; using ROCKSDB_NAMESPACE::FileLock; using ROCKSDB_NAMESPACE::FilterPolicy; using ROCKSDB_NAMESPACE::FlushOptions; +using ROCKSDB_NAMESPACE::HistogramData; using ROCKSDB_NAMESPACE::HyperClockCacheOptions; using ROCKSDB_NAMESPACE::InfoLogLevel; using ROCKSDB_NAMESPACE::IngestExternalFileOptions; @@ -279,6 +280,11 @@ struct rocksdb_compactionfiltercontext_t { CompactionFilter::Context rep; }; +struct rocksdb_statistics_histogram_data_t { + rocksdb_statistics_histogram_data_t() : rep() {} + HistogramData rep; +}; + struct rocksdb_compactionfilter_t : public CompactionFilter { void* state_; void (*destructor_)(void*); @@ -3023,6 +3029,29 @@ void rocksdb_options_enable_statistics(rocksdb_options_t* opt) { opt->rep.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); } +void rocksdb_options_set_statistics_level(rocksdb_options_t* opt, int level) { + if (!opt->rep.statistics) { + return; + } + + if (level < rocksdb_statistics_level_disable_all) { + level = rocksdb_statistics_level_disable_all; + } + if (level > rocksdb_statistics_level_all) { + level = rocksdb_statistics_level_all; + } + opt->rep.statistics->set_stats_level( + static_cast(level)); +} + +int rocksdb_options_get_statistics_level(rocksdb_options_t* opt) { + if (!opt->rep.statistics) { + return ROCKSDB_NAMESPACE::StatsLevel::kDisableAll; + } + + return static_cast(opt->rep.statistics->get_stats_level()); +} + void rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt, unsigned char val) { opt->rep.skip_stats_update_on_db_open = val; @@ -3862,6 +3891,26 @@ char* rocksdb_options_statistics_get_string(rocksdb_options_t* opt) { return nullptr; } +uint64_t rocksdb_options_statistics_get_ticker_count(rocksdb_options_t* opt, + uint32_t ticker_type) { + ROCKSDB_NAMESPACE::Statistics* statistics = opt->rep.statistics.get(); + if (statistics) { + return statistics->getTickerCount(ticker_type); + } + return 0; +} + +void rocksdb_options_statistics_get_histogram_data( + rocksdb_options_t* opt, uint32_t type, + rocksdb_statistics_histogram_data_t* const data) { + ROCKSDB_NAMESPACE::Statistics* statistics = opt->rep.statistics.get(); + if (statistics) { + statistics->histogramData(type, &data->rep); + } else { + *data = rocksdb_statistics_histogram_data_t{}; + } +} + void rocksdb_options_set_ratelimiter(rocksdb_options_t* opt, rocksdb_ratelimiter_t* limiter) { if (limiter) { @@ -5194,7 +5243,8 @@ rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create() { } void rocksdb_fifo_compaction_options_set_allow_compaction( - rocksdb_fifo_compaction_options_t* fifo_opts, unsigned char allow_compaction) { + rocksdb_fifo_compaction_options_t* fifo_opts, + unsigned char allow_compaction) { fifo_opts->rep.allow_compaction = allow_compaction; } @@ -5623,8 +5673,7 @@ int rocksdb_transactiondb_property_int(rocksdb_transactiondb_t* db, } } -rocksdb_t* rocksdb_transactiondb_get_base_db( - rocksdb_transactiondb_t* txn_db) { +rocksdb_t* rocksdb_transactiondb_get_base_db(rocksdb_transactiondb_t* txn_db) { DB* base_db = txn_db->rep->GetBaseDB(); if (base_db != nullptr) { @@ -5636,9 +5685,7 @@ rocksdb_t* rocksdb_transactiondb_get_base_db( return nullptr; } -void rocksdb_transactiondb_close_base_db(rocksdb_t* base_db) { - delete base_db; -} +void rocksdb_transactiondb_close_base_db(rocksdb_t* base_db) { delete base_db; } rocksdb_transaction_t* rocksdb_transaction_begin( rocksdb_transactiondb_t* txn_db, @@ -6617,4 +6664,59 @@ void 
rocksdb_enable_manual_compaction(rocksdb_t* db) { db->rep->EnableManualCompaction(); } +rocksdb_statistics_histogram_data_t* +rocksdb_statistics_histogram_data_create() { + return new rocksdb_statistics_histogram_data_t{}; +} + +void rocksdb_statistics_histogram_data_destroy( + rocksdb_statistics_histogram_data_t* data) { + delete data; +} + +double rocksdb_statistics_histogram_data_get_median( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.median; +} + +double rocksdb_statistics_histogram_data_get_p95( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.percentile95; +} + +double rocksdb_statistics_histogram_data_get_p99( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.percentile99; +} + +double rocksdb_statistics_histogram_data_get_average( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.average; +} + +double rocksdb_statistics_histogram_data_get_std_dev( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.standard_deviation; +} + +double rocksdb_statistics_histogram_data_get_max( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.max; +} + +uint64_t rocksdb_statistics_histogram_data_get_count( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.count; +} + +uint64_t rocksdb_statistics_histogram_data_get_sum( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.sum; +} + +double rocksdb_statistics_histogram_data_get_min( + rocksdb_statistics_histogram_data_t* data) { + return data->rep.min; +} + } // end extern "C" diff --git a/db/c_test.c b/db/c_test.c index 80eb90b8a..b21e1aee3 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -3,15 +3,14 @@ found in the LICENSE file. See the AUTHORS file for names of contributors. */ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
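Before the corresponding c_test.c updates below, a minimal usage sketch of the statistics functions added to the C API above. This is only a sketch, assuming a throwaway database path; error handling is kept minimal, and the numeric IDs 40 (bytes-written ticker) and 1 (DB write histogram) simply mirror the values hard-coded in c_test.c rather than constants exposed by the public C header.

```
// Sketch only: exercises the new statistics C API surface shown above.
#include <stdio.h>
#include "rocksdb/c.h"

int main(void) {
  char* err = NULL;
  rocksdb_options_t* options = rocksdb_options_create();
  rocksdb_options_set_create_if_missing(options, 1);
  rocksdb_options_enable_statistics(options);
  rocksdb_options_set_statistics_level(options, rocksdb_statistics_level_all);

  rocksdb_t* db = rocksdb_open(options, "/tmp/stats_example_db", &err);
  if (err != NULL) {
    fprintf(stderr, "open failed: %s\n", err);
    return 1;
  }

  rocksdb_writeoptions_t* wo = rocksdb_writeoptions_create();
  rocksdb_put(db, wo, "key", 3, "value", 5, &err);

  // 40 == BYTES_WRITTEN ticker, 1 == DB_WRITE histogram (see c_test.c below).
  printf("bytes written: %llu\n",
         (unsigned long long)rocksdb_options_statistics_get_ticker_count(
             options, 40));

  rocksdb_statistics_histogram_data_t* hist =
      rocksdb_statistics_histogram_data_create();
  rocksdb_options_statistics_get_histogram_data(options, 1, hist);
  printf("db write p99: %f\n",
         rocksdb_statistics_histogram_data_get_p99(hist));

  rocksdb_statistics_histogram_data_destroy(hist);
  rocksdb_writeoptions_destroy(wo);
  rocksdb_close(db);
  rocksdb_options_destroy(options);
  return 0;
}
```

Note that the getters read from the Statistics object owned by the options, so the same rocksdb_options_t* used to open the database must be passed in.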
-#include +#include "rocksdb/c.h" #include #include +#include #include #include #include - -#include "rocksdb/c.h" #ifndef OS_WIN #include #endif @@ -2061,6 +2060,15 @@ int main(int argc, char** argv) { CheckCondition(29.0 == rocksdb_options_get_experimental_mempurge_threshold(o)); + CheckCondition(rocksdb_statistics_level_disable_all == + rocksdb_options_get_statistics_level(o)); + rocksdb_options_enable_statistics(o); + CheckCondition(rocksdb_statistics_level_disable_all != + rocksdb_options_get_statistics_level(o)); + rocksdb_options_set_statistics_level(o, rocksdb_statistics_level_all); + CheckCondition(rocksdb_statistics_level_all == + rocksdb_options_get_statistics_level(o)); + /* Blob Options */ rocksdb_options_set_enable_blob_files(o, 1); CheckCondition(1 == rocksdb_options_get_enable_blob_files(o)); @@ -3122,12 +3130,12 @@ int main(int argc, char** argv) { CheckTxnDBGetCF(txn_db, roptions, cfh, "cf_foo", NULL); CheckTxnDBPinGetCF(txn_db, roptions, cfh, "cf_foo", NULL); - - //memory usage + // memory usage rocksdb_t* base_db = rocksdb_transactiondb_get_base_db(txn_db); rocksdb_memory_consumers_t* consumers = rocksdb_memory_consumers_create(); rocksdb_memory_consumers_add_db(consumers, base_db); - rocksdb_memory_usage_t* usage = rocksdb_approximate_memory_usage_create(consumers, &err); + rocksdb_memory_usage_t* usage = + rocksdb_approximate_memory_usage_create(consumers, &err); CheckNoError(err); rocksdb_approximate_memory_usage_destroy(usage); rocksdb_memory_consumers_destroy(consumers); @@ -3614,6 +3622,71 @@ int main(int argc, char** argv) { rocksdb_readoptions_destroy(ropts); } + StartPhase("statistics"); + { + const uint32_t BYTES_WRITTEN_TICKER = 40; + const uint32_t DB_WRITE_HIST = 1; + + rocksdb_statistics_histogram_data_t* hist = + rocksdb_statistics_histogram_data_create(); + { + // zero by default + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_median(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_p95(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_p99(hist)); + CheckCondition(0.0 == + rocksdb_statistics_histogram_data_get_average(hist)); + CheckCondition(0.0 == + rocksdb_statistics_histogram_data_get_std_dev(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_max(hist)); + CheckCondition(0 == rocksdb_statistics_histogram_data_get_count(hist)); + CheckCondition(0 == rocksdb_statistics_histogram_data_get_sum(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_min(hist)); + } + + rocksdb_close(db); + rocksdb_destroy_db(options, dbname, &err); + CheckNoError(err); + + rocksdb_options_enable_statistics(options); + rocksdb_options_set_statistics_level(options, rocksdb_statistics_level_all); + + db = rocksdb_open(options, dbname, &err); + CheckNoError(err); + + CheckCondition(0 == rocksdb_options_statistics_get_ticker_count( + options, BYTES_WRITTEN_TICKER)); + rocksdb_options_statistics_get_histogram_data(options, DB_WRITE_HIST, hist); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_median(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_p95(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_p99(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_average(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_std_dev(hist)); + CheckCondition(0.0 == rocksdb_statistics_histogram_data_get_max(hist)); + CheckCondition(0 == rocksdb_statistics_histogram_data_get_count(hist)); + CheckCondition(0 == 
rocksdb_statistics_histogram_data_get_sum(hist)); + + int i; + for (i = 0; i < 10; ++i) { + char key = '0' + (char)i; + rocksdb_put(db, woptions, &key, 1, "", 1, &err); + CheckNoError(err); + } + CheckCondition(0 != rocksdb_options_statistics_get_ticker_count( + options, BYTES_WRITTEN_TICKER)); + rocksdb_options_statistics_get_histogram_data(options, DB_WRITE_HIST, hist); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_median(hist)); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_p95(hist)); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_p99(hist)); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_average(hist)); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_std_dev(hist)); + CheckCondition(0.0 != rocksdb_statistics_histogram_data_get_max(hist)); + CheckCondition(0 != rocksdb_statistics_histogram_data_get_count(hist)); + CheckCondition(0 != rocksdb_statistics_histogram_data_get_sum(hist)); + + rocksdb_statistics_histogram_data_destroy(hist); + } + StartPhase("cancel_all_background_work"); rocksdb_cancel_all_background_work(db, 1); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 407eb4720..6636b592b 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -136,6 +136,8 @@ typedef struct rocksdb_wal_iterator_t rocksdb_wal_iterator_t; typedef struct rocksdb_wal_readoptions_t rocksdb_wal_readoptions_t; typedef struct rocksdb_memory_consumers_t rocksdb_memory_consumers_t; typedef struct rocksdb_memory_usage_t rocksdb_memory_usage_t; +typedef struct rocksdb_statistics_histogram_data_t + rocksdb_statistics_histogram_data_t; /* DB operations */ @@ -1252,6 +1254,22 @@ rocksdb_options_set_max_bytes_for_level_multiplier_additional( rocksdb_options_t*, int* level_values, size_t num_levels); extern ROCKSDB_LIBRARY_API void rocksdb_options_enable_statistics( rocksdb_options_t*); + +enum { + rocksdb_statistics_level_disable_all = 0, + rocksdb_statistics_level_except_tickers = + rocksdb_statistics_level_disable_all, + rocksdb_statistics_level_except_histogram_or_timers = 1, + rocksdb_statistics_level_except_timers = 2, + rocksdb_statistics_level_except_detailed_timers = 3, + rocksdb_statistics_level_except_time_for_mutex = 4, + rocksdb_statistics_level_all = 5, +}; + +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_statistics_level( + rocksdb_options_t*, int level); +extern ROCKSDB_LIBRARY_API int rocksdb_options_get_statistics_level( + rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_skip_stats_update_on_db_open(rocksdb_options_t* opt, unsigned char val); @@ -1328,6 +1346,11 @@ extern ROCKSDB_LIBRARY_API int rocksdb_options_get_prepopulate_blob_cache( /* returns a pointer to a malloc()-ed, null terminated string */ extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string( rocksdb_options_t* opt); +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_options_statistics_get_ticker_count( + rocksdb_options_t* opt, uint32_t ticker_type); +extern ROCKSDB_LIBRARY_API void rocksdb_options_statistics_get_histogram_data( + rocksdb_options_t* opt, uint32_t histogram_type, + rocksdb_statistics_histogram_data_t* const data); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_max_write_buffer_number( rocksdb_options_t*, int); @@ -2257,7 +2280,8 @@ extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t* rocksdb_fifo_compaction_options_create(void); extern ROCKSDB_LIBRARY_API void rocksdb_fifo_compaction_options_set_allow_compaction( - rocksdb_fifo_compaction_options_t* fifo_opts, unsigned 
char allow_compaction); + rocksdb_fifo_compaction_options_t* fifo_opts, + unsigned char allow_compaction); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_fifo_compaction_options_get_allow_compaction( rocksdb_fifo_compaction_options_t* fifo_opts); @@ -2880,6 +2904,29 @@ extern ROCKSDB_LIBRARY_API void rocksdb_disable_manual_compaction( extern ROCKSDB_LIBRARY_API void rocksdb_enable_manual_compaction(rocksdb_t* db); +extern ROCKSDB_LIBRARY_API rocksdb_statistics_histogram_data_t* +rocksdb_statistics_histogram_data_create(void); +extern ROCKSDB_LIBRARY_API void rocksdb_statistics_histogram_data_destroy( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_median( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_p95( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_p99( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_average( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_std_dev( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_max( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_statistics_histogram_data_get_count( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API uint64_t rocksdb_statistics_histogram_data_get_sum( + rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_min( + rocksdb_statistics_histogram_data_t* data); + #ifdef __cplusplus } /* end extern "C" */ #endif From f4e4039f007f5b7083ecd3a845e9a6638e2b6cf2 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 2 Aug 2023 11:30:10 -0700 Subject: [PATCH 015/386] Add some more bit operations to internal APIs (#11660) Summary: BottomNBits() - there is a single fast instruction for this on x86 since BMI2, but testing with godbolt indicates you need at least GCC 10 for the compiler to choose that instruction from the obvious C++ code. https://godbolt.org/z/5a7Ysd41h BitwiseAnd() - this is a convenience function that works around the language flaw that the type of the result of x & y is the larger of the two input types, when it should be the smaller. This can save some ugly static_cast. I expect to use both of these in coming HyperClockCache developments, and have applied them in a couple of places in existing code. 
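For illustration, a minimal sketch of the semantics described above, using simplified stand-ins rather than the optimized helpers added to util/math.h further down; the sample values are arbitrary.

```
// Minimal sketch, assuming the semantics described above; simplified
// stand-ins, not the implementations in util/math.h.
#include <cassert>
#include <cstdint>
#include <type_traits>

template <typename T>
T BottomNBitsSketch(T v, int nbits) {
  // Keep only the lowest nbits bits (undefined for full width, as documented).
  return static_cast<T>(v & ((T{1} << nbits) - 1));
}

template <typename A, typename B>
std::conditional_t<sizeof(A) < sizeof(B), A, B> BitwiseAndSketch(A a, B b) {
  // The result takes the smaller of the two operand types.
  using Smaller = std::conditional_t<sizeof(A) < sizeof(B), A, B>;
  return static_cast<Smaller>(a & b);
}

int main() {
  uint64_t hash = 0xABCD1234ABCD1234ULL;
  // Low 8 bits of the hash.
  assert(BottomNBitsSketch(hash, 8) == 0x34);
  // No explicit cast needed; the result is already uint16_t, which is the
  // kind of call-site cleanup applied to ModTableSize() in clock_cache.h.
  auto masked = BitwiseAndSketch(hash, uint16_t{0x00FF});
  static_assert(std::is_same_v<decltype(masked), uint16_t>);
  assert(masked == 0x34);
  return 0;
}
```

The real versions add a BMI2 fast path for the bottom-bits extraction and static_asserts rejecting reference and non-integral types, as the util/math.h hunks below show.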
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11660 Test Plan: unit tests added Reviewed By: jowlyzhang Differential Revision: D47935531 Pulled By: pdillinger fbshipit-source-id: d148c43a1e51df4a1c549b93aaf2725a3f8d3bd6 --- cache/clock_cache.h | 3 +- cache/sharded_cache.cc | 2 +- util/core_local.h | 3 +- util/hash_test.cc | 44 +++++++++++++++++++++++++++++ util/math.h | 64 ++++++++++++++++++++++++++++++++++++++---- util/math128.h | 36 +++++++++++++++++++----- 6 files changed, 136 insertions(+), 16 deletions(-) diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 75a7b43a3..7a1caa023 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -24,6 +24,7 @@ #include "rocksdb/cache.h" #include "rocksdb/secondary_cache.h" #include "util/autovector.h" +#include "util/math.h" namespace ROCKSDB_NAMESPACE { @@ -563,7 +564,7 @@ class HyperClockTable : public BaseClockTable { private: // functions // Returns x mod 2^{length_bits_}. inline size_t ModTableSize(uint64_t x) { - return static_cast(x) & length_bits_mask_; + return BitwiseAnd(x, length_bits_mask_); } // Returns the first slot in the probe sequence with a handle e such that diff --git a/cache/sharded_cache.cc b/cache/sharded_cache.cc index cb8555b35..322b59226 100644 --- a/cache/sharded_cache.cc +++ b/cache/sharded_cache.cc @@ -38,7 +38,7 @@ uint32_t DetermineSeed(int32_t hash_seed_option) { return GetSliceHash(hostname) & kSeedMask; } else { // Fall back on something stable within the process. - return static_cast(gen.GetBaseUpper()) & kSeedMask; + return BitwiseAnd(gen.GetBaseUpper(), kSeedMask); } } else { // for kQuasiRandomHashSeed and fallback diff --git a/util/core_local.h b/util/core_local.h index 25174aef8..9c5b3f281 100644 --- a/util/core_local.h +++ b/util/core_local.h @@ -13,6 +13,7 @@ #include "port/likely.h" #include "port/port.h" +#include "util/math.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { @@ -70,7 +71,7 @@ std::pair CoreLocalArray::AccessElementAndIndex() const { // cpu id unavailable, just pick randomly core_idx = Random::GetTLSInstance()->Uniform(1 << size_shift_); } else { - core_idx = static_cast(cpuid & ((1 << size_shift_) - 1)); + core_idx = static_cast(BottomNBits(cpuid, size_shift_)); } return {AccessAtCore(core_idx), core_idx}; } diff --git a/util/hash_test.cc b/util/hash_test.cc index 72112b044..ccc283a24 100644 --- a/util/hash_test.cc +++ b/util/hash_test.cc @@ -565,6 +565,8 @@ size_t FastRange64(uint64_t hash, size_t range) { // Tests for math.h / math128.h (not worth a separate test binary) using ROCKSDB_NAMESPACE::BitParity; using ROCKSDB_NAMESPACE::BitsSetToOne; +using ROCKSDB_NAMESPACE::BitwiseAnd; +using ROCKSDB_NAMESPACE::BottomNBits; using ROCKSDB_NAMESPACE::ConstexprFloorLog2; using ROCKSDB_NAMESPACE::CountTrailingZeroBits; using ROCKSDB_NAMESPACE::DecodeFixed128; @@ -580,6 +582,19 @@ using ROCKSDB_NAMESPACE::Upper64of128; int blah(int x) { return DownwardInvolution(x); } +template +static void test_BitwiseAnd(T1 v1, T2 v2) { + auto a = BitwiseAnd(v1, v2); + // Essentially repeating the implementation :-/ + if constexpr (sizeof(T1) < sizeof(T2)) { + static_assert(std::is_same_v); + EXPECT_EQ(a, static_cast(v1 & v2)); + } else { + static_assert(std::is_same_v); + EXPECT_EQ(a, static_cast(v1 & v2)); + } +} + template static void test_BitOps() { // This complex code is to generalize to 128-bit values. 
Otherwise @@ -598,6 +613,22 @@ static void test_BitOps() { // If we could directly use arithmetic: // T vm1 = static_cast(v - 1); + // BottomNBits + { + // An essentially full length value + T x = everyOtherBit; + if (i > 2) { + // Make it slightly irregular + x = x ^ (T{1} << (i / 2)); + } + auto a = BottomNBits(x, i); + auto b = BottomNBits(~x, i); + EXPECT_EQ(x | a, x); + EXPECT_EQ(a | b, vm1); + EXPECT_EQ(a & b, T{0}); + EXPECT_EQ(BottomNBits(x ^ a, i), T{0}); + } + // FloorLog2 if (v > 0) { EXPECT_EQ(FloorLog2(v), i); @@ -707,9 +738,22 @@ static void test_BitOps() { } } + // BitwiseAnd + { + test_BitwiseAnd(vm1, static_cast(0x99)); + test_BitwiseAnd(v, static_cast(0x99)); + test_BitwiseAnd(char{0x66}, vm1); + test_BitwiseAnd(char{0x66}, v); + test_BitwiseAnd(v, int16_t{0x6699}); + test_BitwiseAnd(v, uint16_t{0x9966}); + test_BitwiseAnd(int64_t{0x1234234534564567}, v); + test_BitwiseAnd(uint64_t{0x9876876576545432}, v); + } + vm1 = (vm1 << 1) | 1; } + // ConstexprFloorLog2 EXPECT_EQ(ConstexprFloorLog2(T{1}), 0); EXPECT_EQ(ConstexprFloorLog2(T{2}), 1); EXPECT_EQ(ConstexprFloorLog2(T{3}), 1); diff --git a/util/math.h b/util/math.h index 39f308328..e1948e0a3 100644 --- a/util/math.h +++ b/util/math.h @@ -9,6 +9,9 @@ #ifdef _MSC_VER #include #endif +#ifdef __BMI2__ +#include +#endif #include #include @@ -20,11 +23,33 @@ ASSERT_FEATURE_COMPAT_HEADER(); namespace ROCKSDB_NAMESPACE { +// Fast implementation of extracting the bottom n bits of an integer. +// To ensure fast implementation, undefined if n bits is full width or more. +template +inline T BottomNBits(T v, int nbits) { + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + assert(nbits >= 0); + assert(nbits < int{8 * sizeof(T)}); +#ifdef __BMI2__ + if constexpr (sizeof(T) <= 4) { + return static_cast(_bzhi_u32(static_cast(v), nbits)); + } + if constexpr (sizeof(T) <= 8) { + return static_cast(_bzhi_u64(static_cast(v), nbits)); + } +#endif + // Newer compilers compile this down to bzhi on x86, but some older + // ones don't, thus the need for the intrinsic above. + return static_cast(v & ((T{1} << nbits) - 1)); +} + // Fast implementation of floor(log2(v)). Undefined for 0 or negative // numbers (in case of signed type). template inline int FloorLog2(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); assert(v > 0); #ifdef _MSC_VER static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); @@ -63,6 +88,8 @@ inline int FloorLog2(T v) { // Constexpr version of FloorLog2 template constexpr int ConstexprFloorLog2(T v) { + // NOTE: not checking is_integral so that this works with Unsigned128 + static_assert(!std::is_reference_v, "use std::remove_reference_t"); int rv = 0; while (v > T{1}) { ++rv; @@ -74,7 +101,8 @@ constexpr int ConstexprFloorLog2(T v) { // Number of low-order zero bits before the first 1 bit. Undefined for 0. 
template inline int CountTrailingZeroBits(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); assert(v != 0); #ifdef _MSC_VER static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); @@ -115,6 +143,9 @@ namespace detail { template int BitsSetToOneFallback(T v) { + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + const int kBits = static_cast(sizeof(T)) * 8; static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits"); // we static_cast these bit patterns in order to truncate them to the correct @@ -140,7 +171,9 @@ int BitsSetToOneFallback(T v) { // Number of bits set to 1. Also known as "population count". template inline int BitsSetToOne(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + #ifdef _MSC_VER static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); if (sizeof(T) < sizeof(uint32_t)) { @@ -192,7 +225,9 @@ inline int BitsSetToOne(T v) { template inline int BitParity(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + #ifdef _MSC_VER // bit parity == oddness of popcount return BitsSetToOne(v) & 1; @@ -214,7 +249,8 @@ inline int BitParity(T v) { // encode/decode big endian. template inline T EndianSwapValue(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); #ifdef _MSC_VER if (sizeof(T) == 2) { @@ -244,6 +280,9 @@ inline T EndianSwapValue(T v) { // Reverses the order of bits in an integral value template inline T ReverseBits(T v) { + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + T r = EndianSwapValue(v); const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8); const T kEveryByte = kHighestByte | (kHighestByte / 255); @@ -277,7 +316,8 @@ inline T ReverseBits(T v) { // is that all square sub-matrices that include the top row are invertible. template inline T DownwardInvolution(T v) { - static_assert(std::is_integral::value, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); static_assert(sizeof(T) <= 8, "only supported up to 64 bits"); uint64_t r = static_cast(v); @@ -296,4 +336,16 @@ inline T DownwardInvolution(T v) { return static_cast(r); } +// Bitwise-And with typing that allows you to avoid writing an explicit cast +// to the smaller type, or the type of the right parameter if same size. 
+template +inline std::conditional_t BitwiseAnd(A a, B b) { + static_assert(std::is_integral_v, "non-integral type"); + static_assert(std::is_integral_v, "non-integral type"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + static_assert(!std::is_reference_v, "use std::remove_reference_t"); + using Smaller = std::conditional_t; + return static_cast(a & b); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/util/math128.h b/util/math128.h index ae490051a..5f96dbc66 100644 --- a/util/math128.h +++ b/util/math128.h @@ -41,13 +41,13 @@ struct Unsigned128 { hi = upper; } - explicit operator uint64_t() { return lo; } - - explicit operator uint32_t() { return static_cast(lo); } - - explicit operator uint16_t() { return static_cast(lo); } - - explicit operator uint8_t() { return static_cast(lo); } + // Convert to any integer 64 bits or less. + template && + sizeof(T) <= sizeof(uint64_t)> > + explicit operator T() { + return static_cast(lo); + } }; inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) { @@ -190,6 +190,16 @@ inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) { #endif } +template <> +inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) { + if (nbits < 64) { + return BottomNBits(Lower64of128(v), nbits); + } else { + return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) | + Lower64of128(v); + } +} + template <> inline int FloorLog2(Unsigned128 v) { if (Upper64of128(v) == 0) { @@ -236,6 +246,18 @@ inline Unsigned128 DownwardInvolution(Unsigned128 v) { DownwardInvolution(Upper64of128(v) ^ Lower64of128(v)); } +template +inline std::remove_reference_t BitwiseAnd(A a, Unsigned128 b) { + static_assert(sizeof(A) <= sizeof(Unsigned128)); + return static_cast(a & b); +} + +template +inline std::remove_reference_t BitwiseAnd(Unsigned128 a, B b) { + static_assert(sizeof(B) <= sizeof(Unsigned128)); + return static_cast(a & b); +} + template struct IsUnsignedUpTo128 : std::integral_constant::value || From cf95821fb6b18b30edb56098da3afccdf2c88916 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 2 Aug 2023 12:34:11 -0700 Subject: [PATCH 016/386] Update for 8.5.fb branch cut (#11642) Summary: Updated the main branch for the 8.5.fb branch cut. Also made unreleased_history/release.sh backdate to the last commit instead of the current date in case the release manager is a laggard like myself. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11642 Reviewed By: cbi42 Differential Revision: D47783574 Pulled By: ajkr fbshipit-source-id: 4e2a80f5ccd542dc7dd0d22dfd7e59cb136325a1 --- HISTORY.md | 15 +++++++++++++++ include/rocksdb/version.h | 2 +- tools/check_format_compatible.sh | 2 +- .../fifo_ttl_periodic_compaction.md | 1 - .../fs_prefetch_compaction_read.md | 1 - unreleased_history/bug_fixes/fsbuffer_bug_fix.md | 1 - .../avoid_memcpy_directio.md | 1 - .../performance_improvements/hcc_perf | 1 - .../public_api_changes/rename_migration_caches.md | 1 - unreleased_history/release.sh | 2 +- 10 files changed, 18 insertions(+), 9 deletions(-) delete mode 100644 unreleased_history/behavior_changes/fifo_ttl_periodic_compaction.md delete mode 100644 unreleased_history/behavior_changes/fs_prefetch_compaction_read.md delete mode 100644 unreleased_history/bug_fixes/fsbuffer_bug_fix.md delete mode 100644 unreleased_history/performance_improvements/avoid_memcpy_directio.md delete mode 100644 unreleased_history/performance_improvements/hcc_perf delete mode 100644 unreleased_history/public_api_changes/rename_migration_caches.md diff --git a/HISTORY.md b/HISTORY.md index 41d4b8453..71b91d926 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,21 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.5.0 (07/21/2023) +### Public API Changes +* Removed recently added APIs `GeneralCache` and `MakeSharedGeneralCache()` as our plan changed to stop exposing a general-purpose cache interface. The old forms of these APIs, `Cache` and `NewLRUCache()`, are still available, although general-purpose caching support will be dropped eventually. + +### Behavior Changes +* Option `periodic_compaction_seconds` no longer supports FIFO compaction: setting it has no effect on FIFO compactions. FIFO compaction users should only set option `ttl` instead. +* Move prefetching responsibility to page cache for compaction read for non directIO use case + +### Performance Improvements +* In case of direct_io, if buffer passed by callee is already aligned, RandomAccessFileRead::Read will avoid realloacting a new buffer, reducing memcpy and use already passed aligned buffer. +* Small efficiency improvement to HyperClockCache by reducing chance of compiler-generated heap allocations + +### Bug Fixes +* Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature. + ## 8.4.0 (06/26/2023) ### New Features * Add FSReadRequest::fs_scratch which is a data buffer allocated and provided by underlying FileSystem to RocksDB during reads, when FS wants to provide its own buffer with data instead of using RocksDB provided FSReadRequest::scratch. This can help in cpu optimization by avoiding copy from file system's buffer to RocksDB buffer. More details on how to use/enable it in file_system.h. Right now its supported only for MultiReads(async + sync) with non direct io. diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index 1b934a79f..de6629d80 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -12,7 +12,7 @@ // NOTE: in 'main' development branch, this should be the *next* // minor or major version number planned for release. #define ROCKSDB_MAJOR 8 -#define ROCKSDB_MINOR 5 +#define ROCKSDB_MINOR 6 #define ROCKSDB_PATCH 0 // Do not use these. 
We made the mistake of declaring macros starting with diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index 6403b2675..1282d1375 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment diff --git a/unreleased_history/behavior_changes/fifo_ttl_periodic_compaction.md b/unreleased_history/behavior_changes/fifo_ttl_periodic_compaction.md deleted file mode 100644 index 6297ccc91..000000000 --- a/unreleased_history/behavior_changes/fifo_ttl_periodic_compaction.md +++ /dev/null @@ -1 +0,0 @@ -Option `periodic_compaction_seconds` no longer supports FIFO compaction: setting it has no effect on FIFO compactions. FIFO compaction users should only set option `ttl` instead. \ No newline at end of file diff --git a/unreleased_history/behavior_changes/fs_prefetch_compaction_read.md b/unreleased_history/behavior_changes/fs_prefetch_compaction_read.md deleted file mode 100644 index 0552a57e0..000000000 --- a/unreleased_history/behavior_changes/fs_prefetch_compaction_read.md +++ /dev/null @@ -1 +0,0 @@ -Move prefetching responsibility to page cache for compaction read for non directIO use case diff --git a/unreleased_history/bug_fixes/fsbuffer_bug_fix.md b/unreleased_history/bug_fixes/fsbuffer_bug_fix.md deleted file mode 100644 index bec91bc4f..000000000 --- a/unreleased_history/bug_fixes/fsbuffer_bug_fix.md +++ /dev/null @@ -1 +0,0 @@ -Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature. diff --git a/unreleased_history/performance_improvements/avoid_memcpy_directio.md b/unreleased_history/performance_improvements/avoid_memcpy_directio.md deleted file mode 100644 index d5ac0b911..000000000 --- a/unreleased_history/performance_improvements/avoid_memcpy_directio.md +++ /dev/null @@ -1 +0,0 @@ -In case of direct_io, if buffer passed by callee is already aligned, RandomAccessFileRead::Read will avoid realloacting a new buffer, reducing memcpy and use already passed aligned buffer. 
diff --git a/unreleased_history/performance_improvements/hcc_perf b/unreleased_history/performance_improvements/hcc_perf deleted file mode 100644 index c129393dc..000000000 --- a/unreleased_history/performance_improvements/hcc_perf +++ /dev/null @@ -1 +0,0 @@ -Small efficiency improvement to HyperClockCache by reducing chance of compiler-generated heap allocations diff --git a/unreleased_history/public_api_changes/rename_migration_caches.md b/unreleased_history/public_api_changes/rename_migration_caches.md deleted file mode 100644 index 3db59947d..000000000 --- a/unreleased_history/public_api_changes/rename_migration_caches.md +++ /dev/null @@ -1 +0,0 @@ -Removed recently added APIs `GeneralCache` and `MakeSharedGeneralCache()` as our plan changed to stop exposing a general-purpose cache interface. The old forms of these APIs, `Cache` and `NewLRUCache()`, are still available, although general-purpose caching support will be dropped eventually. diff --git a/unreleased_history/release.sh b/unreleased_history/release.sh index 07f3a92f9..1f50f51b0 100755 --- a/unreleased_history/release.sh +++ b/unreleased_history/release.sh @@ -31,7 +31,7 @@ awk '/#define ROCKSDB_MAJOR/ { major = $3 } /#define ROCKSDB_MINOR/ { minor = $3 } /#define ROCKSDB_PATCH/ { patch = $3 } END { printf "## " major "." minor "." patch }' < include/rocksdb/version.h >> HISTORY.new -echo " (`date +%x`)" >> HISTORY.new +echo " (`git log -n1 --date=format:"%m/%d/%Y" --format="%ad"`)" >> HISTORY.new function process_file () { # use awk to correct From f9de217353f2d45f06fe5b9eab50b191f1a2d7a2 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 2 Aug 2023 13:19:20 -0700 Subject: [PATCH 017/386] Some cache_bench enhancements (#11661) Summary: ... used in validating some HyperClockCache development in progress. * Revamp the "populate cache" step to avoid redundant insertions (very rare in practice) and more consistently approach the desired resident_ratio while maintaining appropriate skew (still not perfect). * Track and print hit ratio on lookups, to ensure a fair comparison is happening between implementations etc. * Add an option to disable tracking and printing histograms (lots of output) * Add an option to specify a random seed (for more reproducibility) * Remove confusing/redundant "-skewed" option Uses BitwiseAnd from https://github.com/facebook/rocksdb/issues/11660 (tested there) Pull Request resolved: https://github.com/facebook/rocksdb/pull/11661 Test Plan: manual Reviewed By: akankshamahajan15, jowlyzhang Differential Revision: D47937671 Pulled By: pdillinger fbshipit-source-id: 85a2bb881b1bca4f63e015bac684105fd91c9f35 --- cache/cache_bench_tool.cc | 175 +++++++++++++++++++++++++------------- 1 file changed, 116 insertions(+), 59 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 61b12b19e..fd1b44d16 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -50,7 +50,7 @@ DEFINE_double(resident_ratio, 0.25, DEFINE_uint64(ops_per_thread, 2000000U, "Number of operations per thread."); DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added."); -DEFINE_uint32(skew, 5, "Degree of skew in key selection"); +DEFINE_uint32(skew, 5, "Degree of skew in key selection. 
0 = no skew"); DEFINE_bool(populate_cache, true, "Populate cache before operations"); DEFINE_uint32(lookup_insert_percent, 87, @@ -71,7 +71,6 @@ DEFINE_uint32( DEFINE_uint32(gather_stats_entries_per_lock, 256, "For Cache::ApplyToAllEntries"); -DEFINE_bool(skewed, false, "If true, skew the key access distribution"); DEFINE_bool(lean, false, "If true, no additional computation is performed besides cache " @@ -81,6 +80,11 @@ DEFINE_bool(early_exit, false, "Exit before deallocating most memory. Good for malloc stats, e.g." "MALLOC_CONF=\"stats_print:true\""); +DEFINE_bool(histograms, true, + "Whether to track and print histogram statistics."); + +DEFINE_uint32(seed, 0, "Hashing/random seed to use. 0 = choose at random"); + DEFINE_string(secondary_cache_uri, "", "Full URI for creating a custom secondary cache object"); static class std::shared_ptr secondary_cache; @@ -149,9 +153,6 @@ class SharedState { public: explicit SharedState(CacheBench* cache_bench) : cv_(&mu_), - num_initialized_(0), - start_(false), - num_done_(0), cache_bench_(cache_bench) {} ~SharedState() {} @@ -174,15 +175,27 @@ class SharedState { bool Started() const { return start_; } + void AddLookupStats(uint64_t hits, uint64_t misses) { + MutexLock l(&mu_); + lookup_count_ += hits + misses; + lookup_hits_ += hits; + } + + double GetLookupHitRatio() const { + return 1.0 * lookup_hits_ / lookup_count_; + } + private: port::Mutex mu_; port::CondVar cv_; - uint64_t num_initialized_; - bool start_; - uint64_t num_done_; - CacheBench* cache_bench_; + + uint64_t num_initialized_ = 0; + bool start_ = false; + uint64_t num_done_ = 0; + uint64_t lookup_count_ = 0; + uint64_t lookup_hits_ = 0; }; // Per-thread state for concurrent executions of the same benchmark. @@ -194,27 +207,19 @@ struct ThreadState { uint64_t duration_us = 0; ThreadState(uint32_t index, SharedState* _shared) - : tid(index), rnd(1000 + index), shared(_shared) {} + : tid(index), rnd(FLAGS_seed + 1 + index), shared(_shared) {} }; struct KeyGen { char key_data[27]; - Slice GetRand(Random64& rnd, uint64_t max_key, int max_log) { - uint64_t key = 0; - if (!FLAGS_skewed) { - uint64_t raw = rnd.Next(); - // Skew according to setting - for (uint32_t i = 0; i < FLAGS_skew; ++i) { - raw = std::min(raw, rnd.Next()); - } - key = FastRange64(raw, max_key); - } else { - key = rnd.Skewed(max_log); - if (key > max_key) { - key -= max_key; - } + Slice GetRand(Random64& rnd, uint64_t max_key, uint32_t skew) { + uint64_t raw = rnd.Next(); + // Skew according to setting + for (uint32_t i = 0; i < skew; ++i) { + raw = std::min(raw, rnd.Next()); } + uint64_t key = FastRange64(raw, max_key); // Variable size and alignment size_t off = key % 8; key_data[0] = char{42}; @@ -285,31 +290,25 @@ class CacheBench { lookup_threshold_(insert_threshold_ + kHundredthUint64 * FLAGS_lookup_percent), erase_threshold_(lookup_threshold_ + - kHundredthUint64 * FLAGS_erase_percent), - skewed_(FLAGS_skewed) { + kHundredthUint64 * FLAGS_erase_percent) { if (erase_threshold_ != 100U * kHundredthUint64) { fprintf(stderr, "Percentages must add to 100.\n"); exit(1); } - max_log_ = 0; - if (skewed_) { - uint64_t max_key = max_key_; - while (max_key >>= 1) max_log_++; - if (max_key > (static_cast(1) << max_log_)) max_log_++; - } - if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); } else if (FLAGS_cache_type == "hyper_clock_cache") { - cache_ = HyperClockCacheOptions(FLAGS_cache_size, FLAGS_value_bytes, - FLAGS_num_shard_bits) - 
.MakeSharedCache(); + HyperClockCacheOptions opts(FLAGS_cache_size, FLAGS_value_bytes, + FLAGS_num_shard_bits); + opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); + cache_ = opts.MakeSharedCache(); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false /* strict_capacity_limit */, 0.5 /* high_pri_pool_ratio */); + opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); if (!FLAGS_secondary_cache_uri.empty()) { Status s = SecondaryCache::CreateFromString( ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); @@ -333,13 +332,50 @@ class CacheBench { ~CacheBench() {} void PopulateCache() { - Random64 rnd(1); + Random64 rnd(FLAGS_seed); KeyGen keygen; - for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) { - Status s = cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_), - createValue(rnd), &helper1, FLAGS_value_bytes); + size_t max_occ = 0; + size_t inserts_since_max_occ_increase = 0; + size_t keys_since_last_not_found = 0; + + // Avoid redundant insertions by checking Lookup before Insert. + // Loop until insertions consistently fail to increase max occupancy or + // it becomes difficult to find keys not already inserted. + while (inserts_since_max_occ_increase < 100 && + keys_since_last_not_found < 100) { + Slice key = keygen.GetRand(rnd, max_key_, FLAGS_skew); + + Cache::Handle* handle = cache_->Lookup(key); + if (handle != nullptr) { + cache_->Release(handle); + ++keys_since_last_not_found; + continue; + } + keys_since_last_not_found = 0; + + Status s = + cache_->Insert(key, createValue(rnd), &helper1, FLAGS_value_bytes); assert(s.ok()); + + handle = cache_->Lookup(key); + if (!handle) { + fprintf(stderr, "Failed to lookup key just inserted.\n"); + assert(false); + exit(42); + } else { + cache_->Release(handle); + } + + size_t occ = cache_->GetOccupancyCount(); + if (occ > max_occ) { + max_occ = occ; + inserts_since_max_occ_increase = 0; + } else { + ++inserts_since_max_occ_increase; + } } + printf("Population complete (%zu entries, %g average charge)\n", max_occ, + 1.0 * FLAGS_cache_size / max_occ); } bool Run() { @@ -398,18 +434,21 @@ class CacheBench { FLAGS_ops_per_thread / elapsed_secs); printf("Thread ops/sec = %u\n", ops_per_sec); - printf("\nOperation latency (ns):\n"); - HistogramImpl combined; - for (uint32_t i = 0; i < FLAGS_threads; i++) { - combined.Merge(threads[i]->latency_ns_hist); - } - printf("%s", combined.ToString().c_str()); + printf("Lookup hit ratio: %g\n", shared.GetLookupHitRatio()); - if (FLAGS_gather_stats) { - printf("\nGather stats latency (us):\n"); - printf("%s", stats_hist.ToString().c_str()); - } + if (FLAGS_histograms) { + printf("\nOperation latency (ns):\n"); + HistogramImpl combined; + for (uint32_t i = 0; i < FLAGS_threads; i++) { + combined.Merge(threads[i]->latency_ns_hist); + } + printf("%s", combined.ToString().c_str()); + if (FLAGS_gather_stats) { + printf("\nGather stats latency (us):\n"); + printf("%s", stats_hist.ToString().c_str()); + } + } printf("\n%s", stats_report.c_str()); return true; @@ -423,8 +462,6 @@ class CacheBench { const uint64_t insert_threshold_; const uint64_t lookup_threshold_; const uint64_t erase_threshold_; - const bool skewed_; - int max_log_; // A benchmark version of gathering stats on an active block cache by // iterating over it. 
The primary purpose is to measure the impact of @@ -494,13 +531,17 @@ class CacheBench { // Something slightly more expensive as in stats by category helpers.insert(helper); }; - timer.Start(); + if (FLAGS_histograms) { + timer.Start(); + } Cache::ApplyToAllEntriesOptions opts; opts.average_entries_per_lock = FLAGS_gather_stats_entries_per_lock; shared->GetCacheBench()->cache_->ApplyToAllEntries(fn, opts); table_occupancy = shared->GetCacheBench()->cache_->GetOccupancyCount(); table_size = shared->GetCacheBench()->cache_->GetTableAddressCount(); - stats_hist->Add(timer.ElapsedNanos() / 1000); + if (FLAGS_histograms) { + stats_hist->Add(timer.ElapsedNanos() / 1000); + } } } @@ -531,6 +572,8 @@ class CacheBench { void OperateCache(ThreadState* thread) { // To use looked-up values uint64_t result = 0; + uint64_t lookup_misses = 0; + uint64_t lookup_hits = 0; // To hold handles for a non-trivial amount of time Cache::Handle* handle = nullptr; KeyGen gen; @@ -539,10 +582,12 @@ class CacheBench { StopWatchNano timer(clock); for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { - Slice key = gen.GetRand(thread->rnd, max_key_, max_log_); + Slice key = gen.GetRand(thread->rnd, max_key_, FLAGS_skew); uint64_t random_op = thread->rnd.Next(); - timer.Start(); + if (FLAGS_histograms) { + timer.Start(); + } if (random_op < lookup_insert_threshold_) { if (handle) { @@ -553,12 +598,14 @@ class CacheBench { handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, Cache::Priority::LOW); if (handle) { + ++lookup_hits; if (!FLAGS_lean) { // do something with the data result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } } else { + ++lookup_misses; // do insert Status s = cache_->Insert(key, createValue(thread->rnd), &helper2, FLAGS_value_bytes, &handle); @@ -582,11 +629,14 @@ class CacheBench { handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, Cache::Priority::LOW); if (handle) { + ++lookup_hits; if (!FLAGS_lean) { // do something with the data result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } + } else { + ++lookup_misses; } } else if (random_op < erase_threshold_) { // do erase @@ -595,7 +645,10 @@ class CacheBench { // Should be extremely unlikely (noop) assert(random_op >= kHundredthUint64 * 100U); } - thread->latency_ns_hist.Add(timer.ElapsedNanos()); + if (FLAGS_histograms) { + thread->latency_ns_hist.Add(timer.ElapsedNanos()); + } + thread->shared->AddLookupStats(lookup_hits, lookup_misses); } if (FLAGS_early_exit) { MutexLock l(thread->shared->GetMutex()); @@ -621,6 +674,7 @@ class CacheBench { #ifndef NDEBUG printf("WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); #endif + printf("----------------------------\n"); printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); printf("DMutex impl name : %s\n", DMutex::kName()); printf("Number of threads : %u\n", FLAGS_threads); @@ -960,11 +1014,14 @@ int cache_bench_tool(int argc, char** argv) { exit(1); } + if (FLAGS_seed == 0) { + FLAGS_seed = static_cast(port::GetProcessID()); + printf("Using seed = %" PRIu32 "\n", FLAGS_seed); + } + ROCKSDB_NAMESPACE::CacheBench bench; if (FLAGS_populate_cache) { bench.PopulateCache(); - printf("Population complete\n"); - printf("----------------------------\n"); } if (bench.Run()) { return 0; From 87a21d08fe606055ed79144b725445c38b4e0ae2 Mon Sep 17 00:00:00 2001 From: Vardhan Date: Wed, 2 Aug 2023 19:58:56 -0700 Subject: [PATCH 018/386] Add an option to trigger flush when the number of range deletions reach a threshold (#11358) 
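For context before the patch details that follow: a minimal usage sketch of the new option, assuming the C++ API this patch adds. The DB path, the limits chosen (50 and 100), and the surrounding scaffolding are placeholders for illustration, not code from the patch.

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  Options options;
  options.create_if_missing = true;
  // Ask RocksDB to flush the active memtable once it holds about 50 range
  // tombstones. 0 (the default) leaves the feature disabled.
  options.memtable_max_range_deletions = 50;

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/memtable_range_del_demo", &db);
  assert(s.ok());

  // The option is mutable, so it can also be adjusted on a live DB:
  s = db->SetOptions({{"memtable_max_range_deletions", "100"}});
  assert(s.ok());

  delete db;
  return 0;
}
```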
Summary: Add a mutable column family option `memtable_max_range_deletions`. When non-zero, RocksDB will try to flush the current memtable after it has at least `memtable_max_range_deletions` range deletions. Java API is added and crash test is updated accordingly to randomly enable this option. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11358 Test Plan: * New unit test: `DBRangeDelTest.MemtableMaxRangeDeletions` * Ran crash test `python3 ./tools/db_crashtest.py whitebox --simple --memtable_max_range_deletions=20` and saw logs showing flushed memtables usually with 20 range deletions. Reviewed By: ajkr Differential Revision: D46582680 Pulled By: cbi42 fbshipit-source-id: f23d6fa8d8264ecf0a18d55c113ba03f5e2504da --- db/db_range_del_test.cc | 36 ++++++++++++++ db/flush_job.cc | 8 +++- db/memtable.cc | 19 +++++++- db/memtable.h | 17 +++++++ db_stress_tool/db_stress_common.h | 2 + db_stress_tool/db_stress_gflags.cc | 4 ++ db_stress_tool/db_stress_test_base.cc | 2 + include/rocksdb/options.h | 11 +++++ java/rocksjni/options.cc | 47 +++++++++++++++++++ .../java/org/rocksdb/ColumnFamilyOptions.java | 13 +++++ .../rocksdb/ColumnFamilyOptionsInterface.java | 17 +++++++ java/src/main/java/org/rocksdb/Options.java | 13 +++++ .../org/rocksdb/ColumnFamilyOptionsTest.java | 10 ++++ .../test/java/org/rocksdb/OptionsTest.java | 10 ++++ options/cf_options.cc | 5 ++ options/cf_options.h | 7 ++- options/options.cc | 4 +- options/options_helper.cc | 1 + options/options_settable_test.cc | 3 +- options/options_test.cc | 4 ++ tools/db_crashtest.py | 1 + .../new_features/memetable_range_del_limit.md | 1 + 22 files changed, 229 insertions(+), 6 deletions(-) create mode 100644 unreleased_history/new_features/memetable_range_del_limit.md diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 5abb7dd2b..bb75592c7 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -3475,6 +3475,42 @@ TEST_F(DBRangeDelTest, NonBottommostCompactionDropRangetombstone) { db_->ReleaseSnapshot(snapshot); } +TEST_F(DBRangeDelTest, MemtableMaxRangeDeletions) { + // Tests option `memtable_max_range_deletions`. + Options options = CurrentOptions(); + options.level_compaction_dynamic_file_size = false; + options.memtable_max_range_deletions = 50; + options.level0_file_num_compaction_trigger = 5; + DestroyAndReopen(options); + + for (int i = 0; i < 50; ++i) { + // Intentionally delete overlapping ranges to see if the option + // checks number of range tombstone fragments instead. + ASSERT_OK(Put(Key(i), "val1")); + ASSERT_OK(Put(Key(i + 1), "val2")); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(i), Key(i + 2))); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_EQ(0, NumTableFilesAtLevel(0)); + } + // One more write to trigger flush. + ASSERT_OK(Put(Key(50), "val")); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + + // This should take effect for the next new memtable. + ASSERT_OK(db_->SetOptions({{"memtable_max_range_deletions", "1"}})); + ASSERT_OK(Flush()); + ASSERT_EQ(2, NumTableFilesAtLevel(0)); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + Key(50), Key(100))); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_EQ(2, NumTableFilesAtLevel(0)); + // One more write to trigger flush. 
+ ASSERT_OK(Put(Key(50), "new val")); + ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_EQ(3, NumTableFilesAtLevel(0)); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/flush_job.cc b/db/flush_job.cc index b989cc8e3..d3a777b44 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -861,6 +861,7 @@ Status FlushJob::WriteLevel0Table() { uint64_t total_num_entries = 0, total_num_deletes = 0; uint64_t total_data_size = 0; size_t total_memory_usage = 0; + uint64_t total_num_range_deletes = 0; // Used for testing: uint64_t mems_size = mems_.size(); (void)mems_size; // avoids unused variable error when @@ -883,15 +884,20 @@ Status FlushJob::WriteLevel0Table() { total_num_deletes += m->num_deletes(); total_data_size += m->get_data_size(); total_memory_usage += m->ApproximateMemoryUsage(); + total_num_range_deletes += m->num_range_deletes(); } + // TODO(cbi): when memtable is flushed due to number of range deletions + // hitting limit memtable_max_range_deletions, flush_reason_ is still + // "Write Buffer Full", should make update flush_reason_ accordingly. event_logger_->Log() << "job" << job_context_->job_id << "event" << "flush_started" << "num_memtables" << mems_.size() << "num_entries" << total_num_entries << "num_deletes" << total_num_deletes << "total_data_size" << total_data_size << "memory_usage" - << total_memory_usage << "flush_reason" + << total_memory_usage << "num_range_deletes" + << total_num_range_deletes << "flush_reason" << GetFlushReasonString(flush_reason_); { diff --git a/db/memtable.cc b/db/memtable.cc index 216bb8d6e..8a71a6494 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -95,6 +95,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, data_size_(0), num_entries_(0), num_deletes_(0), + num_range_deletes_(0), write_buffer_size_(mutable_cf_options.write_buffer_size), flush_in_progress_(false), flush_completed_(false), @@ -114,7 +115,9 @@ MemTable::MemTable(const InternalKeyComparator& cmp, ioptions.memtable_insert_with_hint_prefix_extractor.get()), oldest_key_time_(std::numeric_limits::max()), atomic_flush_seqno_(kMaxSequenceNumber), - approximate_memory_usage_(0) { + approximate_memory_usage_(0), + memtable_max_range_deletions_( + mutable_cf_options.memtable_max_range_deletions) { UpdateFlushState(); // something went wrong if we need to flush before inserting anything assert(!ShouldScheduleFlush()); @@ -174,6 +177,14 @@ size_t MemTable::ApproximateMemoryUsage() { } bool MemTable::ShouldFlushNow() { + // This is set if memtable_max_range_deletions is > 0, + // and that many range deletions are done + if (memtable_max_range_deletions_ > 0 && + num_range_deletes_.load(std::memory_order_relaxed) >= + static_cast(memtable_max_range_deletions_)) { + return true; + } + size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed); // In a lot of times, we cannot allocate arena blocks that exactly matches the // buffer size. 
Thus we have to decide if we should over-allocate or @@ -756,6 +767,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type, type == kTypeDeletionWithTimestamp) { num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + } else if (type == kTypeRangeDeletion) { + uint64_t val = num_range_deletes_.load(std::memory_order_relaxed) + 1; + num_range_deletes_.store(val, std::memory_order_relaxed); } if (bloom_filter_ && prefix_extractor_ && @@ -822,6 +836,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type, auto new_cache = std::make_shared(); size_t size = cached_range_tombstone_.Size(); if (allow_concurrent) { + post_process_info->num_range_deletes++; range_del_mutex_.lock(); } for (size_t i = 0; i < size; ++i) { @@ -840,6 +855,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type, new_local_cache_ref, new_cache.get()), std::memory_order_relaxed); } + if (allow_concurrent) { range_del_mutex_.unlock(); } @@ -1268,6 +1284,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, // Avoiding recording stats for speed. return false; } + PERF_TIMER_GUARD(get_from_memtable_time); std::unique_ptr range_del_iter( diff --git a/db/memtable.h b/db/memtable.h index bfe882b0e..c55b34761 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -68,6 +68,7 @@ struct MemTablePostProcessInfo { uint64_t data_size = 0; uint64_t num_entries = 0; uint64_t num_deletes = 0; + uint64_t num_range_deletes = 0; }; using MultiGetRange = MultiGetContext::Range; @@ -332,6 +333,10 @@ class MemTable { num_deletes_.fetch_add(update_counters.num_deletes, std::memory_order_relaxed); } + if (update_counters.num_range_deletes > 0) { + num_range_deletes_.fetch_add(update_counters.num_range_deletes, + std::memory_order_relaxed); + } UpdateFlushState(); } @@ -349,6 +354,13 @@ class MemTable { return num_deletes_.load(std::memory_order_relaxed); } + // Get total number of range deletions in the mem table. + // REQUIRES: external synchronization to prevent simultaneous + // operations on the same MemTable (unless this Memtable is immutable). + uint64_t num_range_deletes() const { + return num_range_deletes_.load(std::memory_order_relaxed); + } + uint64_t get_data_size() const { return data_size_.load(std::memory_order_relaxed); } @@ -565,6 +577,7 @@ class MemTable { std::atomic data_size_; std::atomic num_entries_; std::atomic num_deletes_; + std::atomic num_range_deletes_; // Dynamically changeable memtable option std::atomic write_buffer_size_; @@ -626,6 +639,10 @@ class MemTable { // Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow` std::atomic approximate_memory_usage_; + // max range deletions in a memtable, before automatic flushing, 0 for + // unlimited. + uint32_t memtable_max_range_deletions_ = 0; + // Flush job info of the current memtable. 
std::unique_ptr flush_job_info_; diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index c0b1e6fd2..0a9dd4251 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -327,6 +327,8 @@ DECLARE_bool(allow_data_in_errors); DECLARE_bool(enable_thread_tracking); +DECLARE_uint32(memtable_max_range_deletions); + // Tiered storage DECLARE_bool(enable_tiered_storage); // set last_level_temperature DECLARE_int64(preclude_last_level_data_seconds); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index df4c3be0b..32764189c 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1102,4 +1102,8 @@ DEFINE_uint64(stats_dump_period_sec, DEFINE_bool(use_io_uring, false, "Enable the use of IO uring on Posix"); extern "C" bool RocksDbIOUringEnable() { return FLAGS_use_io_uring; } +DEFINE_uint32(memtable_max_range_deletions, 0, + "If nonzero, RocksDB will try to flush the current memtable" + "after the number of range deletions is >= this limit"); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index ff004ae0f..01026a319 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -3278,6 +3278,8 @@ void InitializeOptionsFromFlags( options.allow_data_in_errors = FLAGS_allow_data_in_errors; options.enable_thread_tracking = FLAGS_enable_thread_tracking; + + options.memtable_max_range_deletions = FLAGS_memtable_max_range_deletions; } void InitializeOptionsGeneral( diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 53e534164..8b176e6ad 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -331,6 +331,17 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions { // Default: nullptr std::shared_ptr sst_partitioner_factory = nullptr; + // RocksDB will try to flush the current memtable after the number of range + // deletions is >= this limit. For workloads with many range + // deletions, limiting the number of range deletions in memtable can help + // prevent performance degradation and/or OOM caused by too many range + // tombstones in a single memtable. 
+ // + // Default: 0 (disabled) + // + // Dynamically changeable through SetOptions() API + uint32_t memtable_max_range_deletions = 0; + // Create ColumnFamilyOptions with default values for all fields ColumnFamilyOptions(); // Create ColumnFamilyOptions from Options diff --git a/java/rocksjni/options.cc b/java/rocksjni/options.cc index 724d298e7..0d84901c9 100644 --- a/java/rocksjni/options.cc +++ b/java/rocksjni/options.cc @@ -3904,6 +3904,29 @@ jbyte Java_org_rocksdb_Options_prepopulateBlobCache(JNIEnv*, jobject, opts->prepopulate_blob_cache); } +/* + * Class: org_rocksdb_Options + * Method: setMemtableMaxRangeDeletions + * Signature: (JI)V + */ +void Java_org_rocksdb_Options_setMemtableMaxRangeDeletions( + JNIEnv*, jobject, jlong jhandle, jint jmemtable_max_range_deletions) { + auto* opts = reinterpret_cast(jhandle); + opts->memtable_max_range_deletions = + static_cast(jmemtable_max_range_deletions); +} + +/* + * Class: org_rocksdb_Options + * Method: memtableMaxRangeDeletions + * Signature: (J)I + */ +jint Java_org_rocksdb_Options_memtableMaxRangeDeletions(JNIEnv*, jobject, + jlong jhandle) { + auto* opts = reinterpret_cast(jhandle); + return static_cast(opts->memtable_max_range_deletions); +} + ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::ColumnFamilyOptions @@ -5770,6 +5793,30 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_prepopulateBlobCache(JNIEnv*, opts->prepopulate_blob_cache); } +/* + * Class: org_rocksdb_ColumnFamilyOptions + * Method: setMemtableMaxRangeDeletions + * Signature: (JI)V + */ +void Java_org_rocksdb_ColumnFamilyOptions_setMemtableMaxRangeDeletions( + JNIEnv*, jobject, jlong jhandle, jint jmemtable_max_range_deletions) { + auto* opts = + reinterpret_cast(jhandle); + opts->memtable_max_range_deletions = jmemtable_max_range_deletions; +} + +/* + * Class: org_rocksdb_ColumnFamilyOptions + * Method: memtableMaxRangeDeletions + * Signature: (J)I + */ +jint Java_org_rocksdb_ColumnFamilyOptions_memtableMaxRangeDeletions( + JNIEnv*, jobject, jlong jhandle) { + auto* opts = + reinterpret_cast(jhandle); + return static_cast(opts->memtable_max_range_deletions); +} + ///////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DBOptions diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index d8d9658fc..8274ebeea 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -959,6 +959,17 @@ public SstPartitionerFactory sstPartitionerFactory() { return sstPartitionerFactory_; } + @Override + public ColumnFamilyOptions setMemtableMaxRangeDeletions(final int count) { + setMemtableMaxRangeDeletions(nativeHandle_, count); + return this; + } + + @Override + public int memtableMaxRangeDeletions() { + return memtableMaxRangeDeletions(nativeHandle_); + } + // // BEGIN options for blobs (integrated BlobDB) // @@ -1498,6 +1509,8 @@ private native void setForceConsistencyChecks(final long handle, private native void setSstPartitionerFactory(long nativeHandle_, long newFactoryHandle); private static native void setCompactionThreadLimiter( final long nativeHandle_, final long compactionThreadLimiterHandle); + private native void setMemtableMaxRangeDeletions(final long handle, final int count); + private native int memtableMaxRangeDeletions(final long handle); private native void setEnableBlobFiles(final long nativeHandle_, final boolean enableBlobFiles); 
private native boolean enableBlobFiles(final long nativeHandle_); diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java index 776fc7038..4776773bd 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java @@ -506,6 +506,23 @@ T setCompressionOptions( @Experimental("Caution: this option is experimental") SstPartitionerFactory sstPartitionerFactory(); + /** + * Sets the maximum range delete calls, after which memtable is flushed. + * This applies to the mutable memtable. + * + * @param count a positive integer, 0 (default) to disable the feature. + * @return the reference of the current options. + */ + T setMemtableMaxRangeDeletions(final int count); + + /** + * Gets the current setting of maximum range deletes allowed + * 0(default) indicates that feature is disabled. + * + * @return current value of memtable_max_range_deletions + */ + int memtableMaxRangeDeletions(); + /** * Compaction concurrent thread limiter for the column family. * If non-nullptr, use given concurrent thread limiter to control diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 08a07661c..bfb0e9e48 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -1984,6 +1984,17 @@ public SstPartitionerFactory sstPartitionerFactory() { return sstPartitionerFactory_; } + @Override + public Options setMemtableMaxRangeDeletions(final int count) { + setMemtableMaxRangeDeletions(nativeHandle_, count); + return this; + } + + @Override + public int memtableMaxRangeDeletions() { + return memtableMaxRangeDeletions(nativeHandle_); + } + @Override public Options setCompactionThreadLimiter(final ConcurrentTaskLimiter compactionThreadLimiter) { setCompactionThreadLimiter(nativeHandle_, compactionThreadLimiter.nativeHandle_); @@ -2502,6 +2513,8 @@ private native void setAtomicFlush(final long handle, final boolean atomicFlush); private native boolean atomicFlush(final long handle); private native void setSstPartitionerFactory(long nativeHandle_, long newFactoryHandle); + private native void setMemtableMaxRangeDeletions(final long handle, final int count); + private native int memtableMaxRangeDeletions(final long handle); private static native void setCompactionThreadLimiter( final long nativeHandle_, final long newLimiterHandle); private static native void setAvoidUnnecessaryBlockingIO( diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java b/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java index a5fe8cef7..35a04a697 100644 --- a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java +++ b/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java @@ -709,4 +709,14 @@ public void cfPaths() throws IOException { assertThat(options.cfPaths()).isEqualTo(paths); } } + + @Test + public void memtableMaxRangeDeletions() { + try (final ColumnFamilyOptions options = new ColumnFamilyOptions()) { + assertThat(options.memtableMaxRangeDeletions()).isEqualTo(0); + final int val = 32; + assertThat(options.setMemtableMaxRangeDeletions(val)).isEqualTo(options); + assertThat(options.memtableMaxRangeDeletions()).isEqualTo(val); + } + } } diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/rocksdb/OptionsTest.java index e1a7f8c27..bef25fc81 100644 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ 
b/java/src/test/java/org/rocksdb/OptionsTest.java @@ -1452,6 +1452,16 @@ public void skipCheckingSstFileSizesOnDbOpen() { } } + @Test + public void memtableMaxRangeDeletions() { + try (final Options options = new Options()) { + assertThat(options.memtableMaxRangeDeletions()).isEqualTo(0); + final int val = 32; + assertThat(options.setMemtableMaxRangeDeletions(val)).isEqualTo(options); + assertThat(options.memtableMaxRangeDeletions()).isEqualTo(val); + } + } + @Test public void eventListeners() { final AtomicBoolean wasCalled1 = new AtomicBoolean(); diff --git a/options/cf_options.cc b/options/cf_options.cc index ad1e669df..a425c18bd 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -552,6 +552,11 @@ static std::unordered_map } })}, // End special case properties + {"memtable_max_range_deletions", + {offsetof(struct MutableCFOptions, memtable_max_range_deletions), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + }; static std::unordered_map diff --git a/options/cf_options.h b/options/cf_options.h index 37ef54c0c..86de78d9d 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -175,7 +175,8 @@ struct MutableCFOptions { block_protection_bytes_per_key(options.block_protection_bytes_per_key), sample_for_compression( options.sample_for_compression), // TODO: is 0 fine here? - compression_per_level(options.compression_per_level) { + compression_per_level(options.compression_per_level), + memtable_max_range_deletions(options.memtable_max_range_deletions) { RefreshDerivedOptions(options.num_levels, options.compaction_style); } @@ -224,7 +225,8 @@ struct MutableCFOptions { last_level_temperature(Temperature::kUnknown), memtable_protection_bytes_per_key(0), block_protection_bytes_per_key(0), - sample_for_compression(0) {} + sample_for_compression(0), + memtable_max_range_deletions(0) {} explicit MutableCFOptions(const Options& options); @@ -318,6 +320,7 @@ struct MutableCFOptions { uint64_t sample_for_compression; std::vector compression_per_level; + uint32_t memtable_max_range_deletions; // Derived options // Per-level target file size. diff --git a/options/options.cc b/options/options.cc index 1caebdfb2..4e3ac4115 100644 --- a/options/options.cc +++ b/options/options.cc @@ -448,8 +448,10 @@ void ColumnFamilyOptions::Dump(Logger* log) const { ? 
"flush only" : "disabled"); } - ROCKS_LOG_HEADER(log, "Options.experimental_mempurge_threshold: %f", + ROCKS_LOG_HEADER(log, " Options.experimental_mempurge_threshold: %f", experimental_mempurge_threshold); + ROCKS_LOG_HEADER(log, " Options.memtable_max_range_deletions: %d", + memtable_max_range_deletions); } // ColumnFamilyOptions::Dump void Options::Dump(Logger* log) const { diff --git a/options/options_helper.cc b/options/options_helper.cc index d221f9705..83b4e970d 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -272,6 +272,7 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions, cf_opts->compression_per_level = moptions.compression_per_level; cf_opts->last_level_temperature = moptions.last_level_temperature; cf_opts->bottommost_temperature = moptions.last_level_temperature; + cf_opts->memtable_max_range_deletions = moptions.memtable_max_range_deletions; } void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions, diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 8b69e6079..c2bf8640b 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -557,7 +557,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "blob_cache=1M;" "memtable_protection_bytes_per_key=2;" "persist_user_defined_timestamps=true;" - "block_protection_bytes_per_key=1;", + "block_protection_bytes_per_key=1;" + "memtable_max_range_deletions=999999;", new_options)); ASSERT_NE(new_options->blob_cache.get(), nullptr); diff --git a/options/options_test.cc b/options/options_test.cc index ef0b50843..067b00b77 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -131,6 +131,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, {"persist_user_defined_timestamps", "true"}, + {"memtable_max_range_deletions", "0"}, }; std::unordered_map db_options_map = { @@ -284,6 +285,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); + ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(exact, base_cf_opt, cf_options_map, @@ -2338,6 +2340,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, {"persist_user_defined_timestamps", "true"}, + {"memtable_max_range_deletions", "0"}, }; std::unordered_map db_options_map = { @@ -2489,6 +2492,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); + ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0); cf_options_map["write_buffer_size"] = "hello"; ASSERT_NOK(GetColumnFamilyOptionsFromMap(cf_config_options, base_cf_opt, diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 01b71136a..fd753b90b 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -208,6 +208,7 @@ "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]), "min_write_buffer_number_to_merge": lambda: random.choice([1, 2]), "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]), + "memtable_max_range_deletions": lambda: 
random.choice([0] * 6 + [100, 1000]), } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" diff --git a/unreleased_history/new_features/memetable_range_del_limit.md b/unreleased_history/new_features/memetable_range_del_limit.md new file mode 100644 index 000000000..72c4d53f4 --- /dev/null +++ b/unreleased_history/new_features/memetable_range_del_limit.md @@ -0,0 +1 @@ +Add a column family option `memtable_max_range_deletions` that limits the number of range deletions in a memtable. RocksDB will try to do an automatic flush after the limit is reached. (#11358) \ No newline at end of file From 09882a52d6d42d22e35475360eb5ae4356d3f3f6 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 3 Aug 2023 17:23:02 -0700 Subject: [PATCH 019/386] Prepare for deprecation of Options::access_hint_on_compaction_start (#11658) Summary: **Context/Summary:** After https://github.com/facebook/rocksdb/pull/11631, file hint is not longer needed for compaction read. Therefore we can deprecate `Options::access_hint_on_compaction_start`. As this is a public API change, we should first mark the relevant APIs (including the Java's) deprecated and remove it in next major release 9.0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11658 Test Plan: No code change Reviewed By: ajkr Differential Revision: D47997856 Pulled By: hx235 fbshipit-source-id: 16e015ae7728c224b1caef73143aa9915668f4ac --- db/db_impl/db_impl_open.cc | 6 ++++-- db/db_options_test.cc | 3 ++- file/prefetch_test.cc | 13 +++---------- include/rocksdb/options.h | 3 +++ java/src/main/java/org/rocksdb/AccessHint.java | 1 + java/src/main/java/org/rocksdb/DBOptions.java | 2 ++ .../java/org/rocksdb/DBOptionsInterface.java | 4 ++-- java/src/main/java/org/rocksdb/Options.java | 2 ++ .../test/java/org/rocksdb/DBOptionsTest.java | 1 + .../src/test/java/org/rocksdb/OptionsTest.java | 1 + table/block_based/block_based_table_reader.cc | 18 +----------------- ...mark_dep_access_hint_on_compaction_start.md | 1 + 12 files changed, 23 insertions(+), 32 deletions(-) create mode 100644 unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 3d41520c8..e6d97b125 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -143,8 +143,10 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src, result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1); } - if (result.use_direct_reads && result.compaction_readahead_size == 0) { - TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); + if (result.compaction_readahead_size == 0) { + if (result.use_direct_reads) { + TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); + } result.compaction_readahead_size = 1024 * 1024 * 2; } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index d64d0eae5..df6e10850 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1043,7 +1043,8 @@ TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { const std::string kValue(1024, 'v'); Reopen(options); - ASSERT_EQ(0, dbfull()->GetDBOptions().compaction_readahead_size); + ASSERT_EQ(1024 * 1024 * 2, + dbfull()->GetDBOptions().compaction_readahead_size); ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); for (int i = 0; i < 1024; i++) { diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 6e807f581..15b1c6beb 100644 --- a/file/prefetch_test.cc +++ 
b/file/prefetch_test.cc @@ -238,16 +238,9 @@ TEST_P(PrefetchTest, Basic) { fs->ClearPrefetchCount(); } else { ASSERT_FALSE(fs->IsPrefetchCalled()); - if (use_direct_io) { - // To rule out false positive by the SST file tail prefetch during - // compaction output verification - ASSERT_GT(buff_prefetch_count, 1); - } else { - // In buffered IO, compaction readahead size is 0, leading to no prefetch - // during compaction input read - ASSERT_EQ(buff_prefetch_count, 1); - } - + // To rule out false positive by the SST file tail prefetch during + // compaction output verification + ASSERT_GT(buff_prefetch_count, 1); buff_prefetch_count = 0; ASSERT_GT(cur_table_open_prefetch_tail_read.count, diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 8b176e6ad..7fac24335 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -942,6 +942,9 @@ struct DBOptions { // Default: null std::shared_ptr write_buffer_manager = nullptr; + // DEPRECATED + // This flag has no effect on the behavior of compaction and we plan to delete + // it in the future. // Specify the file access pattern once a compaction is started. // It will be applied to all input files of a compaction. // Default: NORMAL diff --git a/java/src/main/java/org/rocksdb/AccessHint.java b/java/src/main/java/org/rocksdb/AccessHint.java index 877c4ab39..b7ccadd84 100644 --- a/java/src/main/java/org/rocksdb/AccessHint.java +++ b/java/src/main/java/org/rocksdb/AccessHint.java @@ -8,6 +8,7 @@ /** * File access pattern once a compaction has started */ +@Deprecated public enum AccessHint { NONE((byte)0x0), NORMAL((byte)0x1), diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/rocksdb/DBOptions.java index 655d900c3..62ad137ee 100644 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ b/java/src/main/java/org/rocksdb/DBOptions.java @@ -752,6 +752,7 @@ public long dbWriteBufferSize() { } @Override + @Deprecated public DBOptions setAccessHintOnCompactionStart(final AccessHint accessHint) { assert(isOwningHandle()); setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue()); @@ -759,6 +760,7 @@ public DBOptions setAccessHintOnCompactionStart(final AccessHint accessHint) { } @Override + @Deprecated public AccessHint accessHintOnCompactionStart() { assert(isOwningHandle()); return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_)); diff --git a/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/java/src/main/java/org/rocksdb/DBOptionsInterface.java index ef1b86bff..326da98d2 100644 --- a/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/DBOptionsInterface.java @@ -935,7 +935,7 @@ public interface DBOptionsInterface> { * * @return the reference to the current options. */ - T setAccessHintOnCompactionStart(final AccessHint accessHint); + @Deprecated T setAccessHintOnCompactionStart(final AccessHint accessHint); /** * Specify the file access pattern once a compaction is started. 
@@ -945,7 +945,7 @@ public interface DBOptionsInterface> { * * @return The access hint */ - AccessHint accessHintOnCompactionStart(); + @Deprecated AccessHint accessHintOnCompactionStart(); /** * This is a maximum buffer size that is used by WinMmapReadableFile in diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index bfb0e9e48..d00b489ab 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -840,6 +840,7 @@ public long dbWriteBufferSize() { } @Override + @Deprecated public Options setAccessHintOnCompactionStart(final AccessHint accessHint) { assert(isOwningHandle()); setAccessHintOnCompactionStart(nativeHandle_, accessHint.getValue()); @@ -847,6 +848,7 @@ public Options setAccessHintOnCompactionStart(final AccessHint accessHint) { } @Override + @Deprecated public AccessHint accessHintOnCompactionStart() { assert(isOwningHandle()); return AccessHint.getAccessHint(accessHintOnCompactionStart(nativeHandle_)); diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/rocksdb/DBOptionsTest.java index 882015f3e..d79f78db7 100644 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/DBOptionsTest.java @@ -453,6 +453,7 @@ public void setWriteBufferManagerWithZeroBufferSize() throws RocksDBException { } } + @SuppressWarnings("deprecated") @Test public void accessHintOnCompactionStart() { try(final DBOptions opt = new DBOptions()) { diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/rocksdb/OptionsTest.java index bef25fc81..4b59464b1 100644 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ b/java/src/test/java/org/rocksdb/OptionsTest.java @@ -699,6 +699,7 @@ public void setWriteBufferManagerWithAllowStall() throws RocksDBException { } } + @SuppressWarnings("deprecated") @Test public void accessHintOnCompactionStart() { try (final Options opt = new Options()) { diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 1fc477842..129f87888 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -1215,23 +1215,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( return s; } -void BlockBasedTable::SetupForCompaction() { - switch (rep_->ioptions.access_hint_on_compaction_start) { - case Options::NONE: - break; - case Options::NORMAL: - rep_->file->file()->Hint(FSRandomAccessFile::kNormal); - break; - case Options::SEQUENTIAL: - rep_->file->file()->Hint(FSRandomAccessFile::kSequential); - break; - case Options::WILLNEED: - rep_->file->file()->Hint(FSRandomAccessFile::kWillNeed); - break; - default: - assert(false); - } -} +void BlockBasedTable::SetupForCompaction() {} std::shared_ptr BlockBasedTable::GetTableProperties() const { diff --git a/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md b/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md new file mode 100644 index 000000000..066caaf3c --- /dev/null +++ b/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md @@ -0,0 +1 @@ +Mark `Options::access_hint_on_compaction_start` related APIs as deprecated. See #11631 for alternative behavior. 
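For users migrating off the deprecated option, a hedged before/after sketch; the MakeOptions wrapper is invented for illustration, while the two fields shown are the real DBOptions members discussed above.

```cpp
#include "rocksdb/options.h"

using namespace ROCKSDB_NAMESPACE;

Options MakeOptions() {
  Options options;

  // Deprecated: per this change the hint no longer affects compaction reads,
  // and the API is slated for removal in the next major release.
  options.access_hint_on_compaction_start = Options::SEQUENTIAL;

  // Compaction read behavior is instead governed by readahead; a value of 0
  // is now sanitized to 2 MB whether or not direct reads are enabled (see the
  // db_impl_open.cc hunk above), and it can still be set explicitly.
  options.compaction_readahead_size = 2 * 1024 * 1024;

  return options;
}
```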
\ No newline at end of file From eca48bc16644d194c45964708463fdd18a9b8945 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 4 Aug 2023 14:29:50 -0700 Subject: [PATCH 020/386] Avoid shifting component too large error in FileTtlBooster (#11673) Summary: When `num_levels` > 65, we may be shifting more than 63 bits in FileTtlBooster. This can give errors like: `runtime error: shift exponent 98 is too large for 64-bit type 'uint64_t' (aka 'unsigned long')`. This PR makes a quick fix for this issue by taking a min in the shifting component. This issue should be rare since it requires a user using a large `num_levels`. I'll follow up with a more complex fix if needed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11673 Test Plan: * Add a unit test that produce the above error before this PR. Need to compile it with ubsan: `COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 compaction_picker_test` Reviewed By: hx235 Differential Revision: D48074386 Pulled By: cbi42 fbshipit-source-id: 25e59df7e93f20e0793cffb941de70ac815d9392 --- db/compaction/compaction_picker_test.cc | 9 +++++++++ db/compaction/file_pri.h | 4 +++- .../shifting_componeng_too_large_file_ttl_booster.md | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index fd14322b2..6aec03840 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -1968,6 +1968,15 @@ TEST_F(CompactionPickerTest, OverlappingUserKeys11) { ASSERT_EQ(7U, compaction->input(1, 0)->fd.GetNumber()); } +TEST_F(CompactionPickerTest, FileTtlBoosterLargeNumLevels) { + const uint64_t kCurrentTime = 1000000; + FileTtlBooster booster(kCurrentTime, /*ttl=*/2048, + /*num_non_empty_levels=*/100, /*level=*/1); + FileMetaData meta; + meta.oldest_ancester_time = kCurrentTime - 1023; + ASSERT_EQ(1, booster.GetBoostScore(&meta)); +} + TEST_F(CompactionPickerTest, FileTtlBooster) { // Set TTL to 2048 // TTL boosting for all levels starts at 1024, diff --git a/db/compaction/file_pri.h b/db/compaction/file_pri.h index 82dddcf93..e60d73e88 100644 --- a/db/compaction/file_pri.h +++ b/db/compaction/file_pri.h @@ -53,8 +53,10 @@ class FileTtlBooster { enabled_ = true; uint64_t all_boost_start_age = ttl / 2; uint64_t all_boost_age_range = (ttl / 32) * 31 - all_boost_start_age; + // TODO(cbi): more reasonable algorithm that gives different values + // when num_non_empty_levels - level - 1 > 63. uint64_t boost_age_range = - all_boost_age_range >> (num_non_empty_levels - level - 1); + all_boost_age_range >> std::min(63, num_non_empty_levels - level - 1); boost_age_start_ = all_boost_start_age + boost_age_range; const uint64_t kBoostRatio = 16; // prevent 0 value to avoid divide 0 error. diff --git a/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md b/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md new file mode 100644 index 000000000..f76830232 --- /dev/null +++ b/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md @@ -0,0 +1 @@ +Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). 
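Why the clamp matters: in C++ it is undefined behavior to shift a 64-bit value by 64 or more bits, so `all_boost_age_range >> (num_non_empty_levels - level - 1)` was UB whenever that exponent exceeded 63. A small standalone sketch of the guarded pattern; the SafeRightShift helper is invented for illustration and is not code from the patch.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

// Clamp the shift amount so the exponent for a uint64_t stays in [0, 63],
// mirroring the std::min(63, ...) guard added to FileTtlBooster.
uint64_t SafeRightShift(uint64_t value, int shift) {
  return value >> std::min(63, shift);
}

int main() {
  uint64_t range = 1'000'000;
  // `range >> 98` would be undefined behavior; the clamped form simply
  // saturates to the smallest possible result instead.
  std::cout << SafeRightShift(range, 98) << "\n";  // prints 0
  std::cout << SafeRightShift(range, 10) << "\n";  // prints 976
  return 0;
}
```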
\ No newline at end of file From 4500a0d6ec836b68cb0b6c464b3239d67f7ff0b3 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Sun, 6 Aug 2023 18:01:08 -0700 Subject: [PATCH 021/386] Avoid an std::map copy in persistent stats (#11681) Summary: An internal user reported this copy showing up in a CPU profile. We can use move instead. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11681 Differential Revision: D48103170 Pulled By: ajkr fbshipit-source-id: 083d6470181a0041bb5275b657aa61bee23a3729 --- db/db_impl/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 5dee77853..40e82cdba 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -970,7 +970,7 @@ void DBImpl::PersistStats() { "Storing %" ROCKSDB_PRIszt " stats with timestamp %" PRIu64 " to in-memory stats history", stats_slice_.size(), now_seconds); - stats_history_[now_seconds] = stats_delta; + stats_history_[now_seconds] = std::move(stats_delta); } stats_slice_initialized_ = true; std::swap(stats_slice_, stats_map); From cdb11f5ce6cb1e334e98bb672c5b121581a3af39 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 7 Aug 2023 12:20:23 -0700 Subject: [PATCH 022/386] More minor HCC refactoring + typed mmap (#11670) Summary: More code leading up to dynamic HCC. * Small enhancements to cache_bench * Extra assertion in Unref * Improve a CAS loop in ChargeUsageMaybeEvictStrict * Put load factor constants in appropriate class * Move `standalone` field to HyperClockTable::HandleImpl because it can be encoded differently in the upcoming dynamic HCC. * Add a typed version of MemMapping to simplify some future code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11670 Test Plan: existing tests, unit test added for TypedMemMapping Reviewed By: jowlyzhang Differential Revision: D48056464 Pulled By: pdillinger fbshipit-source-id: 186b7d3105c5d6d2eb6a592369bc10a97ee14a15 --- cache/cache_bench_tool.cc | 5 +++ cache/clock_cache.cc | 35 +++++++++++--------- cache/clock_cache.h | 68 +++++++++++++++++++-------------------- cache/lru_cache_test.cc | 10 +++--- memory/arena_test.cc | 29 ++++++++++------- port/mmap.h | 20 ++++++++++++ 6 files changed, 102 insertions(+), 65 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index fd1b44d16..6a0ef2a73 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -436,6 +436,10 @@ class CacheBench { printf("Lookup hit ratio: %g\n", shared.GetLookupHitRatio()); + size_t occ = cache_->GetOccupancyCount(); + size_t slot = cache_->GetTableAddressCount(); + printf("Final load factor: %g (%zu / %zu)\n", 1.0 * occ / slot, occ, slot); + if (FLAGS_histograms) { printf("\nOperation latency (ns):\n"); HistogramImpl combined; @@ -676,6 +680,7 @@ class CacheBench { #endif printf("----------------------------\n"); printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion); + printf("Cache impl name : %s\n", cache_->Name()); printf("DMutex impl name : %s\n", DMutex::kName()); printf("Number of threads : %u\n", FLAGS_threads); printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread); diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 16955004c..ac9c5f837 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -79,8 +79,10 @@ inline void Unref(const ClockHandle& h, uint64_t count = 1) { // Pretend we never took the reference // WART: there's a tiny chance we release last ref to invisible // entry here. 
If that happens, we let eviction take care of it. - h.meta.fetch_sub(ClockHandle::kAcquireIncrement * count, - std::memory_order_release); + uint64_t old_meta = h.meta.fetch_sub(ClockHandle::kAcquireIncrement * count, + std::memory_order_release); + assert(GetRefcount(old_meta) != 0); + (void)old_meta; } inline bool ClockUpdate(ClockHandle& h) { @@ -406,14 +408,14 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( // Grab any available capacity, and free up any more required. size_t old_usage = usage_.load(std::memory_order_relaxed); size_t new_usage; - if (LIKELY(old_usage != capacity)) { - do { - new_usage = std::min(capacity, old_usage + total_charge); - } while (!usage_.compare_exchange_weak(old_usage, new_usage, - std::memory_order_relaxed)); - } else { - new_usage = old_usage; - } + do { + new_usage = std::min(capacity, old_usage + total_charge); + if (new_usage == old_usage) { + // No change needed + break; + } + } while (!usage_.compare_exchange_weak(old_usage, new_usage, + std::memory_order_relaxed)); // How much do we need to evict then? size_t need_evict_charge = old_usage + total_charge - new_usage; size_t request_evict_charge = need_evict_charge; @@ -1418,7 +1420,7 @@ void AddShardEvaluation(const HyperClockCache::Shard& shard, // If filled to capacity, what would the occupancy ratio be? double ratio = occ_ratio / usage_ratio; // Given max load factor, what that load factor be? - double lf = ratio * kStrictLoadFactor; + double lf = ratio * HyperClockTable::kStrictLoadFactor; predicted_load_factors.push_back(lf); // Update min_recommendation also @@ -1457,17 +1459,18 @@ void HyperClockCache::ReportProblems( predicted_load_factors.end(), 0.0) / shard_count; - constexpr double kLowSpecLoadFactor = kLoadFactor / 2; - constexpr double kMidSpecLoadFactor = kLoadFactor / 1.414; - if (average_load_factor > kLoadFactor) { + constexpr double kLowSpecLoadFactor = HyperClockTable::kLoadFactor / 2; + constexpr double kMidSpecLoadFactor = HyperClockTable::kLoadFactor / 1.414; + if (average_load_factor > HyperClockTable::kLoadFactor) { // Out of spec => Consider reporting load factor too high // Estimate effective overall capacity loss due to enforcing occupancy limit double lost_portion = 0.0; int over_count = 0; for (double lf : predicted_load_factors) { - if (lf > kStrictLoadFactor) { + if (lf > HyperClockTable::kStrictLoadFactor) { ++over_count; - lost_portion += (lf - kStrictLoadFactor) / lf / shard_count; + lost_portion += + (lf - HyperClockTable::kStrictLoadFactor) / lf / shard_count; } } // >= 20% loss -> error diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 7a1caa023..7df65ed1b 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -282,29 +282,6 @@ class ClockCacheTest; // ----------------------------------------------------------------------- // -// The load factor p is a real number in (0, 1) such that at all -// times at most a fraction p of all slots, without counting tombstones, -// are occupied by elements. This means that the probability that a random -// probe hits an occupied slot is at most p, and thus at most 1/p probes -// are required on average. For example, p = 70% implies that between 1 and 2 -// probes are needed on average (bear in mind that this reasoning doesn't -// consider the effects of clustering over time, which should be negligible -// with double hashing). 
-// Because the size of the hash table is always rounded up to the next -// power of 2, p is really an upper bound on the actual load factor---the -// actual load factor is anywhere between p/2 and p. This is a bit wasteful, -// but bear in mind that slots only hold metadata, not actual values. -// Since space cost is dominated by the values (the LSM blocks), -// overprovisioning the table with metadata only increases the total cache space -// usage by a tiny fraction. -constexpr double kLoadFactor = 0.7; - -// The user can exceed kLoadFactor if the sizes of the inserted values don't -// match estimated_value_size, or in some rare cases with -// strict_capacity_limit == false. To avoid degenerate performance, we set a -// strict upper bound on the load factor. -constexpr double kStrictLoadFactor = 0.84; - struct ClockHandleBasicData { Cache::ObjectPtr value = nullptr; const Cache::CacheItemHelper* helper = nullptr; @@ -374,17 +351,6 @@ struct ClockHandle : public ClockHandleBasicData { // See above. Mutable for read reference counting. mutable std::atomic meta{}; - - // Whether this is a "deteched" handle that is independently allocated - // with `new` (so must be deleted with `delete`). - // TODO: ideally this would be packed into some other data field, such - // as upper bits of total_charge, but that incurs a measurable performance - // regression. - bool standalone = false; - - inline bool IsStandalone() const { return standalone; } - - inline void SetStandalone() { standalone = true; } }; // struct ClockHandle class BaseClockTable { @@ -476,6 +442,7 @@ class BaseClockTable { // Clock algorithm sweep pointer. std::atomic clock_pointer_{}; + // TODO: is this separation needed if we don't do background evictions? ALIGN_AS(CACHE_LINE_SIZE) // Number of elements in the table. std::atomic occupancy_{}; @@ -508,6 +475,16 @@ class HyperClockTable : public BaseClockTable { // up in this slot or a higher one. std::atomic displacements{}; + // Whether this is a "deteched" handle that is independently allocated + // with `new` (so must be deleted with `delete`). + // TODO: ideally this would be packed into some other data field, such + // as upper bits of total_charge, but that incurs a measurable performance + // regression. + bool standalone = false; + + inline bool IsStandalone() const { return standalone; } + + inline void SetStandalone() { standalone = true; } }; // struct HandleImpl struct Opts { @@ -561,6 +538,29 @@ class HyperClockTable : public BaseClockTable { void TEST_ReleaseN(HandleImpl* handle, size_t n); #endif + // The load factor p is a real number in (0, 1) such that at all + // times at most a fraction p of all slots, without counting tombstones, + // are occupied by elements. This means that the probability that a random + // probe hits an occupied slot is at most p, and thus at most 1/p probes + // are required on average. For example, p = 70% implies that between 1 and 2 + // probes are needed on average (bear in mind that this reasoning doesn't + // consider the effects of clustering over time, which should be negligible + // with double hashing). + // Because the size of the hash table is always rounded up to the next + // power of 2, p is really an upper bound on the actual load factor---the + // actual load factor is anywhere between p/2 and p. This is a bit wasteful, + // but bear in mind that slots only hold metadata, not actual values. 
+ // Since space cost is dominated by the values (the LSM blocks), + // overprovisioning the table with metadata only increases the total cache + // space usage by a tiny fraction. + static constexpr double kLoadFactor = 0.7; + + // The user can exceed kLoadFactor if the sizes of the inserted values don't + // match estimated_value_size, or in some rare cases with + // strict_capacity_limit == false. To avoid degenerate performance, we set a + // strict upper bound on the load factor. + static constexpr double kStrictLoadFactor = 0.84; + private: // functions // Returns x mod 2^{length_bits_}. inline size_t ModTableSize(uint64_t x) { diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index cb7beb7b1..720a1b2c0 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -915,8 +915,10 @@ TEST_F(ClockCacheTest, TableSizesTest) { /*memory_allocator*/ nullptr, kDontChargeCacheMetadata) .MakeSharedCache(); // Table sizes are currently only powers of two - EXPECT_GE(cache->GetTableAddressCount(), est_count / kLoadFactor); - EXPECT_LE(cache->GetTableAddressCount(), est_count / kLoadFactor * 2.0); + EXPECT_GE(cache->GetTableAddressCount(), + est_count / HyperClockTable::kLoadFactor); + EXPECT_LE(cache->GetTableAddressCount(), + est_count / HyperClockTable::kLoadFactor * 2.0); EXPECT_EQ(cache->GetUsage(), 0); // kFullChargeMetaData @@ -933,9 +935,9 @@ TEST_F(ClockCacheTest, TableSizesTest) { double est_count_after_meta = (capacity - cache->GetUsage()) * 1.0 / est_val_size; EXPECT_GE(cache->GetTableAddressCount(), - est_count_after_meta / kLoadFactor); + est_count_after_meta / HyperClockTable::kLoadFactor); EXPECT_LE(cache->GetTableAddressCount(), - est_count_after_meta / kLoadFactor * 2.0); + est_count_after_meta / HyperClockTable::kLoadFactor * 2.0); } } } diff --git a/memory/arena_test.cc b/memory/arena_test.cc index 21bf7ed62..592bbd723 100644 --- a/memory/arena_test.cc +++ b/memory/arena_test.cc @@ -219,21 +219,28 @@ size_t PopMinorPageFaultCount() { TEST(MmapTest, AllocateLazyZeroed) { // Doesn't have to be page aligned - constexpr size_t len = 1234567; - MemMapping m = MemMapping::AllocateLazyZeroed(len); - auto arr = static_cast(m.Get()); + constexpr size_t len = 1234567; // in bytes + constexpr size_t count = len / 8; // in uint64_t objects + // Implicit conversion move + TypedMemMapping pre_arr = MemMapping::AllocateLazyZeroed(len); + // Move from same type + TypedMemMapping arr = std::move(pre_arr); - // Should generally work - ASSERT_NE(arr, nullptr); + ASSERT_NE(arr.Get(), nullptr); + ASSERT_EQ(arr.Get(), &arr[0]); + ASSERT_EQ(arr.Get(), arr.MemMapping::Get()); + + ASSERT_EQ(arr.Length(), len); + ASSERT_EQ(arr.Count(), count); // Start counting page faults PopMinorPageFaultCount(); // Access half of the allocation size_t i = 0; - for (; i < len / 2; ++i) { + for (; i < count / 2; ++i) { ASSERT_EQ(arr[i], 0); - arr[i] = static_cast(i & 255); + arr[i] = i; } // Appropriate page faults (maybe more) @@ -241,9 +248,9 @@ TEST(MmapTest, AllocateLazyZeroed) { ASSERT_GE(faults, len / 2 / port::kPageSize); // Access rest of the allocation - for (; i < len; ++i) { + for (; i < count; ++i) { ASSERT_EQ(arr[i], 0); - arr[i] = static_cast(i & 255); + arr[i] = i; } // Appropriate page faults (maybe more) @@ -251,8 +258,8 @@ TEST(MmapTest, AllocateLazyZeroed) { ASSERT_GE(faults, len / 2 / port::kPageSize); // Verify data - for (i = 0; i < len; ++i) { - ASSERT_EQ(arr[i], static_cast(i & 255)); + for (i = 0; i < count; ++i) { + ASSERT_EQ(arr[i], i); } } diff --git a/port/mmap.h 
b/port/mmap.h index 7342a13f9..0f385522f 100644 --- a/port/mmap.h +++ b/port/mmap.h @@ -14,6 +14,7 @@ #endif // OS_WIN #include +#include #include "rocksdb/rocksdb_namespace.h" @@ -67,4 +68,23 @@ class MemMapping { static MemMapping AllocateAnonymous(size_t length, bool huge); }; +// Simple MemMapping wrapper that presents the memory as an array of T. +// For example, +// TypedMemMapping arr = MemMapping::AllocateLazyZeroed(num_bytes); +template +class TypedMemMapping : public MemMapping { + public: + /*implicit*/ TypedMemMapping(MemMapping&& v) noexcept + : MemMapping(std::move(v)) {} + TypedMemMapping& operator=(MemMapping&& v) noexcept { + MemMapping& base = *this; + base = std::move(v); + } + + inline T* Get() const { return static_cast(MemMapping::Get()); } + inline size_t Count() const { return MemMapping::Length() / sizeof(T); } + + inline T& operator[](size_t index) const { return Get()[index]; } +}; + } // namespace ROCKSDB_NAMESPACE From d2b0652b32b8671c9ec4057e6da2fa564d1cc610 Mon Sep 17 00:00:00 2001 From: Xinye Tao Date: Mon, 7 Aug 2023 12:29:31 -0700 Subject: [PATCH 023/386] compute compaction score once for a batch of range file deletes (#10744) Summary: Only re-calculate compaction score once for a batch of deletions. Fix performance regression brought by https://github.com/facebook/rocksdb/pull/8434. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10744 Test Plan: In one of our production cluster that recently upgraded to RocksDB 6.29, it takes more than 10 minutes to delete files in 30,000 ranges. The RocksDB instance contains approximately 80,000 files. After this patch, the duration reduces to 100+ ms, which is on par with RocksDB 6.4. Cherry-picking downstream PR: https://github.com/tikv/rocksdb/pull/316 Signed-off-by: tabokie Reviewed By: cbi42 Differential Revision: D48002581 Pulled By: ajkr fbshipit-source-id: 7245607ee3ad79c53b648a6396c9159f166b9437 --- db/db_impl/db_impl.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 40e82cdba..906b42bb6 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4447,10 +4447,12 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, deleted_files.insert(level_file); level_file->being_compacted = true; } - vstorage->ComputeCompactionScore(*cfd->ioptions(), - *cfd->GetLatestMutableCFOptions()); } } + if (!deleted_files.empty()) { + vstorage->ComputeCompactionScore(*cfd->ioptions(), + *cfd->GetLatestMutableCFOptions()); + } if (edit.GetDeletedFiles().empty()) { job_context.Clean(); return status; From 6d1effaf01f4f82b54fcb9b6bfcfdffe31da30e4 Mon Sep 17 00:00:00 2001 From: tabokie Date: Mon, 7 Aug 2023 12:35:42 -0700 Subject: [PATCH 024/386] exclude uninitialized files when estimating compression ratio (#11664) Summary: Exclude files with uninitialized table properties when estimating compression ratio. 
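To make the guard concrete, here is a minimal sketch with hypothetical, simplified names (the division convention at the end is illustrative; the authoritative change is the `db/version_set.cc` hunk below). Files whose table properties were never read report zero raw key/value sizes and are now left out of both sums, so they cannot skew the estimate.

```
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the per-file metadata consulted by the estimate.
struct FileSizes {
  uint64_t file_size = 0;       // on-disk (compressed) size
  uint64_t raw_key_size = 0;    // 0 until table properties are loaded
  uint64_t raw_value_size = 0;  // 0 until table properties are loaded
};

double EstimateCompressionRatio(const std::vector<FileSizes>& files) {
  uint64_t sum_file_size_bytes = 0;
  uint64_t sum_data_size_bytes = 0;
  for (const auto& f : files) {
    const uint64_t raw_size = f.raw_key_size + f.raw_value_size;
    if (raw_size > 0) {  // table properties initialized; include this file
      sum_file_size_bytes += f.file_size;
      sum_data_size_bytes += raw_size;
    }
  }
  if (sum_file_size_bytes == 0) {
    return -1.0;  // nothing usable contributed, mirroring the guard below
  }
  return static_cast<double>(sum_data_size_bytes) /
         static_cast<double>(sum_file_size_bytes);
}
```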
Cherry-picking downstream PR: https://github.com/tikv/rocksdb/pull/335 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11664 Reviewed By: cbi42 Differential Revision: D48002518 Pulled By: ajkr fbshipit-source-id: 931fac8a06b4ed7b7b605cf79903302f1b8babfd --- db/version_set.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 32dd4b8d9..68646b122 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1948,8 +1948,14 @@ double VersionStorageInfo::GetEstimatedCompressionRatioAtLevel( uint64_t sum_file_size_bytes = 0; uint64_t sum_data_size_bytes = 0; for (auto* file_meta : files_[level]) { - sum_file_size_bytes += file_meta->fd.GetFileSize(); - sum_data_size_bytes += file_meta->raw_key_size + file_meta->raw_value_size; + auto raw_size = file_meta->raw_key_size + file_meta->raw_value_size; + // Check if the table property is properly initialized. It might not be + // because in `UpdateAccumulatedStats` we limit the maximum number of + // properties to read once. + if (raw_size > 0) { + sum_file_size_bytes += file_meta->fd.GetFileSize(); + sum_data_size_bytes += raw_size; + } } if (sum_file_size_bytes == 0) { return -1.0; From 99daea3481575a269f94c78da2bafc0c77e2608c Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 7 Aug 2023 18:17:12 -0700 Subject: [PATCH 025/386] Prepare tests for new HCC naming (#11676) Summary: I'm anticipating using the public name HyperClockCache for both the current version with a fixed-size table and the upcoming version with an automatically growing table. However, for simplicity of testing them as substantially distinct implementations, I want to give them distinct internal names, like FixedHyperClockCache and AutoHyperClockCache. This change anticipates that by renaming to FixedHyperClockCache and assuming for now that all the unit tests run on HCC will run and behave similarly for the automatic HCC. 
Obviously updates will need to be made, but I'm trying to avoid uninteresting find & replace updates in what will be a large and engineering-heavy PR for AutoHCC Pull Request resolved: https://github.com/facebook/rocksdb/pull/11676 Test Plan: no behavior change intended, except logging will now use the name FixedHyperClockCache Reviewed By: ajkr Differential Revision: D48103165 Pulled By: pdillinger fbshipit-source-id: a33f1901488fea102164c2318e2f2b156aaba736 --- cache/cache_bench_tool.cc | 3 +- cache/cache_test.cc | 31 ++++----- cache/clock_cache.cc | 85 ++++++++++++------------ cache/clock_cache.h | 28 ++++---- cache/compressed_secondary_cache_test.cc | 8 +-- cache/lru_cache_test.cc | 39 +++++------ db_stress_tool/db_stress_test_base.cc | 3 +- test_util/secondary_cache_test_util.h | 14 ++-- tools/db_bench_tool.cc | 3 +- 9 files changed, 109 insertions(+), 105 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 6a0ef2a73..f3992e43e 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -299,7 +299,8 @@ class CacheBench { if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache") { + } else if (FLAGS_cache_type == "hyper_clock_cache" || + FLAGS_cache_type == "fixed_hyper_clock_cache") { HyperClockCacheOptions opts(FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); diff --git a/cache/cache_test.cc b/cache/cache_test.cc index 4585faacb..f65698873 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -120,8 +120,7 @@ class CacheTest : public testing::Test, // Currently, HyperClockCache requires keys to be 16B long, whereas // LRUCache doesn't, so the encoding depends on the cache type. 
std::string EncodeKey(int k) { - auto type = GetParam(); - if (type == kHyperClock) { + if (IsHyperClock()) { return EncodeKey16Bytes(k); } else { return EncodeKey32Bits(k); @@ -129,8 +128,7 @@ class CacheTest : public testing::Test, } int DecodeKey(const Slice& k) { - auto type = GetParam(); - if (type == kHyperClock) { + if (IsHyperClock()) { return DecodeKey16Bytes(k); } else { return DecodeKey32Bits(k); @@ -190,7 +188,7 @@ TEST_P(CacheTest, UsageTest) { auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); ASSERT_EQ(0, cache->GetUsage()); size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kHyperClock) { + if (!IsHyperClock()) { ASSERT_EQ(0, baseline_meta_usage); } @@ -209,7 +207,7 @@ TEST_P(CacheTest, UsageTest) { ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size)); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); - if (type == kHyperClock) { + if (IsHyperClock()) { ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage()); } else { ASSERT_LT(usage, precise_cache->GetUsage()); @@ -237,7 +235,7 @@ TEST_P(CacheTest, UsageTest) { ASSERT_GT(kCapacity, cache->GetUsage()); ASSERT_GT(kCapacity, precise_cache->GetUsage()); ASSERT_LT(kCapacity * 0.95, cache->GetUsage()); - if (type != kHyperClock) { + if (!IsHyperClock()) { ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage()); } else { // estimated value size of 1 is weird for clock cache, because @@ -263,7 +261,7 @@ TEST_P(CacheTest, PinnedUsageTest) { auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata); size_t baseline_meta_usage = precise_cache->GetUsage(); - if (type != kHyperClock) { + if (!IsHyperClock()) { ASSERT_EQ(0, baseline_meta_usage); } @@ -368,7 +366,7 @@ TEST_P(CacheTest, HitAndMiss) { ASSERT_EQ(-1, Lookup(300)); Insert(100, 102); - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { // ClockCache usually doesn't overwrite on Insert ASSERT_EQ(101, Lookup(100)); } else { @@ -378,7 +376,7 @@ TEST_P(CacheTest, HitAndMiss) { ASSERT_EQ(-1, Lookup(300)); ASSERT_EQ(1U, deleted_values_.size()); - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ASSERT_EQ(102, deleted_values_[0]); } else { ASSERT_EQ(101, deleted_values_[0]); @@ -386,7 +384,7 @@ TEST_P(CacheTest, HitAndMiss) { } TEST_P(CacheTest, InsertSameKey) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS( "ClockCache doesn't guarantee Insert overwrite same key."); return; @@ -415,7 +413,7 @@ TEST_P(CacheTest, Erase) { } TEST_P(CacheTest, EntriesArePinned) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS( "ClockCache doesn't guarantee Insert overwrite same key."); return; @@ -479,7 +477,7 @@ TEST_P(CacheTest, ExternalRefPinsEntries) { Insert(1000 + j, 2000 + j); } // Clock cache is even more stateful and needs more churn to evict - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { for (int j = 0; j < kCacheSize; j++) { Insert(11000 + j, 11000 + j); } @@ -679,7 +677,7 @@ using TypedHandle = SharedCache::TypedHandle; TEST_P(CacheTest, SetCapacity) { auto type = GetParam(); - if (type == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS( "FastLRUCache and HyperClockCache don't support arbitrary capacity " "adjustments."); @@ -811,7 +809,7 @@ TEST_P(CacheTest, OverCapacity) { cache.Release(handles[i]); } - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { // Make sure eviction is triggered. 
ASSERT_OK(cache.Insert(EncodeKey(-1), nullptr, 1, &handles[0])); @@ -923,8 +921,7 @@ TEST_P(CacheTest, DefaultShardBits) { // Prevent excessive allocation (to save time & space) estimated_value_size_ = 100000; // Implementations use different minimum shard sizes - size_t min_shard_size = - (GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U; + size_t min_shard_size = (IsHyperClock() ? 32U * 1024U : 512U) * 1024U; std::shared_ptr cache = NewCache(32U * min_shard_size); ShardedCacheBase* sc = dynamic_cast(cache.get()); diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index ac9c5f837..159ab4624 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -522,7 +522,7 @@ void BaseClockTable::TrackAndReleaseEvictedEntry( // For key reconstructed from hash UniqueId64x2 unhashed; took_value_ownership = - eviction_callback_(ClockCacheShard::ReverseHash( + eviction_callback_(ClockCacheShard::ReverseHash( h->GetHash(), &unhashed, hash_seed_), reinterpret_cast(h)); } @@ -670,7 +670,7 @@ void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) { } #endif -HyperClockTable::HyperClockTable( +FixedHyperClockTable::FixedHyperClockTable( size_t capacity, bool /*strict_capacity_limit*/, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, @@ -693,7 +693,7 @@ HyperClockTable::HyperClockTable( "Expecting size / alignment with common cache line size"); } -HyperClockTable::~HyperClockTable() { +FixedHyperClockTable::~FixedHyperClockTable() { // Assumes there are no references or active operations on any slot/element // in the table. for (size_t i = 0; i < GetTableSize(); i++) { @@ -729,13 +729,13 @@ HyperClockTable::~HyperClockTable() { assert(occupancy_ == 0); } -void HyperClockTable::StartInsert(InsertState&) {} +void FixedHyperClockTable::StartInsert(InsertState&) {} -bool HyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) { +bool FixedHyperClockTable::GrowIfNeeded(size_t new_occupancy, InsertState&) { return new_occupancy <= occupancy_limit_; } -HyperClockTable::HandleImpl* HyperClockTable::DoInsert( +FixedHyperClockTable::HandleImpl* FixedHyperClockTable::DoInsert( const ClockHandleBasicData& proto, uint64_t initial_countdown, bool keep_ref, InsertState&) { bool already_matches = false; @@ -782,7 +782,7 @@ HyperClockTable::HandleImpl* HyperClockTable::DoInsert( return nullptr; } -HyperClockTable::HandleImpl* HyperClockTable::Lookup( +FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( const UniqueId64x2& hashed_key) { HandleImpl* e = FindSlot( hashed_key, @@ -843,8 +843,8 @@ HyperClockTable::HandleImpl* HyperClockTable::Lookup( return e; } -bool HyperClockTable::Release(HandleImpl* h, bool useful, - bool erase_if_last_ref) { +bool FixedHyperClockTable::Release(HandleImpl* h, bool useful, + bool erase_if_last_ref) { // In contrast with LRUCache's Release, this function won't delete the handle // when the cache is above capacity and the reference is the last one. 
Space // is only freed up by EvictFromClock (called by Insert when space is needed) @@ -919,7 +919,7 @@ bool HyperClockTable::Release(HandleImpl* h, bool useful, } #ifndef NDEBUG -void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { +void FixedHyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { if (n > 0) { // Do n-1 simple releases first TEST_ReleaseNMinus1(h, n); @@ -930,7 +930,7 @@ void HyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { } #endif -void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { +void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) { (void)FindSlot( hashed_key, [&](HandleImpl* h) { @@ -995,7 +995,7 @@ void HyperClockTable::Erase(const UniqueId64x2& hashed_key) { [&](HandleImpl* /*h*/, bool /*is_last*/) {}); } -void HyperClockTable::EraseUnRefEntries() { +void FixedHyperClockTable::EraseUnRefEntries() { for (size_t i = 0; i <= this->length_bits_mask_; i++) { HandleImpl& h = array_[i]; @@ -1017,7 +1017,7 @@ void HyperClockTable::EraseUnRefEntries() { } template -inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot( +inline FixedHyperClockTable::HandleImpl* FixedHyperClockTable::FindSlot( const UniqueId64x2& hashed_key, const MatchFn& match_fn, const AbortFn& abort_fn, const UpdateFn& update_fn) { // NOTE: upper 32 bits of hashed_key[0] is used for sharding @@ -1052,8 +1052,8 @@ inline HyperClockTable::HandleImpl* HyperClockTable::FindSlot( return nullptr; } -inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key, - const HandleImpl* h) { +inline void FixedHyperClockTable::Rollback(const UniqueId64x2& hashed_key, + const HandleImpl* h) { size_t current = ModTableSize(hashed_key[1]); size_t increment = static_cast(hashed_key[0]) | 1U; while (&array_[current] != h) { @@ -1062,7 +1062,7 @@ inline void HyperClockTable::Rollback(const UniqueId64x2& hashed_key, } } -inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { +inline void FixedHyperClockTable::ReclaimEntryUsage(size_t total_charge) { auto old_occupancy = occupancy_.fetch_sub(1U, std::memory_order_release); (void)old_occupancy; // No underflow @@ -1073,8 +1073,8 @@ inline void HyperClockTable::ReclaimEntryUsage(size_t total_charge) { assert(old_usage >= total_charge); } -inline void HyperClockTable::Evict(size_t requested_charge, InsertState&, - EvictionData* data) { +inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, + EvictionData* data) { // precondition assert(requested_charge > 0); @@ -1172,7 +1172,7 @@ void ClockCacheShard::ApplyToSomeEntries( table_.HandlePtr(index_begin), table_.HandlePtr(index_end), false); } -int HyperClockTable::CalcHashBits( +int FixedHyperClockTable::CalcHashBits( size_t capacity, size_t estimated_value_size, CacheMetadataChargePolicy metadata_charge_policy) { double average_slot_charge = estimated_value_size * kLoadFactor; @@ -1360,9 +1360,9 @@ size_t ClockCacheShard
::GetTableAddressCount() const { } // Explicit instantiation -template class ClockCacheShard; +template class ClockCacheShard; -HyperClockCache::HyperClockCache(const HyperClockCacheOptions& opts) +FixedHyperClockCache::FixedHyperClockCache(const HyperClockCacheOptions& opts) : ShardedCache(opts) { assert(opts.estimated_entry_charge > 0 || opts.metadata_charge_policy != kDontChargeCacheMetadata); @@ -1371,7 +1371,7 @@ HyperClockCache::HyperClockCache(const HyperClockCacheOptions& opts) size_t per_shard = GetPerShardCapacity(); MemoryAllocator* alloc = this->memory_allocator(); InitShards([&](Shard* cs) { - HyperClockTable::Opts table_opts; + FixedHyperClockTable::Opts table_opts; table_opts.estimated_value_size = opts.estimated_entry_charge; new (cs) Shard(per_shard, opts.strict_capacity_limit, opts.metadata_charge_policy, alloc, &eviction_callback_, @@ -1379,15 +1379,15 @@ HyperClockCache::HyperClockCache(const HyperClockCacheOptions& opts) }); } -Cache::ObjectPtr HyperClockCache::Value(Handle* handle) { +Cache::ObjectPtr FixedHyperClockCache::Value(Handle* handle) { return reinterpret_cast(handle)->value; } -size_t HyperClockCache::GetCharge(Handle* handle) const { +size_t FixedHyperClockCache::GetCharge(Handle* handle) const { return reinterpret_cast(handle)->GetTotalCharge(); } -const Cache::CacheItemHelper* HyperClockCache::GetCacheItemHelper( +const Cache::CacheItemHelper* FixedHyperClockCache::GetCacheItemHelper( Handle* handle) const { auto h = reinterpret_cast(handle); return h->helper; @@ -1402,7 +1402,7 @@ namespace { // or actual occupancy very close to limit (>95% of limit). // Also, for each shard compute the recommended estimated_entry_charge, // and keep the minimum one for use as overall recommendation. -void AddShardEvaluation(const HyperClockCache::Shard& shard, +void AddShardEvaluation(const FixedHyperClockCache::Shard& shard, std::vector& predicted_load_factors, size_t& min_recommendation) { size_t usage = shard.GetUsage() - shard.GetStandaloneUsage(); @@ -1420,7 +1420,7 @@ void AddShardEvaluation(const HyperClockCache::Shard& shard, // If filled to capacity, what would the occupancy ratio be? double ratio = occ_ratio / usage_ratio; // Given max load factor, what that load factor be? 
- double lf = ratio * HyperClockTable::kStrictLoadFactor; + double lf = ratio * FixedHyperClockTable::kStrictLoadFactor; predicted_load_factors.push_back(lf); // Update min_recommendation also @@ -1430,13 +1430,13 @@ void AddShardEvaluation(const HyperClockCache::Shard& shard, } // namespace -void HyperClockCache::ReportProblems( +void FixedHyperClockCache::ReportProblems( const std::shared_ptr& info_log) const { uint32_t shard_count = GetNumShards(); std::vector predicted_load_factors; size_t min_recommendation = SIZE_MAX; - const_cast(this)->ForEachShard( - [&](HyperClockCache::Shard* shard) { + const_cast(this)->ForEachShard( + [&](FixedHyperClockCache::Shard* shard) { AddShardEvaluation(*shard, predicted_load_factors, min_recommendation); }); @@ -1459,18 +1459,19 @@ void HyperClockCache::ReportProblems( predicted_load_factors.end(), 0.0) / shard_count; - constexpr double kLowSpecLoadFactor = HyperClockTable::kLoadFactor / 2; - constexpr double kMidSpecLoadFactor = HyperClockTable::kLoadFactor / 1.414; - if (average_load_factor > HyperClockTable::kLoadFactor) { + constexpr double kLowSpecLoadFactor = FixedHyperClockTable::kLoadFactor / 2; + constexpr double kMidSpecLoadFactor = + FixedHyperClockTable::kLoadFactor / 1.414; + if (average_load_factor > FixedHyperClockTable::kLoadFactor) { // Out of spec => Consider reporting load factor too high // Estimate effective overall capacity loss due to enforcing occupancy limit double lost_portion = 0.0; int over_count = 0; for (double lf : predicted_load_factors) { - if (lf > HyperClockTable::kStrictLoadFactor) { + if (lf > FixedHyperClockTable::kStrictLoadFactor) { ++over_count; lost_portion += - (lf - HyperClockTable::kStrictLoadFactor) / lf / shard_count; + (lf - FixedHyperClockTable::kStrictLoadFactor) / lf / shard_count; } } // >= 20% loss -> error @@ -1494,10 +1495,10 @@ void HyperClockCache::ReportProblems( if (report) { ROCKS_LOG_AT_LEVEL( info_log, level, - "HyperClockCache@%p unable to use estimated %.1f%% capacity because " - "of " - "full occupancy in %d/%u cache shards (estimated_entry_charge too " - "high). Recommend estimated_entry_charge=%zu", + "FixedHyperClockCache@%p unable to use estimated %.1f%% capacity " + "because of full occupancy in %d/%u cache shards " + "(estimated_entry_charge too high). " + "Recommend estimated_entry_charge=%zu", this, lost_portion * 100.0, over_count, (unsigned)shard_count, min_recommendation); } @@ -1515,8 +1516,8 @@ void HyperClockCache::ReportProblems( } ROCKS_LOG_AT_LEVEL( info_log, level, - "HyperClockCache@%p table has low occupancy at full capacity. Higher " - "estimated_entry_charge (about %.1fx) would likely improve " + "FixedHyperClockCache@%p table has low occupancy at full capacity. " + "Higher estimated_entry_charge (about %.1fx) would likely improve " "performance. 
Recommend estimated_entry_charge=%zu", this, kMidSpecLoadFactor / average_load_factor, min_recommendation); } @@ -1549,7 +1550,7 @@ std::shared_ptr HyperClockCacheOptions::MakeSharedCache() const { GetDefaultCacheShardBits(opts.capacity, min_shard_size); } std::shared_ptr cache = - std::make_shared(opts); + std::make_shared(opts); if (opts.secondary_cache) { cache = std::make_shared(cache, opts.secondary_cache); diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 7df65ed1b..65993cbb0 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -466,7 +466,7 @@ class BaseClockTable { const uint32_t& hash_seed_; }; -class HyperClockTable : public BaseClockTable { +class FixedHyperClockTable : public BaseClockTable { public: // Target size to be exactly a common cache line size (see static_assert in // clock_cache.cc) @@ -491,12 +491,12 @@ class HyperClockTable : public BaseClockTable { size_t estimated_value_size; }; - HyperClockTable(size_t capacity, bool strict_capacity_limit, - CacheMetadataChargePolicy metadata_charge_policy, - MemoryAllocator* allocator, - const Cache::EvictionCallback* eviction_callback, - const uint32_t* hash_seed, const Opts& opts); - ~HyperClockTable(); + FixedHyperClockTable(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + MemoryAllocator* allocator, + const Cache::EvictionCallback* eviction_callback, + const uint32_t* hash_seed, const Opts& opts); + ~FixedHyperClockTable(); // For BaseClockTable::Insert struct InsertState {}; @@ -612,7 +612,7 @@ class HyperClockTable : public BaseClockTable { // Array of slots comprising the hash table. const std::unique_ptr array_; -}; // class HyperClockTable +}; // class FixedHyperClockTable // A single shard of sharded cache. template @@ -729,17 +729,17 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { std::atomic strict_capacity_limit_; }; // class ClockCacheShard -class HyperClockCache +class FixedHyperClockCache #ifdef NDEBUG final #endif - : public ShardedCache> { + : public ShardedCache> { public: - using Shard = ClockCacheShard; + using Shard = ClockCacheShard; - explicit HyperClockCache(const HyperClockCacheOptions& opts); + explicit FixedHyperClockCache(const HyperClockCacheOptions& opts); - const char* Name() const override { return "HyperClockCache"; } + const char* Name() const override { return "FixedHyperClockCache"; } Cache::ObjectPtr Value(Handle* handle) override; @@ -749,7 +749,7 @@ class HyperClockCache void ReportProblems( const std::shared_ptr& /*info_log*/) const override; -}; // class HyperClockCache +}; // class FixedHyperClockCache } // namespace clock_cache diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index 4e6a1375b..d350234f3 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -737,7 +737,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, class CompressedSecondaryCacheTest : public CompressedSecondaryCacheTestBase, public testing::WithParamInterface { - const std::string& Type() override { return GetParam(); } + const std::string& Type() const override { return GetParam(); } }; INSTANTIATE_TEST_CASE_P(CompressedSecondaryCacheTest, @@ -752,7 +752,7 @@ class CompressedSecCacheTestWithCompressAndAllocatorParam sec_cache_is_compressed_ = std::get<0>(GetParam()); use_jemalloc_ = std::get<1>(GetParam()); } - const std::string& Type() override { return std::get<2>(GetParam()); } + const 
std::string& Type() const override { return std::get<2>(GetParam()); } bool sec_cache_is_compressed_; bool use_jemalloc_; }; @@ -773,7 +773,7 @@ class CompressedSecondaryCacheTestWithCompressionParam CompressedSecondaryCacheTestWithCompressionParam() { sec_cache_is_compressed_ = std::get<0>(GetParam()); } - const std::string& Type() override { return std::get<1>(GetParam()); } + const std::string& Type() const override { return std::get<1>(GetParam()); } bool sec_cache_is_compressed_; }; @@ -950,7 +950,7 @@ class CompressedSecCacheTestWithCompressAndSplitParam sec_cache_is_compressed_ = std::get<0>(GetParam()); enable_custom_split_merge_ = std::get<1>(GetParam()); } - const std::string& Type() override { return std::get<2>(GetParam()); } + const std::string& Type() const override { return std::get<2>(GetParam()); } bool sec_cache_is_compressed_; bool enable_custom_split_merge_; }; diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 720a1b2c0..7bd2048dc 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -373,8 +373,8 @@ namespace clock_cache { class ClockCacheTest : public testing::Test { public: - using Shard = HyperClockCache::Shard; - using Table = HyperClockTable; + using Shard = FixedHyperClockCache::Shard; + using Table = FixedHyperClockTable; using HandleImpl = Shard::HandleImpl; ClockCacheTest() {} @@ -916,9 +916,9 @@ TEST_F(ClockCacheTest, TableSizesTest) { .MakeSharedCache(); // Table sizes are currently only powers of two EXPECT_GE(cache->GetTableAddressCount(), - est_count / HyperClockTable::kLoadFactor); + est_count / FixedHyperClockTable::kLoadFactor); EXPECT_LE(cache->GetTableAddressCount(), - est_count / HyperClockTable::kLoadFactor * 2.0); + est_count / FixedHyperClockTable::kLoadFactor * 2.0); EXPECT_EQ(cache->GetUsage(), 0); // kFullChargeMetaData @@ -935,9 +935,10 @@ TEST_F(ClockCacheTest, TableSizesTest) { double est_count_after_meta = (capacity - cache->GetUsage()) * 1.0 / est_val_size; EXPECT_GE(cache->GetTableAddressCount(), - est_count_after_meta / HyperClockTable::kLoadFactor); - EXPECT_LE(cache->GetTableAddressCount(), - est_count_after_meta / HyperClockTable::kLoadFactor * 2.0); + est_count_after_meta / FixedHyperClockTable::kLoadFactor); + EXPECT_LE( + cache->GetTableAddressCount(), + est_count_after_meta / FixedHyperClockTable::kLoadFactor * 2.0); } } } @@ -1425,7 +1426,7 @@ TEST_P(BasicSecondaryCacheTest, FullCapacityTest) { k2.AsSlice(), GetHelper(CacheEntryRole::kDataBlock, /*secondary_compatible=*/false), /*context*/ this, Cache::Priority::LOW); - if (strict_capacity_limit || GetParam() == kHyperClock) { + if (strict_capacity_limit || IsHyperClock()) { ASSERT_NE(handle2, nullptr); cache->Release(handle2); ASSERT_EQ(secondary_cache->num_inserts(), 1u); @@ -1450,12 +1451,12 @@ TEST_P(BasicSecondaryCacheTest, FullCapacityTest) { // CORRECTION: this is not quite right. block_1 can be inserted into the block // cache because strict_capacity_limit=false, but it is removed from the cache // in Release() because of being over-capacity, without demoting to secondary -// cache. HyperClockCache doesn't check capacity on release (for efficiency) -// so can demote the over-capacity item to secondary cache. Also, we intend to -// add support for demotion in Release, but that currently causes too much -// unit test churn. +// cache. FixedHyperClockCache doesn't check capacity on release (for +// efficiency) so can demote the over-capacity item to secondary cache. 
Also, we +// intend to add support for demotion in Release, but that currently causes too +// much unit test churn. TEST_P(DBSecondaryCacheTest, TestSecondaryCacheCorrectness1) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { // See CORRECTION above ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; @@ -1553,7 +1554,7 @@ TEST_P(DBSecondaryCacheTest, TestSecondaryCacheCorrectness1) { // insert and cache block_1 in the block cache (this is the different place // from TestSecondaryCacheCorrectness1) TEST_P(DBSecondaryCacheTest, TestSecondaryCacheCorrectness2) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; } @@ -1741,7 +1742,7 @@ TEST_P(DBSecondaryCacheTest, SecondaryCacheIntensiveTesting) { // if we try to insert block_1 to the block cache, it will always fails. Only // block_2 will be successfully inserted into the block cache. TEST_P(DBSecondaryCacheTest, SecondaryCacheFailureTest) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; } @@ -1851,7 +1852,7 @@ TEST_P(BasicSecondaryCacheTest, BasicWaitAllTest) { str.length())); } // Force all entries to be evicted to the secondary cache - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { // HCC doesn't respond immediately to SetCapacity for (int i = 9000; i < 9030; ++i) { ASSERT_OK(cache->Insert(ock.WithOffset(i).AsSlice(), nullptr, @@ -1906,7 +1907,7 @@ TEST_P(BasicSecondaryCacheTest, BasicWaitAllTest) { // a sync point callback in TestSecondaryCache::Lookup. We then control the // lookup result by setting the ResultMap. TEST_P(DBSecondaryCacheTest, TestSecondaryCacheMultiGet) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; } @@ -2407,7 +2408,7 @@ TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionBasic) { // with new options, which set the lowest_used_cache_tier to // kNonVolatileBlockTier. So secondary cache will be used. TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionChange) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; } @@ -2502,7 +2503,7 @@ TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionChange) { // Two DB test. We create 2 DBs sharing the same block cache and secondary // cache. We diable the secondary cache option for DB2. 
TEST_P(DBSecondaryCacheTest, TestSecondaryCacheOptionTwoDB) { - if (GetParam() == kHyperClock) { + if (IsHyperClock()) { ROCKSDB_GTEST_BYPASS("Test depends on LRUCache-specific behaviors"); return; } diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 01026a319..b0fd6ce7d 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -129,7 +129,8 @@ std::shared_ptr StressTest::NewCache(size_t capacity, if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache") { + } else if (FLAGS_cache_type == "hyper_clock_cache" || + FLAGS_cache_type == "fixed_hyper_clock_cache") { HyperClockCacheOptions opts(static_cast(capacity), FLAGS_block_size /*estimated_entry_charge*/, num_shard_bits); diff --git a/test_util/secondary_cache_test_util.h b/test_util/secondary_cache_test_util.h index 1cfb454b5..2b0e68f68 100644 --- a/test_util/secondary_cache_test_util.h +++ b/test_util/secondary_cache_test_util.h @@ -42,12 +42,14 @@ class WithCacheType : public TestCreateContext { }; static constexpr auto kLRU = "lru"; - static constexpr auto kHyperClock = "hyper_clock"; + static constexpr auto kFixedHyperClock = "fixed_hyper_clock"; // For options other than capacity size_t estimated_value_size_ = 1; - virtual const std::string& Type() = 0; + virtual const std::string& Type() const = 0; + + bool IsHyperClock() const { return Type() == kFixedHyperClock; } std::shared_ptr NewCache( size_t capacity, @@ -62,7 +64,7 @@ class WithCacheType : public TestCreateContext { } return lru_opts.MakeSharedCache(); } - if (type == kHyperClock) { + if (type == kFixedHyperClock) { HyperClockCacheOptions hc_opts{capacity, estimated_value_size_}; hc_opts.hash_seed = 0; // deterministic tests if (modify_opts_fn) { @@ -105,14 +107,14 @@ class WithCacheType : public TestCreateContext { class WithCacheTypeParam : public WithCacheType, public testing::WithParamInterface { - const std::string& Type() override { return GetParam(); } + const std::string& Type() const override { return GetParam(); } }; constexpr auto kLRU = WithCacheType::kLRU; -constexpr auto kHyperClock = WithCacheType::kHyperClock; +constexpr auto kFixedHyperClock = WithCacheType::kFixedHyperClock; inline auto GetTestingCacheTypes() { - return testing::Values(std::string(kLRU), std::string(kHyperClock)); + return testing::Values(std::string(kLRU), std::string(kFixedHyperClock)); } } // namespace secondary_cache_test_util diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 69d35ab2d..6d2accd87 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -3039,7 +3039,8 @@ class Benchmark { if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache") { + } else if (FLAGS_cache_type == "hyper_clock_cache" || + FLAGS_cache_type == "fixed_hyper_clock_cache") { HyperClockCacheOptions hcco{ static_cast(capacity), static_cast(FLAGS_block_size) /*estimated_entry_charge*/, From e214964f40a03c54cc0b2b77096d1bcd248286c5 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 8 Aug 2023 11:34:41 -0700 Subject: [PATCH 026/386] Fix a potential memory leak on row_cache insertion failure (#11682) Summary: Although the built-in Cache implementations never return failure on Insert without keeping a reference (Handle), a custom implementation could. 
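For instance, a wrapper along the following lines rejects every insertion without taking ownership of the value; it is essentially the `FailInsertionCache` helper that the new unit test below adds, shown here as a hedged sketch (the wrapper name is hypothetical, and it assumes the `CacheWrapper` utility from RocksDB's advanced cache header).

```
#include "rocksdb/advanced_cache.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical wrapper: Insert() fails without producing a Handle, so it
// never takes ownership of `value`; cleanup remains the caller's job.
class RejectingCache : public CacheWrapper {
 public:
  using CacheWrapper::CacheWrapper;
  const char* Name() const override { return "RejectingCache"; }
  Status Insert(const Slice& /*key*/, ObjectPtr /*value*/,
                const CacheItemHelper* /*helper*/, size_t /*charge*/,
                Handle** /*handle*/ = nullptr,
                Priority /*priority*/ = Priority::LOW) override {
    return Status::MemoryLimit();
  }
};

}  // namespace ROCKSDB_NAMESPACE
```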
The code for inserting into row_cache does not keep a reference but does not clean up appropriately on non-OK. This is a fix. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11682 Test Plan: unit test added that previously fails under ASAN Reviewed By: ajkr Differential Revision: D48153831 Pulled By: pdillinger fbshipit-source-id: 86eb7387915c5b38b6ff5dd8deb4e1e223b7d020 --- db/db_test.cc | 21 +++++++++++++++++++++ db/table_cache.cc | 9 ++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 609f96ea5..1301c2f3b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -6927,6 +6927,27 @@ TEST_F(DBTest, RowCache) { ASSERT_EQ(Get("foo"), "bar"); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); + + // Also test non-OK cache insertion (would be ASAN failure on memory leak) + class FailInsertionCache : public CacheWrapper { + public: + using CacheWrapper::CacheWrapper; + const char* Name() const override { return "FailInsertionCache"; } + Status Insert(const Slice&, Cache::ObjectPtr, const CacheItemHelper*, + size_t, Handle** = nullptr, + Priority = Priority::LOW) override { + return Status::MemoryLimit(); + } + }; + options.row_cache = std::make_shared(options.row_cache); + ASSERT_OK(options.statistics->Reset()); + Reopen(options); + + ASSERT_EQ(Get("foo"), "bar"); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); + ASSERT_EQ(Get("foo"), "bar"); + // Test condition requires row cache insertion to fail + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); } TEST_F(DBTest, PinnableSliceAndRowCache) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 00224fd81..bdbb47a2f 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -484,9 +484,12 @@ Status TableCache::Get( RowCacheInterface row_cache{ioptions_.row_cache.get()}; size_t charge = row_cache_entry->capacity() + sizeof(std::string); auto row_ptr = new std::string(std::move(*row_cache_entry)); - // If row cache is full, it's OK to continue. - row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge) - .PermitUncheckedError(); + Status rcs = row_cache.Insert(row_cache_key.GetUserKey(), row_ptr, charge); + if (!rcs.ok()) { + // If row cache is full, it's OK to continue, but we keep ownership of + // row_ptr. + delete row_ptr; + } } if (handle != nullptr) { From 9c2ebcc2c365bb89af566b3076f813d7bf11146b Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 8 Aug 2023 12:25:21 -0700 Subject: [PATCH 027/386] Log user_defined_timestamps_persisted flag in event logger (#11683) Summary: As titled, and also removed an undefined and unused member function in for ColumnFamilyData Pull Request resolved: https://github.com/facebook/rocksdb/pull/11683 Reviewed By: ajkr Differential Revision: D48156290 Pulled By: jowlyzhang fbshipit-source-id: cc99aaafe69db6611af3854cb2b2ebc5044941f7 --- db/column_family.h | 2 -- db/event_helpers.cc | 2 ++ db_stress_tool/db_stress_test_base.cc | 1 - table/table_properties.cc | 4 ++++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/db/column_family.h b/db/column_family.h index f976c24cc..3f3466e8e 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -519,8 +519,6 @@ class ColumnFamilyData { return file_metadata_cache_res_mgr_; } - SequenceNumber GetFirstMemtableSequenceNumber() const; - static const uint32_t kDummyColumnFamilyDataId; // Keep track of whether the mempurge feature was ever used. 
diff --git a/db/event_helpers.cc b/db/event_helpers.cc index 4360144ec..d442a1ed7 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -122,6 +122,8 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished( << "column_family_name" << table_properties.column_family_name << "column_family_id" << table_properties.column_family_id << "comparator" << table_properties.comparator_name + << "user_defined_timestamps_persisted" + << table_properties.user_defined_timestamps_persisted << "merge_operator" << table_properties.merge_operator_name << "prefix_extractor_name" << table_properties.prefix_extractor_name << "property_collectors" diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b0fd6ce7d..fce0e0f6a 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -491,7 +491,6 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, const Slice v(value, sz); - std::string ts; if (FLAGS_user_timestamp_size > 0) { ts = GetNowNanos(); diff --git a/table/table_properties.cc b/table/table_properties.cc index 06ea13f7c..17a13543d 100644 --- a/table/table_properties.cc +++ b/table/table_properties.cc @@ -109,6 +109,10 @@ std::string TableProperties::ToString(const std::string& prop_delim, AppendProperty(result, "comparator name", comparator_name.empty() ? std::string("N/A") : comparator_name, prop_delim, kv_delim); + AppendProperty(result, "user defined timestamps persisted", + user_defined_timestamps_persisted ? std::string("true") + : std::string("false"), + prop_delim, kv_delim); AppendProperty( result, "merge operator name", From 9a034801cead6421bcf82b506b77e3b2251f1edb Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 8 Aug 2023 17:26:50 -0700 Subject: [PATCH 028/386] Group rocksdb.sst.read.micros stat by different user read IOActivity + misc (#11444) Summary: **Context/Summary:** - Similar to https://github.com/facebook/rocksdb/pull/11288 but for user read such as `Get(), MultiGet(), DBIterator::XXX(), Verify(File)Checksum()`. - For this, I refactored some user-facing `MultiGet` calls in `TransactionBase` and various types of `DB` so that it does not call a user-facing `Get()` but `GetImpl()` for passing the `ReadOptions::io_activity` check (see PR conversation) - New user read stats breakdown are guarded by `kExceptDetailedTimers` since measurement shows they have 4-5% regression to the upstream/main. - Misc - More refactoring: with https://github.com/facebook/rocksdb/pull/11288, we complete passing `ReadOptions/IOOptions` to FS level. 
So we can now replace the previously [added](https://github.com/facebook/rocksdb/pull/9424) `rate_limiter_priority` parameter in `RandomAccessFileReader`'s `Read/MultiRead/Prefetch()` with `IOOptions::rate_limiter_priority` - Also, `ReadAsync()` call time is measured in `SST_READ_MICRO` now Pull Request resolved: https://github.com/facebook/rocksdb/pull/11444 Test Plan: - CI fake db crash/stress test - Microbenchmarking **Build** `make clean && ROCKSDB_NO_FBCODE=1 DEBUG_LEVEL=0 make -jN db_basic_bench` - google benchmark version: https://github.com/google/benchmark/commit/604f6fd3f4b34a84ec4eb4db81d842fa4db829cd - db_basic_bench_base: upstream - db_basic_bench_pr: db_basic_bench_base + this PR - asyncread_db_basic_bench_base: upstream + [db basic bench patch for IteratorNext](https://github.com/facebook/rocksdb/compare/main...hx235:rocksdb:micro_bench_async_read) - asyncread_db_basic_bench_pr: asyncread_db_basic_bench_base + this PR **Test** Get ``` TEST_TMPDIR=/dev/shm ./db_basic_bench_{null_stat|base|pr} --benchmark_filter=DBGet/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1/negative_query:0/enable_filter:0/mmap:1/threads:1 --benchmark_repetitions=1000 ``` Result ``` Coming soon ``` AsyncRead ``` TEST_TMPDIR=/dev/shm ./asyncread_db_basic_bench_{base|pr} --benchmark_filter=IteratorNext/comp_style:0/max_data:134217728/per_key_size:256/enable_statistics:1/async_io:1/include_detailed_timers:0 --benchmark_repetitions=1000 > syncread_db_basic_bench_{base|pr}.out ``` Result ``` Base: 1956,1956,1968,1977,1979,1986,1988,1988,1988,1990,1991,1991,1993,1993,1993,1993,1994,1996,1997,1997,1997,1998,1999,2001,2001,2002,2004,2007,2007,2008, PR (2.3% regression, due to measuring `SST_READ_MICRO` that wasn't measured before): 1993,2014,2016,2022,2024,2027,2027,2028,2028,2030,2031,2031,2032,2032,2038,2039,2042,2044,2044,2047,2047,2047,2048,2049,2050,2052,2052,2052,2053,2053, ``` Reviewed By: ajkr Differential Revision: D45918925 Pulled By: hx235 fbshipit-source-id: 58a54560d9ebeb3a59b6d807639692614dad058a --- db/blob/blob_file_reader.cc | 35 ++- db/blob/blob_file_reader.h | 3 +- db/blob/blob_log_sequential_reader.cc | 5 +- db/convenience.cc | 23 +- db/convenience_impl.h | 15 ++ db/db_impl/compacted_db_impl.cc | 59 +++-- db/db_impl/compacted_db_impl.h | 8 +- db/db_impl/db_impl.cc | 206 +++++++++++++----- db/db_impl/db_impl.h | 25 ++- db/db_impl/db_impl_readonly.cc | 36 ++- db/db_impl/db_impl_readonly.h | 9 +- db/db_impl/db_impl_secondary.cc | 68 ++++-- db/db_impl/db_impl_secondary.h | 16 +- db/db_iter.cc | 4 +- db/db_iter.h | 1 + db/db_rate_limiter_test.cc | 15 +- db/external_sst_file_ingestion_job.cc | 9 +- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_driver.cc | 2 + db_stress_tool/db_stress_env_wrapper.h | 42 ++++ db_stress_tool/db_stress_gflags.cc | 7 + db_stress_tool/db_stress_test_base.cc | 32 +++ db_stress_tool/no_batched_ops_stress.cc | 9 + file/file_prefetch_buffer.cc | 55 ++--- file/file_prefetch_buffer.h | 22 +- file/file_util.cc | 10 +- file/file_util.h | 2 +- file/prefetch_test.cc | 13 +- file/random_access_file_reader.cc | 73 ++++--- file/random_access_file_reader.h | 19 +- file/random_access_file_reader_test.cc | 21 +- file/sequence_file_reader.h | 3 + include/rocksdb/convenience.h | 2 +- include/rocksdb/env.h | 5 + include/rocksdb/statistics.h | 7 + include/rocksdb/thread_status.h | 5 + include/rocksdb/utilities/transaction.h | 31 ++- java/rocksjni/portal.h | 23 ++ .../main/java/org/rocksdb/HistogramType.java | 10 + microbench/db_basic_bench.cc | 3 +- 
monitoring/statistics.cc | 7 + monitoring/thread_status_util_debug.cc | 10 + .../block_based/block_based_table_iterator.cc | 4 +- table/block_based/block_based_table_reader.cc | 13 +- .../block_based_table_reader_sync_and_async.h | 2 +- table/block_based/block_prefetcher.cc | 31 ++- table/block_based/block_prefetcher.h | 2 +- table/block_based/partitioned_filter_block.cc | 3 +- .../block_based/partitioned_index_iterator.cc | 3 +- table/block_based/partitioned_index_reader.cc | 3 +- table/block_fetcher.cc | 16 +- table/cuckoo/cuckoo_table_builder_test.cc | 3 +- table/cuckoo/cuckoo_table_reader.cc | 5 +- table/format.cc | 12 +- table/mock_table.cc | 3 +- table/plain/plain_table_key_coding.cc | 3 +- table/plain/plain_table_reader.cc | 6 +- table/sst_file_dumper.cc | 3 +- table/table_test.cc | 14 +- tools/db_crashtest.py | 3 + .../behavior_changes/more_sst_read_micros.md | 1 + .../user_read_io_activity_stats.md | 1 + util/thread_operation.h | 9 +- utilities/blob_db/blob_db_impl.cc | 82 +++++-- utilities/blob_db/blob_db_impl.h | 13 +- utilities/blob_db/blob_dump_tool.cc | 5 +- utilities/blob_db/blob_file.cc | 17 +- utilities/cache_dump_load_impl.h | 3 +- .../persistent_cache/block_cache_tier_file.cc | 3 +- utilities/trace/file_trace_reader_writer.cc | 5 +- utilities/transactions/transaction_base.cc | 94 ++++++-- utilities/transactions/transaction_base.h | 27 ++- utilities/transactions/write_prepared_txn.cc | 52 ++++- utilities/transactions/write_prepared_txn.h | 8 +- .../transactions/write_prepared_txn_db.cc | 95 ++++++-- .../transactions/write_prepared_txn_db.h | 23 +- .../transactions/write_unprepared_txn.cc | 50 ++++- utilities/transactions/write_unprepared_txn.h | 9 +- .../transactions/write_unprepared_txn_db.cc | 22 +- .../transactions/write_unprepared_txn_db.h | 2 +- utilities/ttl/db_ttl_impl.cc | 15 +- utilities/ttl/db_ttl_impl.h | 2 +- .../write_batch_with_index.cc | 3 +- 83 files changed, 1127 insertions(+), 494 deletions(-) create mode 100644 db/convenience_impl.h create mode 100644 unreleased_history/behavior_changes/more_sst_read_micros.md create mode 100644 unreleased_history/new_features/user_read_io_activity_stats.md diff --git a/db/blob/blob_file_reader.cc b/db/blob/blob_file_reader.cc index 6df7f3aee..bdab3ae68 100644 --- a/db/blob/blob_file_reader.cc +++ b/db/blob/blob_file_reader.cc @@ -154,11 +154,9 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader, constexpr uint64_t read_offset = 0; constexpr size_t read_size = BlobLogHeader::kSize; - // TODO: rate limit reading headers from blob files. const Status s = ReadFromFile(file_reader, read_options, read_offset, read_size, - statistics, &header_slice, &buf, &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */); + statistics, &header_slice, &buf, &aligned_buf); if (!s.ok()) { return s; } @@ -207,11 +205,9 @@ Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader, const uint64_t read_offset = file_size - BlobLogFooter::kSize; constexpr size_t read_size = BlobLogFooter::kSize; - // TODO: rate limit reading footers from blob files. 
const Status s = ReadFromFile(file_reader, read_options, read_offset, read_size, - statistics, &footer_slice, &buf, &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */); + statistics, &footer_slice, &buf, &aligned_buf); if (!s.ok()) { return s; } @@ -242,8 +238,7 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader, const ReadOptions& read_options, uint64_t read_offset, size_t read_size, Statistics* statistics, Slice* slice, - Buffer* buf, AlignedBuf* aligned_buf, - Env::IOPriority rate_limiter_priority) { + Buffer* buf, AlignedBuf* aligned_buf) { assert(slice); assert(buf); assert(aligned_buf); @@ -264,13 +259,13 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader, constexpr char* scratch = nullptr; s = file_reader->Read(io_options, read_offset, read_size, slice, scratch, - aligned_buf, rate_limiter_priority); + aligned_buf); } else { buf->reset(new char[read_size]); constexpr AlignedBuf* aligned_scratch = nullptr; s = file_reader->Read(io_options, read_offset, read_size, slice, buf->get(), - aligned_scratch, rate_limiter_priority); + aligned_scratch); } if (!s.ok()) { @@ -345,8 +340,7 @@ Status BlobFileReader::GetBlob( } prefetched = prefetch_buffer->TryReadFromCache( io_options, file_reader_.get(), record_offset, - static_cast(record_size), &record_slice, &s, - read_options.rate_limiter_priority, for_compaction); + static_cast(record_size), &record_slice, &s, for_compaction); if (!s.ok()) { return s; } @@ -357,10 +351,10 @@ Status BlobFileReader::GetBlob( PERF_COUNTER_ADD(blob_read_count, 1); PERF_COUNTER_ADD(blob_read_byte, record_size); PERF_TIMER_GUARD(blob_read_time); - const Status s = ReadFromFile( - file_reader_.get(), read_options, record_offset, - static_cast(record_size), statistics_, &record_slice, &buf, - &aligned_buf, read_options.rate_limiter_priority); + const Status s = + ReadFromFile(file_reader_.get(), read_options, record_offset, + static_cast(record_size), statistics_, + &record_slice, &buf, &aligned_buf); if (!s.ok()) { return s; } @@ -468,9 +462,12 @@ void BlobFileReader::MultiGetBlob( TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile"); PERF_COUNTER_ADD(blob_read_count, num_blobs); PERF_COUNTER_ADD(blob_read_byte, total_len); - s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(), - direct_io ? &aligned_buf : nullptr, - read_options.rate_limiter_priority); + IOOptions opts; + s = file_reader_->PrepareIOOptions(read_options, opts); + if (s.ok()) { + s = file_reader_->MultiRead(opts, read_reqs.data(), read_reqs.size(), + direct_io ? 
&aligned_buf : nullptr); + } if (!s.ok()) { for (auto& req : read_reqs) { req.status.PermitUncheckedError(); diff --git a/db/blob/blob_file_reader.h b/db/blob/blob_file_reader.h index 990e32540..fa8aa501d 100644 --- a/db/blob/blob_file_reader.h +++ b/db/blob/blob_file_reader.h @@ -89,8 +89,7 @@ class BlobFileReader { const ReadOptions& read_options, uint64_t read_offset, size_t read_size, Statistics* statistics, Slice* slice, Buffer* buf, - AlignedBuf* aligned_buf, - Env::IOPriority rate_limiter_priority); + AlignedBuf* aligned_buf); static Status VerifyBlob(const Slice& record_slice, const Slice& user_key, uint64_t value_size); diff --git a/db/blob/blob_log_sequential_reader.cc b/db/blob/blob_log_sequential_reader.cc index 2ed430681..579c98e29 100644 --- a/db/blob/blob_log_sequential_reader.cc +++ b/db/blob/blob_log_sequential_reader.cc @@ -29,9 +29,8 @@ Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice, StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS); // TODO: rate limit `BlobLogSequentialReader` reads (it appears unused?) - Status s = - file_->Read(IOOptions(), next_byte_, static_cast(size), slice, - buf, nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + Status s = file_->Read(IOOptions(), next_byte_, static_cast(size), + slice, buf, nullptr); next_byte_ += size; if (!s.ok()) { return s; diff --git a/db/convenience.cc b/db/convenience.cc index 32cdfafaa..08bddc8e8 100644 --- a/db/convenience.cc +++ b/db/convenience.cc @@ -7,6 +7,7 @@ #include "rocksdb/convenience.h" +#include "db/convenience_impl.h" #include "db/db_impl/db_impl.h" #include "util/cast_util.h" @@ -39,9 +40,25 @@ Status VerifySstFileChecksum(const Options& options, } Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::string& file_path, const SequenceNumber& largest_seqno) { + if (_read_options.io_activity != Env::IOActivity::kUnknown) { + return Status::InvalidArgument( + "Can only call VerifySstFileChecksum with `ReadOptions::io_activity` " + "is " + "`Env::IOActivity::kUnknown`"); + } + ReadOptions read_options(_read_options); + return VerifySstFileChecksumInternal(options, env_options, read_options, + file_path, largest_seqno); +} + +Status VerifySstFileChecksumInternal(const Options& options, + const EnvOptions& env_options, + const ReadOptions& read_options, + const std::string& file_path, + const SequenceNumber& largest_seqno) { std::unique_ptr file; uint64_t file_size; InternalKeyComparator internal_comparator(options.comparator); @@ -68,8 +85,8 @@ Status VerifySstFileChecksum(const Options& options, !kImmortal, false /* force_direct_prefetch */, -1 /* level */); reader_options.largest_seqno = largest_seqno; s = ioptions.table_factory->NewTableReader( - reader_options, std::move(file_reader), file_size, &table_reader, - false /* prefetch_index_and_filter_in_cache */); + read_options, reader_options, std::move(file_reader), file_size, + &table_reader, false /* prefetch_index_and_filter_in_cache */); if (!s.ok()) { return s; } diff --git a/db/convenience_impl.h b/db/convenience_impl.h new file mode 100644 index 000000000..32f4476bd --- /dev/null +++ b/db/convenience_impl.h @@ -0,0 +1,15 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once +#include "rocksdb/db.h" + +namespace ROCKSDB_NAMESPACE { +Status VerifySstFileChecksumInternal(const Options& options, + const EnvOptions& env_options, + const ReadOptions& read_options, + const std::string& file_path, + const SequenceNumber& largest_seqno = 0); +} // namespace ROCKSDB_NAMESPACE diff --git a/db/db_impl/compacted_db_impl.cc b/db/db_impl/compacted_db_impl.cc index 3d824baf2..b1338ec99 100644 --- a/db/db_impl/compacted_db_impl.cc +++ b/db/db_impl/compacted_db_impl.cc @@ -43,18 +43,25 @@ Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, /*timestamp*/ nullptr); } -Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, - const Slice& key, PinnableSlice* value, - std::string* timestamp) { - if (options.io_activity != Env::IOActivity::kUnknown) { +Status CompactedDBImpl::Get(const ReadOptions& _read_options, + ColumnFamilyHandle*, const Slice& key, + PinnableSlice* value, std::string* timestamp) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; } + assert(user_comparator_); - if (options.timestamp) { - const Status s = FailIfTsMismatchCf( - DefaultColumnFamily(), *(options.timestamp), /*ts_for_read=*/true); + if (read_options.timestamp) { + const Status s = + FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return s; } @@ -74,7 +81,7 @@ Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, GetWithTimestampReadCallback read_cb(kMaxSequenceNumber); std::string* ts = user_comparator_->timestamp_size() > 0 ? 
timestamp : nullptr; - LookupKey lkey(key, kMaxSequenceNumber, options.timestamp); + LookupKey lkey(key, kMaxSequenceNumber, read_options.timestamp); GetContext get_context(user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound, lkey.user_key(), value, /*columns=*/nullptr, ts, nullptr, nullptr, true, @@ -88,8 +95,8 @@ Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*, /*b_has_ts=*/false) < 0) { return Status::NotFound(); } - Status s = f.fd.table_reader->Get(options, lkey.internal_key(), &get_context, - nullptr); + Status s = f.fd.table_reader->Get(read_options, lkey.internal_key(), + &get_context, nullptr); if (!s.ok() && !s.IsNotFound()) { return s; } @@ -106,15 +113,28 @@ std::vector CompactedDBImpl::MultiGet( } std::vector CompactedDBImpl::MultiGet( - const ReadOptions& options, const std::vector&, + const ReadOptions& _read_options, const std::vector&, const std::vector& keys, std::vector* values, std::vector* timestamps) { assert(user_comparator_); size_t num_keys = keys.size(); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + return std::vector(num_keys, s); + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } - if (options.timestamp) { - Status s = FailIfTsMismatchCf(DefaultColumnFamily(), *(options.timestamp), - /*ts_for_read=*/true); + if (read_options.timestamp) { + Status s = + FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return std::vector(num_keys, s); } @@ -136,7 +156,7 @@ std::vector CompactedDBImpl::MultiGet( GetWithTimestampReadCallback read_cb(kMaxSequenceNumber); autovector reader_list; for (const auto& key : keys) { - LookupKey lkey(key, kMaxSequenceNumber, options.timestamp); + LookupKey lkey(key, kMaxSequenceNumber, read_options.timestamp); const FdWithKeyRange& f = files_.files[FindFile(lkey.user_key())]; if (user_comparator_->CompareWithoutTimestamp( key, /*a_has_ts=*/false, @@ -159,14 +179,15 @@ std::vector CompactedDBImpl::MultiGet( if (r != nullptr) { PinnableSlice pinnable_val; std::string& value = (*values)[idx]; - LookupKey lkey(keys[idx], kMaxSequenceNumber, options.timestamp); + LookupKey lkey(keys[idx], kMaxSequenceNumber, read_options.timestamp); std::string* timestamp = timestamps ? &(*timestamps)[idx] : nullptr; GetContext get_context( user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound, lkey.user_key(), &pinnable_val, /*columns=*/nullptr, user_comparator_->timestamp_size() > 0 ? 
timestamp : nullptr, nullptr, nullptr, true, nullptr, nullptr, nullptr, nullptr, &read_cb); - Status s = r->Get(options, lkey.internal_key(), &get_context, nullptr); + Status s = + r->Get(read_options, lkey.internal_key(), &get_context, nullptr); assert(static_cast(idx) < statuses.size()); if (!s.ok() && !s.IsNotFound()) { statuses[idx] = s; diff --git a/db/db_impl/compacted_db_impl.h b/db/db_impl/compacted_db_impl.h index ad192b4f1..e1c605e42 100644 --- a/db/db_impl/compacted_db_impl.h +++ b/db/db_impl/compacted_db_impl.h @@ -30,9 +30,9 @@ class CompactedDBImpl : public DBImpl { ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; - Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value, - std::string* timestamp) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) override; using DB::MultiGet; // Note that CompactedDBImpl::MultiGet is not the optimized version of @@ -43,7 +43,7 @@ class CompactedDBImpl : public DBImpl { const std::vector& keys, std::vector* values) override; - std::vector MultiGet(const ReadOptions& options, + std::vector MultiGet(const ReadOptions& _read_options, const std::vector&, const std::vector& keys, std::vector* values, diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 906b42bb6..859f3144a 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -28,6 +28,7 @@ #include "db/arena_wrapped_db_iter.h" #include "db/builder.h" #include "db/compaction/compaction_job.h" +#include "db/convenience_impl.h" #include "db/db_info_dumper.h" #include "db/db_iter.h" #include "db/dbformat.h" @@ -74,7 +75,6 @@ #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" -#include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" @@ -1922,15 +1922,43 @@ Status DBImpl::Get(const ReadOptions& read_options, return Get(read_options, column_family, key, value, /*timestamp=*/nullptr); } -Status DBImpl::Get(const ReadOptions& read_options, +Status DBImpl::GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) { + return GetImpl(read_options, column_family, key, value, + /*timestamp=*/nullptr); +} + +Status DBImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) { assert(value != nullptr); value->Reset(); + + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + + Status s = GetImpl(read_options, column_family, key, value, timestamp); + return s; +} + +Status DBImpl::GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) { GetImplOptions get_impl_options; get_impl_options.column_family = column_family; get_impl_options.value = value; get_impl_options.timestamp = timestamp; + Status s = GetImpl(read_options, key, get_impl_options); return s; } @@ -1999,11 +2027,6 @@ 
Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, assert(get_impl_options.column_family); - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } if (read_options.timestamp) { const Status s = FailIfTsMismatchCf(get_impl_options.column_family, @@ -2297,7 +2320,7 @@ std::vector DBImpl::MultiGet( } std::vector DBImpl::MultiGet( - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values, std::vector* timestamps) { @@ -2309,12 +2332,30 @@ std::vector DBImpl::MultiGet( assert(column_family.size() == num_keys); std::vector stat_list(num_keys); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + stat_list[i] = s; + } + return stat_list; + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } + bool should_fail = false; for (size_t i = 0; i < num_keys; ++i) { assert(column_family[i]); if (read_options.timestamp) { - stat_list[i] = FailIfTsMismatchCf( - column_family[i], *(read_options.timestamp), /*ts_for_read=*/true); + stat_list[i] = + FailIfTsMismatchCf(column_family[i], *(read_options.timestamp), + /*ts_for_read=*/true); if (!stat_list[i].ok()) { should_fail = true; } @@ -2627,10 +2668,26 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, /* timestamps */ nullptr, statuses, sorted_input); } -void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, +void DBImpl::MultiGet(const ReadOptions& _read_options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } MultiGetCommon(read_options, num_keys, column_families, keys, values, /* columns */ nullptr, timestamps, statuses, sorted_input); } @@ -2645,7 +2702,6 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, if (num_keys == 0) { return; } - bool should_fail = false; for (size_t i = 0; i < num_keys; ++i) { ColumnFamilyHandle* cfh = column_families[i]; @@ -2828,11 +2884,28 @@ void DBImpl::MultiGet(const ReadOptions& read_options, /* timestamps */ nullptr, statuses, sorted_input); } -void DBImpl::MultiGet(const ReadOptions& read_options, +void DBImpl::MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input) { + if 
(_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } MultiGetCommon(read_options, column_family, num_keys, keys, values, /* columns */ nullptr, timestamps, statuses, sorted_input); } @@ -2881,9 +2954,20 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, } void DBImpl::MultiGetWithCallback( - const ReadOptions& read_options, ColumnFamilyHandle* column_family, + const ReadOptions& _read_options, ColumnFamilyHandle* column_family, ReadCallback* callback, autovector* sorted_keys) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + assert(false); + return; + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } + std::array multiget_cf_data; multiget_cf_data[0] = MultiGetColumnFamilyData(column_family, nullptr); std::function* sorted_keys, SuperVersion* super_version, SequenceNumber snapshot, ReadCallback* callback) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call MultiGet with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET); @@ -3383,8 +3462,19 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options, return s.ok() || s.IsIncomplete(); } -Iterator* DBImpl::NewIterator(const ReadOptions& read_options, +Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { + return NewErrorIterator(Status::InvalidArgument( + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } + if (read_options.managed) { return NewErrorIterator( Status::NotSupported("Managed iterator is not supported anymore.")); @@ -3394,17 +3484,12 @@ Iterator* DBImpl::NewIterator(const ReadOptions& read_options, return NewErrorIterator(Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators.")); } - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); - } - assert(column_family); if (read_options.timestamp) { - const Status s = FailIfTsMismatchCf( - column_family, *(read_options.timestamp), /*ts_for_read=*/true); + const Status s = + FailIfTsMismatchCf(column_family, *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return NewErrorIterator(s); } @@ -3524,9 +3609,19 @@ ArenaWrappedDBIter* 
DBImpl::NewIteratorImpl(const ReadOptions& read_options, } Status DBImpl::NewIterators( - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { + return Status::InvalidArgument( + "Can only call NewIterators with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } if (read_options.managed) { return Status::NotSupported("Managed iterator is not supported anymore."); } @@ -3534,11 +3629,6 @@ Status DBImpl::NewIterators( return Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators."); } - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call NewIterators with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } if (read_options.timestamp) { for (auto* cf : column_families) { @@ -5793,12 +5883,35 @@ Status DBImpl::ClipColumnFamily(ColumnFamilyHandle* column_family, return status; } -Status DBImpl::VerifyFileChecksums(const ReadOptions& read_options) { - return VerifyChecksumInternal(read_options, /*use_file_checksum=*/true); +Status DBImpl::VerifyFileChecksums(const ReadOptions& _read_options) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kVerifyFileChecksums) { + return Status::InvalidArgument( + "Can only call VerifyFileChecksums with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or " + "`Env::IOActivity::kVerifyFileChecksums`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kVerifyFileChecksums; + } + return VerifyChecksumInternal(read_options, + /*use_file_checksum=*/true); } -Status DBImpl::VerifyChecksum(const ReadOptions& read_options) { - return VerifyChecksumInternal(read_options, /*use_file_checksum=*/false); +Status DBImpl::VerifyChecksum(const ReadOptions& _read_options) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kVerifyDBChecksum) { + return Status::InvalidArgument( + "Can only call VerifyChecksum with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kVerifyDBChecksum`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kVerifyDBChecksum; + } + return VerifyChecksumInternal(read_options, + /*use_file_checksum=*/false); } Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, @@ -5810,12 +5923,6 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, Status s; - if (read_options.io_activity != Env::IOActivity::kUnknown) { - s = Status::InvalidArgument( - "Cannot verify file checksum with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - return s; - } if (use_file_checksum) { FileChecksumGenFactory* const file_checksum_gen_factory = immutable_db_options_.file_checksum_gen_factory.get(); @@ -5867,7 +5974,7 @@ Status DBImpl::VerifyChecksumInternal(const ReadOptions& read_options, fmeta->file_checksum_func_name, fname, 
read_options); } else { - s = ROCKSDB_NAMESPACE::VerifySstFileChecksum( + s = ROCKSDB_NAMESPACE::VerifySstFileChecksumInternal( opts, file_options_, read_options, fname, fd.largest_seqno); } RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES, @@ -5930,12 +6037,6 @@ Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected, const std::string& func_name_expected, const std::string& fname, const ReadOptions& read_options) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call VerifyChecksum with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } - Status s; if (file_checksum_expected == kUnknownFileChecksum) { return s; @@ -5946,8 +6047,7 @@ Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected, fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(), func_name_expected, &file_checksum, &func_name, read_options.readahead_size, immutable_db_options_.allow_mmap_reads, - io_tracer_, immutable_db_options_.rate_limiter.get(), - read_options.rate_limiter_priority); + io_tracer_, immutable_db_options_.rate_limiter.get(), read_options); if (s.ok()) { assert(func_name_expected == func_name); if (file_checksum != file_checksum_expected) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 0c654035b..3711f66a3 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -234,7 +234,7 @@ class DBImpl : public DB { virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; - virtual Status Get(const ReadOptions& options, + virtual Status Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) override; @@ -265,7 +265,7 @@ class DBImpl : public DB { const std::vector& keys, std::vector* values) override; virtual std::vector MultiGet( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values, std::vector* timestamps) override; @@ -280,8 +280,9 @@ class DBImpl : public DB { void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; - void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, - const size_t num_keys, const Slice* keys, PinnableSlice* values, + void MultiGet(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input = false) override; @@ -289,13 +290,13 @@ class DBImpl : public DB { ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input = false) override; - void MultiGet(const ReadOptions& options, const size_t num_keys, + void MultiGet(const ReadOptions& _read_options, const size_t num_keys, ColumnFamilyHandle** column_families, const Slice* keys, PinnableSlice* values, std::string* timestamps, Status* statuses, const bool sorted_input = false) override; void MultiGetWithCallback( - const ReadOptions& options, ColumnFamilyHandle* column_family, + const ReadOptions& _read_options, ColumnFamilyHandle* column_family, ReadCallback* callback, autovector* sorted_keys); @@ -336,10 +337,10 @@ class DBImpl : public DB { bool* value_found = nullptr) 
override; using DB::NewIterator; - virtual Iterator* NewIterator(const ReadOptions& options, + virtual Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) override; virtual Status NewIterators( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) override; @@ -627,6 +628,14 @@ class DBImpl : public DB { int* number_of_operands = nullptr; }; + Status GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value); + + Status GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp); + // Function that Get and KeyMayExist call with no_io true or false // Note: 'value_found' from KeyMayExist propagates here // This function is also called by GetMergeOperands diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 6f7e95fa7..783d55082 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -36,14 +36,19 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options, /*timestamp*/ nullptr); } -Status DBImplReadOnly::Get(const ReadOptions& read_options, +Status DBImplReadOnly::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, std::string* timestamp) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; } assert(pinnable_val != nullptr); PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); @@ -52,8 +57,9 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options, assert(column_family); if (read_options.timestamp) { - const Status s = FailIfTsMismatchCf( - column_family, *(read_options.timestamp), /*ts_for_read=*/true); + const Status s = + FailIfTsMismatchCf(column_family, *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return s; } @@ -116,17 +122,23 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options, return s; } -Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options, +Iterator* DBImplReadOnly::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; } assert(column_family); if (read_options.timestamp) { - const Status s = FailIfTsMismatchCf( - column_family, *(read_options.timestamp), 
/*ts_for_read=*/true); + const Status s = + FailIfTsMismatchCf(column_family, *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return NewErrorIterator(s); } diff --git a/db/db_impl/db_impl_readonly.h b/db/db_impl/db_impl_readonly.h index ccd52e407..972e5531a 100644 --- a/db/db_impl/db_impl_readonly.h +++ b/db/db_impl/db_impl_readonly.h @@ -28,14 +28,14 @@ class DBImplReadOnly : public DBImpl { virtual Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; - Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value, - std::string* timestamp) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) override; // TODO: Implement ReadOnly MultiGet? using DBImpl::NewIterator; - virtual Iterator* NewIterator(const ReadOptions&, + virtual Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) override; virtual Status NewIterators( @@ -179,4 +179,3 @@ class DBImplReadOnly : public DBImpl { friend class DB; }; } // namespace ROCKSDB_NAMESPACE - diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index de3536841..ebab44cca 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -340,16 +340,36 @@ Status DBImplSecondary::RecoverLogFiles( } // Implementation of the DB interface -Status DBImplSecondary::Get(const ReadOptions& read_options, +Status DBImplSecondary::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } return GetImpl(read_options, column_family, key, value, /*timestamp*/ nullptr); } -Status DBImplSecondary::Get(const ReadOptions& read_options, +Status DBImplSecondary::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, std::string* timestamp) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } return GetImpl(read_options, column_family, key, value, timestamp); } @@ -357,11 +377,6 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, std::string* timestamp) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } assert(pinnable_val != nullptr); PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); @@ -452,8 +467,18 @@ Status 
DBImplSecondary::GetImpl(const ReadOptions& read_options, return s; } -Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options, +Iterator* DBImplSecondary::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { + return NewErrorIterator(Status::InvalidArgument( + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } if (read_options.managed) { return NewErrorIterator( Status::NotSupported("Managed iterator is not supported anymore.")); @@ -462,16 +487,12 @@ Iterator* DBImplSecondary::NewIterator(const ReadOptions& read_options, return NewErrorIterator(Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators.")); } - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); - } assert(column_family); if (read_options.timestamp) { - const Status s = FailIfTsMismatchCf( - column_family, *(read_options.timestamp), /*ts_for_read=*/true); + const Status s = + FailIfTsMismatchCf(column_family, *(read_options.timestamp), + /*ts_for_read=*/true); if (!s.ok()) { return NewErrorIterator(s); } @@ -523,9 +544,19 @@ ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl( } Status DBImplSecondary::NewIterators( - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { + return Status::InvalidArgument( + "Can only call NewIterators with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } if (read_options.managed) { return Status::NotSupported("Managed iterator is not supported anymore."); } @@ -533,11 +564,6 @@ Status DBImplSecondary::NewIterators( return Status::NotSupported( "ReadTier::kPersistedData is not yet supported in iterators."); } - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call NewIterators with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } ReadCallback* read_callback = nullptr; // No read callback provided. if (iterators == nullptr) { return Status::InvalidArgument("iterators not allowed to be nullptr"); diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index faaa98721..50edc4ec9 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -96,12 +96,13 @@ class DBImplSecondary : public DBImpl { // workaround, the secondaries can be opened with `max_open_files=-1` so that // it eagerly keeps all talbe files open and is able to access the contents of // deleted files via prior open fd. 
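(Editorial aside, not part of the patch: the `max_open_files=-1` workaround described in the comment above can be illustrated with a minimal sketch. It uses the standard DB::OpenAsSecondary API; the paths are hypothetical and error handling is elided.)

// Minimal sketch of the workaround described above: open the secondary with
// max_open_files = -1 so table readers keep their file descriptors open and
// can still serve reads from files the primary has since deleted.
#include "rocksdb/db.h"
#include "rocksdb/options.h"
using namespace ROCKSDB_NAMESPACE;

int main() {
  Options options;
  options.max_open_files = -1;  // eagerly keep all table files open
  DB* secondary = nullptr;
  Status s = DB::OpenAsSecondary(options, "/path/to/primary_db",
                                 "/path/to/secondary_db", &secondary);
  if (s.ok()) {
    // ... issue reads via secondary->Get(...), then:
    delete secondary;
  }
  return 0;
}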
- Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; - Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value, - std::string* timestamp) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, std::string* timestamp) override; Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, @@ -117,7 +118,7 @@ class DBImplSecondary : public DBImpl { // deleted. As a partial hacky workaround, the secondaries can be opened with // `max_open_files=-1` so that it eagerly keeps all talbe files open and is // able to access the contents of deleted files via prior open fd. - Iterator* NewIterator(const ReadOptions&, + Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) override; ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& read_options, @@ -127,7 +128,7 @@ class DBImplSecondary : public DBImpl { bool expose_blob_index = false, bool allow_refresh = true); - Status NewIterators(const ReadOptions& options, + Status NewIterators(const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) override; @@ -324,4 +325,3 @@ class DBImplSecondary : public DBImpl { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/db/db_iter.cc b/db/db_iter.cc index 3d980c878..e547c2e1c 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -77,6 +77,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options, expose_blob_index_(expose_blob_index), is_blob_(false), arena_mode_(arena_mode), + io_activity_(read_options.io_activity), db_impl_(db_impl), cfd_(cfd), timestamp_ub_(read_options.timestamp), @@ -196,12 +197,11 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key, // TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to // avoid having to copy options back and forth. - // TODO: plumb Env::IOActivity ReadOptions read_options; read_options.read_tier = read_tier_; read_options.fill_cache = fill_cache_; read_options.verify_checksums = verify_checksums_; - + read_options.io_activity = io_activity_; constexpr FilePrefetchBuffer* prefetch_buffer = nullptr; constexpr uint64_t* bytes_read = nullptr; diff --git a/db/db_iter.h b/db/db_iter.h index e1663bb7e..163da3265 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -384,6 +384,7 @@ class DBIter final : public Iterator { bool expose_blob_index_; bool is_blob_; bool arena_mode_; + const Env::IOActivity io_activity_; // List of operands for merge operator. MergeContext merge_context_; LocalStatistics local_stats_; diff --git a/db/db_rate_limiter_test.cc b/db/db_rate_limiter_test.cc index 84c2df230..60cde3157 100644 --- a/db/db_rate_limiter_test.cc +++ b/db/db_rate_limiter_test.cc @@ -235,9 +235,18 @@ TEST_P(DBRateLimiterOnReadTest, VerifyChecksum) { ASSERT_EQ(0, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); ASSERT_OK(db_->VerifyChecksum(GetReadOptions())); - // There are 3 reads per file: ReadMetaIndexBlock, - // VerifyChecksumInMetaBlocks, VerifyChecksumInBlocks - int expected = kNumFiles * 3; + // In BufferedIO, + // there are 7 reads per file, each of which will be rate-limited. + // During open: read footer, meta index block, properties block, index block. 
+ // During actual checksum verification: read meta index block, verify checksum + // in meta blocks and verify checksum in file blocks. + // + // In DirectIO, where we support tail prefetching, during table open, we only + // do 1 read instead of 4 as described above. Actual checksum verification + // reads stay the same. + int num_read_per_file = (!use_direct_io_) ? 7 : 4; + int expected = kNumFiles * num_read_per_file; + ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); } diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index c665ed83d..d25c42118 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -217,6 +217,8 @@ Status ExternalSstFileIngestionJob::Prepare( std::string requested_checksum_func_name; // TODO: rate limit file reads for checksum calculation during file // ingestion. + // TODO: plumb Env::IOActivity + ReadOptions ro; IOStatus io_s = GenerateOneFileChecksum( fs_.get(), files_to_ingest_[i].internal_file_path, db_options_.file_checksum_gen_factory.get(), @@ -224,8 +226,7 @@ Status ExternalSstFileIngestionJob::Prepare( &generated_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads, io_tracer_, - db_options_.rate_limiter.get(), - Env::IO_TOTAL /* rate_limiter_priority */); + db_options_.rate_limiter.get(), ro); if (!io_s.ok()) { status = io_s; ROCKS_LOG_WARN(db_options_.info_log, @@ -1058,13 +1059,15 @@ IOStatus ExternalSstFileIngestionJob::GenerateChecksumForIngestedFile( std::string file_checksum_func_name; std::string requested_checksum_func_name; // TODO: rate limit file reads for checksum calculation during file ingestion. + // TODO: plumb Env::IOActivity + ReadOptions ro; IOStatus io_s = GenerateOneFileChecksum( fs_.get(), file_to_ingest->internal_file_path, db_options_.file_checksum_gen_factory.get(), requested_checksum_func_name, &file_checksum, &file_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads, io_tracer_, db_options_.rate_limiter.get(), - Env::IO_TOTAL /* rate_limiter_priority */); + ro); if (!io_s.ok()) { return io_s; } diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 0a9dd4251..a954d747b 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -248,6 +248,7 @@ DECLARE_bool(avoid_flush_during_recovery); DECLARE_uint64(max_write_batch_group_size_bytes); DECLARE_bool(level_compaction_dynamic_level_bytes); DECLARE_int32(verify_checksum_one_in); +DECLARE_int32(verify_file_checksums_one_in); DECLARE_int32(verify_db_one_in); DECLARE_int32(continuous_verification_interval); DECLARE_int32(get_property_one_in); diff --git a/db_stress_tool/db_stress_driver.cc b/db_stress_tool/db_stress_driver.cc index 4bf82c9d1..e8b75c1d1 100644 --- a/db_stress_tool/db_stress_driver.cc +++ b/db_stress_tool/db_stress_driver.cc @@ -14,6 +14,7 @@ namespace ROCKSDB_NAMESPACE { void ThreadBody(void* v) { + ThreadStatusUtil::RegisterThread(db_stress_env, ThreadStatus::USER); ThreadState* thread = reinterpret_cast(v); SharedState* shared = thread->shared; @@ -54,6 +55,7 @@ void ThreadBody(void* v) { shared->GetCondVar()->SignalAll(); } } + ThreadStatusUtil::UnregisterThread(); } bool RunStressTestImpl(SharedState* shared) { SystemClock* clock = db_stress_env->GetSystemClock().get(); diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h index 612d9fc6b..83e6838c7 100644 --- 
a/db_stress_tool/db_stress_env_wrapper.h +++ b/db_stress_tool/db_stress_env_wrapper.h @@ -32,6 +32,48 @@ class DbStressRandomAccessFileWrapper : public FSRandomAccessFileOwnerWrapper { #endif return target()->Read(offset, n, options, result, scratch, dbg); } + + IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, + const IOOptions& options, IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->MultiRead(reqs, num_reqs, options, dbg); + } + + IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Prefetch(offset, n, options, dbg); + } + + IOStatus ReadAsync(FSReadRequest& req, const IOOptions& options, + std::function cb, + void* cb_arg, void** io_handle, IOHandleDeleter* del_fn, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->ReadAsync(req, options, cb, cb_arg, io_handle, del_fn, + dbg); + } }; class DbStressFSWrapper : public FileSystemWrapper { diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 32764189c..7c499faf7 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -929,6 +929,13 @@ DEFINE_int32(verify_checksum_one_in, 0, " checksum verification of all the files in the database once for" " every N ops on average. 0 indicates that calls to" " VerifyChecksum() are disabled."); + +DEFINE_int32(verify_file_checksums_one_in, 0, + "If non-zero, then DB::VerifyFileChecksums() will be called to do" + " checksum verification of all the files in the database once for" + " every N ops on average. 0 indicates that calls to" + " VerifyFileChecksums() are disabled."); + DEFINE_int32(verify_db_one_in, 0, "If non-zero, call VerifyDb() once for every N ops. 0 indicates " "that VerifyDb() will not be called in OperateDb(). 
Note that " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index fce0e0f6a..66a630b3d 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -929,12 +929,28 @@ void StressTest::OperateDb(ThreadState* thread) { } if (thread->rand.OneInOpt(FLAGS_verify_checksum_one_in)) { + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_VERIFY_DB_CHECKSUM); Status status = db_->VerifyChecksum(); + ThreadStatusUtil::ResetThreadStatus(); if (!status.ok()) { VerificationAbort(shared, "VerifyChecksum status not OK", status); } } + if (thread->rand.OneInOpt(FLAGS_verify_file_checksums_one_in)) { + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_VERIFY_FILE_CHECKSUMS); + Status status = db_->VerifyFileChecksums(read_opts); + ThreadStatusUtil::ResetThreadStatus(); + if (!status.ok()) { + VerificationAbort(shared, "VerifyFileChecksums status not OK", + status); + } + } + if (thread->rand.OneInOpt(FLAGS_get_property_one_in)) { TestGetProperty(thread); } @@ -1034,10 +1050,18 @@ void StressTest::OperateDb(ThreadState* thread) { // If its the last iteration, ensure that multiget_batch_size is 1 multiget_batch_size = std::max(multiget_batch_size, 1); rand_keys = GenerateNKeys(thread, multiget_batch_size, i); + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_MULTIGET); TestMultiGet(thread, read_opts, rand_column_families, rand_keys); + ThreadStatusUtil::ResetThreadStatus(); i += multiget_batch_size - 1; } else { + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_GET); TestGet(thread, read_opts, rand_column_families, rand_keys); + ThreadStatusUtil::ResetThreadStatus(); } } else if (prob_op < prefix_bound) { assert(static_cast(FLAGS_readpercent) <= prob_op); @@ -1066,8 +1090,12 @@ void StressTest::OperateDb(ThreadState* thread) { if (!FLAGS_skip_verifydb && thread->rand.OneInOpt( FLAGS_verify_iterator_with_expected_state_one_in)) { + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_DBITERATOR); TestIterateAgainstExpected(thread, read_opts, rand_column_families, rand_keys); + ThreadStatusUtil::ResetThreadStatus(); } else { int num_seeks = static_cast(std::min( std::max(static_cast(thread->rand.Uniform(4)), @@ -1076,7 +1104,11 @@ void StressTest::OperateDb(ThreadState* thread) { static_cast(1)))); rand_keys = GenerateNKeys(thread, num_seeks, i); i += num_seeks - 1; + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_DBITERATOR); TestIterate(thread, read_opts, rand_column_families, rand_keys); + ThreadStatusUtil::ResetThreadStatus(); } } else { assert(iterate_bound <= prob_op); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 5d0ee2205..ed1b88b7b 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -585,6 +585,7 @@ class NonBatchedOpsStressTest : public StressTest { bool do_consistency_check = thread->rand.OneIn(4); ReadOptions readoptionscopy = read_opts; + if (do_consistency_check) { 
readoptionscopy.snapshot = db_->GetSnapshot(); } @@ -778,9 +779,17 @@ class NonBatchedOpsStressTest : public StressTest { if (use_txn) { assert(txn); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_GET); tmp_s = txn->Get(readoptionscopy, cfh, key, &value); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_MULTIGET); } else { + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_GET); tmp_s = db_->Get(readoptionscopy, cfh, key, &value); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_MULTIGET); } if (!tmp_s.ok() && !tmp_s.IsNotFound()) { fprintf(stderr, "Get error: %s\n", s.ToString().c_str()); diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 59fbf1285..618d49e03 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -81,13 +81,12 @@ void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment, Status FilePrefetchBuffer::Read(const IOOptions& opts, RandomAccessFileReader* reader, - Env::IOPriority rate_limiter_priority, uint64_t read_len, uint64_t chunk_len, uint64_t rounddown_start, uint32_t index) { Slice result; Status s = reader->Read(opts, rounddown_start + chunk_len, read_len, &result, bufs_[index].buffer_.BufferStart() + chunk_len, - /*aligned_buf=*/nullptr, rate_limiter_priority); + /*aligned_buf=*/nullptr); #ifndef NDEBUG if (result.size() < read_len) { // Fake an IO error to force db_stress fault injection to ignore @@ -134,8 +133,7 @@ Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts, Status FilePrefetchBuffer::Prefetch(const IOOptions& opts, RandomAccessFileReader* reader, - uint64_t offset, size_t n, - Env::IOPriority rate_limiter_priority) { + uint64_t offset, size_t n) { if (!enable_ || reader == nullptr) { return Status::OK(); } @@ -160,8 +158,7 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts, true /*refit_tail*/, chunk_len); size_t read_len = static_cast(roundup_len - chunk_len); - Status s = Read(opts, reader, rate_limiter_priority, read_len, chunk_len, - rounddown_offset, curr_); + Status s = Read(opts, reader, read_len, chunk_len, rounddown_offset, curr_); if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) { RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len); } @@ -328,8 +325,7 @@ void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) { Status FilePrefetchBuffer::HandleOverlappingData( const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, - size_t length, size_t readahead_size, - Env::IOPriority /*rate_limiter_priority*/, bool& copy_to_third_buffer, + size_t length, size_t readahead_size, bool& copy_to_third_buffer, uint64_t& tmp_offset, size_t& tmp_length) { Status s; size_t alignment = reader->file()->GetRequiredBufferAlignment(); @@ -412,10 +408,11 @@ Status FilePrefetchBuffer::HandleOverlappingData( // curr_, send async request on curr_, wait for poll to fill second // buffer (if any), and copy remaining data from second buffer to third // buffer. 
-Status FilePrefetchBuffer::PrefetchAsyncInternal( - const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, - size_t length, size_t readahead_size, Env::IOPriority rate_limiter_priority, - bool& copy_to_third_buffer) { +Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t length, + size_t readahead_size, + bool& copy_to_third_buffer) { if (!enable_) { return Status::OK(); } @@ -442,8 +439,7 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal( // - switch buffers and curr_ now points to second buffer to copy remaining // data. s = HandleOverlappingData(opts, reader, offset, length, readahead_size, - rate_limiter_priority, copy_to_third_buffer, - tmp_offset, tmp_length); + copy_to_third_buffer, tmp_offset, tmp_length); if (!s.ok()) { return s; } @@ -581,8 +577,7 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal( } if (read_len1 > 0) { - s = Read(opts, reader, rate_limiter_priority, read_len1, chunk_len1, - rounddown_start1, curr_); + s = Read(opts, reader, read_len1, chunk_len1, rounddown_start1, curr_); if (!s.ok()) { if (bufs_[second].io_handle_ != nullptr) { std::vector handles; @@ -610,10 +605,9 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t n, Slice* result, Status* status, - Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */) { bool ret = TryReadFromCacheUntracked(opts, reader, offset, n, result, status, - rate_limiter_priority, for_compaction); + for_compaction); if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) { if (ret) { RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT); @@ -627,7 +621,7 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts, bool FilePrefetchBuffer::TryReadFromCacheUntracked( const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t n, Slice* result, Status* status, - Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */) { + bool for_compaction /* = false */) { if (track_min_offset_ && offset < min_offset_read_) { min_offset_read_ = static_cast(offset); } @@ -647,8 +641,7 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( assert(reader != nullptr); assert(max_readahead_size_ >= readahead_size_); if (for_compaction) { - s = Prefetch(opts, reader, offset, std::max(n, readahead_size_), - rate_limiter_priority); + s = Prefetch(opts, reader, offset, std::max(n, readahead_size_)); } else { if (implicit_auto_readahead_) { if (!IsEligibleForPrefetch(offset, n)) { @@ -657,8 +650,7 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return false; } } - s = Prefetch(opts, reader, offset, n + readahead_size_, - rate_limiter_priority); + s = Prefetch(opts, reader, offset, n + readahead_size_); } if (!s.ok()) { if (status) { @@ -681,12 +673,12 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return true; } -bool FilePrefetchBuffer::TryReadFromCacheAsync( - const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, - size_t n, Slice* result, Status* status, - Env::IOPriority rate_limiter_priority) { - bool ret = TryReadFromCacheAsyncUntracked(opts, reader, offset, n, result, - status, rate_limiter_priority); +bool FilePrefetchBuffer::TryReadFromCacheAsync(const IOOptions& opts, + RandomAccessFileReader* reader, + uint64_t offset, size_t n, + Slice* result, Status* status) { + bool ret = + TryReadFromCacheAsyncUntracked(opts, reader, offset, n, result, status); if (usage_ == 
FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) { if (ret) { RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT); @@ -699,8 +691,7 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync( bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, - size_t n, Slice* result, Status* status, - Env::IOPriority rate_limiter_priority) { + size_t n, Slice* result, Status* status) { if (track_min_offset_ && offset < min_offset_read_) { min_offset_read_ = static_cast(offset); } @@ -755,7 +746,7 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( // Prefetch n + readahead_size_/2 synchronously as remaining // readahead_size_/2 will be prefetched asynchronously. s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2, - rate_limiter_priority, copy_to_third_buffer); + copy_to_third_buffer); explicit_prefetch_submitted_ = false; if (!s.ok()) { if (status) { diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index ae8472496..89d96d438 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -183,14 +183,12 @@ class FilePrefetchBuffer { bool Enabled() const { return enable_; } // Load data into the buffer from a file. + // opts : the IO options to use. // reader : the file reader. // offset : the file offset to start reading from. // n : the number of bytes to read. - // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to - // bypass. Status Prefetch(const IOOptions& opts, RandomAccessFileReader* reader, - uint64_t offset, size_t n, - Env::IOPriority rate_limiter_priority); + uint64_t offset, size_t n); // Request for reading the data from a file asynchronously. // If data already exists in the buffer, result will be updated. @@ -217,18 +215,14 @@ class FilePrefetchBuffer { // n : the number of bytes. // result : output buffer to put the data into. // s : output status. - // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to - // bypass. // for_compaction : true if cache read is done for compaction read. bool TryReadFromCache(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t n, Slice* result, Status* s, - Env::IOPriority rate_limiter_priority, bool for_compaction = false); bool TryReadFromCacheAsync(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, - size_t n, Slice* result, Status* status, - Env::IOPriority rate_limiter_priority); + size_t n, Slice* result, Status* status); // The minimum `offset` ever passed to TryReadFromCache(). This will nly be // tracked if track_min_offset = true. 
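(Editorial aside, not part of the patch: with rate_limiter_priority removed from the FilePrefetchBuffer methods above, callers now pass the priority through IOOptions, as the updated prefetch_test below also shows. A minimal sketch, assuming an initialized RandomAccessFileReader* reader and FilePrefetchBuffer fpb and the corresponding headers:)

// The rate limiting priority now travels inside IOOptions instead of being a
// separate parameter to Prefetch()/TryReadFromCache().
IOOptions opts;
opts.rate_limiter_priority = Env::IO_LOW;  // charge these reads at low priority
Slice result;
Status s;
// Serves the read from the prefetch buffer if possible; otherwise prefetches
// n + readahead bytes using the priority carried in opts.
bool served_from_buffer =
    fpb.TryReadFromCache(opts, reader, /*offset=*/0, /*n=*/4096, &result, &s);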
@@ -305,12 +299,11 @@ class FilePrefetchBuffer { Status PrefetchAsyncInternal(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t length, size_t readahead_size, - Env::IOPriority rate_limiter_priority, bool& copy_to_third_buffer); Status Read(const IOOptions& opts, RandomAccessFileReader* reader, - Env::IOPriority rate_limiter_priority, uint64_t read_len, - uint64_t chunk_len, uint64_t rounddown_start, uint32_t index); + uint64_t read_len, uint64_t chunk_len, uint64_t rounddown_start, + uint32_t index); Status ReadAsync(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t read_len, uint64_t rounddown_start, uint32_t index); @@ -409,7 +402,6 @@ class FilePrefetchBuffer { Status HandleOverlappingData(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t length, size_t readahead_size, - Env::IOPriority rate_limiter_priority, bool& copy_to_third_buffer, uint64_t& tmp_offset, size_t& tmp_length); @@ -417,14 +409,12 @@ class FilePrefetchBuffer { RandomAccessFileReader* reader, uint64_t offset, size_t n, Slice* result, Status* s, - Env::IOPriority rate_limiter_priority, bool for_compaction = false); bool TryReadFromCacheAsyncUntracked(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, size_t n, Slice* result, - Status* status, - Env::IOPriority rate_limiter_priority); + Status* status); std::vector bufs_; // curr_ represents the index for bufs_ indicating which buffer is being diff --git a/file/file_util.cc b/file/file_util.cc index 46faac67c..c5bb22e48 100644 --- a/file/file_util.cc +++ b/file/file_util.cc @@ -137,7 +137,7 @@ IOStatus GenerateOneFileChecksum( std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool /*allow_mmap_reads*/, std::shared_ptr& io_tracer, RateLimiter* rate_limiter, - Env::IOPriority rate_limiter_priority) { + const ReadOptions& read_options) { if (checksum_factory == nullptr) { return IOStatus::InvalidArgument("Checksum factory is invalid"); } @@ -206,11 +206,15 @@ IOStatus GenerateOneFileChecksum( Slice slice; uint64_t offset = 0; IOOptions opts; + io_s = reader->PrepareIOOptions(read_options, opts); + if (!io_s.ok()) { + return io_s; + } while (size > 0) { size_t bytes_to_read = static_cast(std::min(uint64_t{readahead_size}, size)); - io_s = reader->Read(opts, offset, bytes_to_read, &slice, buf.get(), nullptr, - rate_limiter_priority); + io_s = + reader->Read(opts, offset, bytes_to_read, &slice, buf.get(), nullptr); if (!io_s.ok()) { return IOStatus::Corruption("file read failed with error: " + io_s.ToString()); diff --git a/file/file_util.h b/file/file_util.h index 8b59731eb..1ee297955 100644 --- a/file/file_util.h +++ b/file/file_util.h @@ -59,7 +59,7 @@ extern IOStatus GenerateOneFileChecksum( std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool allow_mmap_reads, std::shared_ptr& io_tracer, RateLimiter* rate_limiter, - Env::IOPriority rate_limiter_priority); + const ReadOptions& read_options); inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, SystemClock* clock, IOOptions& opts) { diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 15b1c6beb..fe3d2cf70 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -2606,8 +2606,10 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) { fpb.UpdateReadPattern(0, 4096, false); // Now read some data that straddles the two prefetch buffers - offset 8192 to // 16384 - ASSERT_TRUE(fpb.TryReadFromCacheAsync(IOOptions(), r.get(), 8192, 8192, - &result, 
&s, Env::IOPriority::IO_LOW)); + IOOptions io_opts; + io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; + ASSERT_TRUE( + fpb.TryReadFromCacheAsync(io_opts, r.get(), 8192, 8192, &result, &s)); } TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { @@ -2642,9 +2644,10 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { } ASSERT_TRUE(s.IsTryAgain()); - ASSERT_TRUE(fpb.TryReadFromCacheAsync(IOOptions(), r.get(), /*offset=*/3000, - /*length=*/4000, &async_result, &s, - Env::IOPriority::IO_LOW)); + IOOptions io_opts; + io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; + ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), /*offset=*/3000, + /*length=*/4000, &async_result, &s)); // No sync call should be made. HistogramData sst_read_micros; stats()->histogramData(SST_READ_MICROS, &sst_read_micros); diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index e38e9ec13..8b22d617b 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -22,12 +22,37 @@ #include "util/rate_limiter_impl.h" namespace ROCKSDB_NAMESPACE { -const std::array - kReadHistograms{{ - FILE_READ_FLUSH_MICROS, - FILE_READ_COMPACTION_MICROS, - FILE_READ_DB_OPEN_MICROS, - }}; +inline Histograms GetFileReadHistograms(Statistics* stats, + Env::IOActivity io_activity) { + switch (io_activity) { + case Env::IOActivity::kFlush: + return Histograms::FILE_READ_FLUSH_MICROS; + case Env::IOActivity::kCompaction: + return Histograms::FILE_READ_COMPACTION_MICROS; + case Env::IOActivity::kDBOpen: + return Histograms::FILE_READ_DB_OPEN_MICROS; + default: + break; + } + + if (stats && stats->get_stats_level() > StatsLevel::kExceptDetailedTimers) { + switch (io_activity) { + case Env::IOActivity::kGet: + return Histograms::FILE_READ_GET_MICROS; + case Env::IOActivity::kMultiGet: + return Histograms::FILE_READ_MULTIGET_MICROS; + case Env::IOActivity::kDBIterator: + return Histograms::FILE_READ_DB_ITERATOR_MICROS; + case Env::IOActivity::kVerifyDBChecksum: + return Histograms::FILE_READ_VERIFY_DB_CHECKSUM_MICROS; + case Env::IOActivity::kVerifyFileChecksums: + return Histograms::FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS; + default: + break; + } + } + return Histograms::HISTOGRAM_ENUM_MAX; +} inline void RecordIOStats(Statistics* stats, Temperature file_temperature, bool is_last_level, size_t size) { IOSTATS_ADD(bytes_read, size); @@ -79,11 +104,11 @@ IOStatus RandomAccessFileReader::Create( return io_s; } -IOStatus RandomAccessFileReader::Read( - const IOOptions& opts, uint64_t offset, size_t n, Slice* result, - char* scratch, AlignedBuf* aligned_buf, - Env::IOPriority rate_limiter_priority) const { +IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset, + size_t n, Slice* result, char* scratch, + AlignedBuf* aligned_buf) const { (void)aligned_buf; + const Env::IOPriority rate_limiter_priority = opts.rate_limiter_priority; TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read", nullptr); @@ -108,9 +133,7 @@ IOStatus RandomAccessFileReader::Read( { StopWatch sw(clock_, stats_, hist_type_, - (opts.io_activity != Env::IOActivity::kUnknown) - ? kReadHistograms[(std::size_t)(opts.io_activity)] - : Histograms::HISTOGRAM_ENUM_MAX, + GetFileReadHistograms(stats_, opts.io_activity), (stats_ != nullptr) ? 
&elapsed : nullptr, true /*overwrite*/, true /*delay_enabled*/); auto prev_perf_level = GetPerfLevel(); @@ -277,9 +300,10 @@ bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) { return true; } -IOStatus RandomAccessFileReader::MultiRead( - const IOOptions& opts, FSReadRequest* read_reqs, size_t num_reqs, - AlignedBuf* aligned_buf, Env::IOPriority rate_limiter_priority) const { +IOStatus RandomAccessFileReader::MultiRead(const IOOptions& opts, + FSReadRequest* read_reqs, + size_t num_reqs, + AlignedBuf* aligned_buf) const { (void)aligned_buf; // suppress warning of unused variable in LITE mode assert(num_reqs > 0); @@ -288,6 +312,7 @@ IOStatus RandomAccessFileReader::MultiRead( assert(read_reqs[i].offset <= read_reqs[i + 1].offset); } #endif // !NDEBUG + const Env::IOPriority rate_limiter_priority = opts.rate_limiter_priority; // To be paranoid modify scratch a little bit, so in case underlying // FileSystem doesn't fill the buffer but return success and `scratch` returns @@ -304,9 +329,7 @@ IOStatus RandomAccessFileReader::MultiRead( uint64_t elapsed = 0; { StopWatch sw(clock_, stats_, hist_type_, - (opts.io_activity != Env::IOActivity::kUnknown) - ? kReadHistograms[(std::size_t)(opts.io_activity)] - : Histograms::HISTOGRAM_ENUM_MAX, + GetFileReadHistograms(stats_, opts.io_activity), (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/, true /*delay_enabled*/); auto prev_perf_level = GetPerfLevel(); @@ -495,16 +518,16 @@ IOStatus RandomAccessFileReader::ReadAsync( assert(read_async_info->buf_.CurrentSize() == 0); - StopWatch sw(clock_, nullptr /*stats*/, - Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/, - Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/, + StopWatch sw(clock_, stats_, hist_type_, + GetFileReadHistograms(stats_, opts.io_activity), + (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/, true /*delay_enabled*/); s = file_->ReadAsync(aligned_req, opts, read_async_callback, read_async_info, io_handle, del_fn, nullptr /*dbg*/); } else { - StopWatch sw(clock_, nullptr /*stats*/, - Histograms::HISTOGRAM_ENUM_MAX /*hist_type*/, - Histograms::HISTOGRAM_ENUM_MAX, &elapsed, true /*overwrite*/, + StopWatch sw(clock_, stats_, hist_type_, + GetFileReadHistograms(stats_, opts.io_activity), + (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/, true /*delay_enabled*/); s = file_->ReadAsync(req, opts, read_async_callback, read_async_info, io_handle, del_fn, nullptr /*dbg*/); diff --git a/file/random_access_file_reader.h b/file/random_access_file_reader.h index ab4d1e797..93cbe0e1a 100644 --- a/file/random_access_file_reader.h +++ b/file/random_access_file_reader.h @@ -164,31 +164,18 @@ class RandomAccessFileReader { // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns // the internally allocated buffer on return, and the result refers to a // region in aligned_buf. - // - // `rate_limiter_priority` is used to charge the internal rate limiter when - // enabled. The special value `Env::IO_TOTAL` makes this operation bypass the - // rate limiter. IOStatus Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result, - char* scratch, AlignedBuf* aligned_buf, - Env::IOPriority rate_limiter_priority) const; + char* scratch, AlignedBuf* aligned_buf) const; // REQUIRES: // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing. 
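For illustration, the new Read() calling convention above can be condensed into a short sketch; the helper name and the caller-provided buffers are assumptions, not part of this patch. The point is that the rate limiter priority now travels inside IOOptions instead of as a trailing argument.

// Sketch only: new calling convention after this patch. The helper name and
// the scratch buffer provided by the caller are illustrative assumptions.
IOStatus ReadAtLowPriority(RandomAccessFileReader* reader, uint64_t offset,
                           size_t n, Slice* result, char* scratch) {
  IOOptions opts;
  opts.rate_limiter_priority = Env::IO_LOW;  // was a separate Read() parameter
  return reader->Read(opts, offset, n, result, scratch,
                      /*aligned_buf=*/nullptr);
}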
// In non-direct IO mode, aligned_buf should be null; // In direct IO mode, aligned_buf stores the aligned buffer allocated inside // MultiRead, the result Slices in reqs refer to aligned_buf. - // - // `rate_limiter_priority` will be used to charge the internal rate limiter. - // It is not yet supported so the client must provide the special value - // `Env::IO_TOTAL` to bypass the rate limiter. IOStatus MultiRead(const IOOptions& opts, FSReadRequest* reqs, - size_t num_reqs, AlignedBuf* aligned_buf, - Env::IOPriority rate_limiter_priority) const; + size_t num_reqs, AlignedBuf* aligned_buf) const; - IOStatus Prefetch(uint64_t offset, size_t n, - const Env::IOPriority rate_limiter_priority) const { - IOOptions opts; - opts.rate_limiter_priority = rate_limiter_priority; + IOStatus Prefetch(const IOOptions& opts, uint64_t offset, size_t n) const { return file_->Prefetch(offset, n, opts, nullptr); } diff --git a/file/random_access_file_reader_test.cc b/file/random_access_file_reader_test.cc index 82ddcfff9..6b7b7eb68 100644 --- a/file/random_access_file_reader_test.cc +++ b/file/random_access_file_reader_test.cc @@ -83,8 +83,9 @@ TEST_F(RandomAccessFileReaderTest, ReadDirectIO) { Slice result; AlignedBuf buf; for (Env::IOPriority rate_limiter_priority : {Env::IO_LOW, Env::IO_TOTAL}) { - ASSERT_OK(r->Read(IOOptions(), offset, len, &result, nullptr, &buf, - rate_limiter_priority)); + IOOptions io_opts; + io_opts.rate_limiter_priority = rate_limiter_priority; + ASSERT_OK(r->Read(io_opts, offset, len, &result, nullptr, &buf)); ASSERT_EQ(result.ToString(), content.substr(offset, len)); } } @@ -146,8 +147,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) { reqs.push_back(std::move(r0)); reqs.push_back(std::move(r1)); AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /*rate_limiter_priority*/)); + ASSERT_OK( + r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf)); AssertResult(content, reqs); @@ -191,8 +192,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) { reqs.push_back(std::move(r1)); reqs.push_back(std::move(r2)); AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /*rate_limiter_priority*/)); + ASSERT_OK( + r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf)); AssertResult(content, reqs); @@ -236,8 +237,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) { reqs.push_back(std::move(r1)); reqs.push_back(std::move(r2)); AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /*rate_limiter_priority*/)); + ASSERT_OK( + r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf)); AssertResult(content, reqs); @@ -273,8 +274,8 @@ TEST_F(RandomAccessFileReaderTest, MultiReadDirectIO) { reqs.push_back(std::move(r0)); reqs.push_back(std::move(r1)); AlignedBuf aligned_buf; - ASSERT_OK(r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf, - Env::IO_TOTAL /*rate_limiter_priority*/)); + ASSERT_OK( + r->MultiRead(IOOptions(), reqs.data(), reqs.size(), &aligned_buf)); AssertResult(content, reqs); diff --git a/file/sequence_file_reader.h b/file/sequence_file_reader.h index 14350e8de..dc0e61bd2 100644 --- a/file/sequence_file_reader.h +++ b/file/sequence_file_reader.h @@ -99,6 +99,9 @@ class SequentialFileReader { // when less than n bytes are actually read (e.g. at end of file). 
To avoid // overcharging the rate limiter, the caller can use file size to cap n to // read until end of file. + // + // TODO(hx235): accept parameter `IOOptions` containing + // `rate_limiter_priority` like RandomAccessFileReader::Read() IOStatus Read(size_t n, Slice* result, char* scratch, Env::IOPriority rate_limiter_priority); diff --git a/include/rocksdb/convenience.h b/include/rocksdb/convenience.h index 7ce676df0..cff03f2bc 100644 --- a/include/rocksdb/convenience.h +++ b/include/rocksdb/convenience.h @@ -459,7 +459,7 @@ Status VerifySstFileChecksum(const Options& options, // Verify the checksum of file Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::string& file_path, const SequenceNumber& largest_seqno = 0); diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index ea99a7a9e..7a09d17a5 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -441,6 +441,11 @@ class Env : public Customizable { kFlush = 0, kCompaction = 1, kDBOpen = 2, + kGet = 3, + kMultiGet = 4, + kDBIterator = 5, + kVerifyDBChecksum = 6, + kVerifyFileChecksums = 7, kUnknown, // Keep last for easy array of non-unknowns }; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index a56b35dbe..7576d4a7b 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -555,6 +555,13 @@ enum Histograms : uint32_t { FILE_READ_FLUSH_MICROS, FILE_READ_COMPACTION_MICROS, FILE_READ_DB_OPEN_MICROS, + // The following `FILE_READ_*` require stats level greater than + // `StatsLevel::kExceptDetailedTimers` + FILE_READ_GET_MICROS, + FILE_READ_MULTIGET_MICROS, + FILE_READ_DB_ITERATOR_MICROS, + FILE_READ_VERIFY_DB_CHECKSUM_MICROS, + FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS, // The number of subcompactions actually scheduled during a compaction NUM_SUBCOMPACTIONS_SCHEDULED, diff --git a/include/rocksdb/thread_status.h b/include/rocksdb/thread_status.h index beecdfd25..8cfdf931f 100644 --- a/include/rocksdb/thread_status.h +++ b/include/rocksdb/thread_status.h @@ -57,6 +57,11 @@ struct ThreadStatus { OP_COMPACTION, OP_FLUSH, OP_DBOPEN, + OP_GET, + OP_MULTIGET, + OP_DBITERATOR, + OP_VERIFY_DB_CHECKSUM, + OP_VERIFY_FILE_CHECKSUMS, NUM_OP_TYPES }; diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 3cdcc9bb2..510ff1dd9 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -335,8 +335,22 @@ class Transaction { const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool /*sorted_input*/ = false) { + if (options.io_activity != Env::IOActivity::kUnknown && + options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + for (size_t i = 0; i < num_keys; ++i) { - statuses[i] = Get(options, column_family, keys[i], &values[i]); + statuses[i] = GetImpl(options, column_family, keys[i], &values[i]); } } @@ -673,6 +687,21 @@ class Transaction { id_ = id; } + virtual Status GetImpl(const ReadOptions& /* options */, + ColumnFamilyHandle* /* column_family */, + const Slice& /* key */, std::string* /* value */) { + return Status::NotSupported("Not implemented"); + } + + virtual Status 
GetImpl(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* pinnable_val) { + assert(pinnable_val != nullptr); + auto s = GetImpl(options, column_family, key, pinnable_val->GetSelf()); + pinnable_val->PinSelf(); + return s; + } + virtual uint64_t GetLastLogNumber() const { return log_number_; } private: diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index f75e002b8..c75c233db 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5629,6 +5629,17 @@ class HistogramTypeJni { return 0x3B; case ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_OPEN_MICROS: return 0x3C; + case ROCKSDB_NAMESPACE::Histograms::FILE_READ_GET_MICROS: + return 0x3D; + case ROCKSDB_NAMESPACE::Histograms::FILE_READ_MULTIGET_MICROS: + return 0x3E; + case ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_ITERATOR_MICROS: + return 0x3F; + case ROCKSDB_NAMESPACE::Histograms::FILE_READ_VERIFY_DB_CHECKSUM_MICROS: + return 0x40; + case ROCKSDB_NAMESPACE::Histograms:: + FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS: + return 0x41; case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: // 0x1F for backwards compatibility on current minor version. return 0x1F; @@ -5754,6 +5765,18 @@ class HistogramTypeJni { return ROCKSDB_NAMESPACE::Histograms::FILE_READ_COMPACTION_MICROS; case 0x3C: return ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_OPEN_MICROS; + case 0x3D: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_GET_MICROS; + case 0x3E: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_MULTIGET_MICROS; + case 0x3F: + return ROCKSDB_NAMESPACE::Histograms::FILE_READ_DB_ITERATOR_MICROS; + case 0x40: + return ROCKSDB_NAMESPACE::Histograms:: + FILE_READ_VERIFY_DB_CHECKSUM_MICROS; + case 0x41: + return ROCKSDB_NAMESPACE::Histograms:: + FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS; case 0x1F: // 0x1F for backwards compatibility on current minor version. return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java index aad0d9550..41fe241ad 100644 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ b/java/src/main/java/org/rocksdb/HistogramType.java @@ -175,6 +175,16 @@ public enum HistogramType { FILE_READ_DB_OPEN_MICROS((byte) 0x3C), + FILE_READ_GET_MICROS((byte) 0x3D), + + FILE_READ_MULTIGET_MICROS((byte) 0x3E), + + FILE_READ_DB_ITERATOR_MICROS((byte) 0x3F), + + FILE_READ_VERIFY_DB_CHECKSUM_MICROS((byte) 0x40), + + FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS((byte) 0x41), + // 0x1F for backwards compatibility on current minor version. 
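Per the comment added in statistics.h above, the new FILE_READ_* histograms are only populated at stats levels greater than StatsLevel::kExceptDetailedTimers. A minimal application-side sketch, assuming the DB is opened elsewhere with these options:

// Minimal sketch of enabling and reading one of the new histograms.
using namespace ROCKSDB_NAMESPACE;

Options options;
options.statistics = CreateDBStatistics();
options.statistics->set_stats_level(StatsLevel::kAll);  // > kExceptDetailedTimers

// ... open the DB with `options` and issue some Get() calls ...

HistogramData data;
options.statistics->histogramData(Histograms::FILE_READ_GET_MICROS, &data);
// data.count / data.average now reflect SST and blob file read time during Get().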
HISTOGRAM_ENUM_MAX((byte) 0x1F); diff --git a/microbench/db_basic_bench.cc b/microbench/db_basic_bench.cc index 20e7182f7..3851ddd5a 100644 --- a/microbench/db_basic_bench.cc +++ b/microbench/db_basic_bench.cc @@ -1555,8 +1555,7 @@ static void RandomAccessFileReaderRead(benchmark::State& state) { uint64_t idx = 0; for (auto _ : state) { s = readers[idx++ % kFileNum]->Read(io_options, 0, kDefaultPageSize / 3, - &result, scratch.get(), nullptr, - Env::IO_TOTAL); + &result, scratch.get(), nullptr); if (!s.ok()) { state.SkipWithError(s.ToString().c_str()); } diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index e4a18f943..c72754f7e 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -281,6 +281,13 @@ const std::vector> HistogramsNameMap = { {FILE_READ_FLUSH_MICROS, "rocksdb.file.read.flush.micros"}, {FILE_READ_COMPACTION_MICROS, "rocksdb.file.read.compaction.micros"}, {FILE_READ_DB_OPEN_MICROS, "rocksdb.file.read.db.open.micros"}, + {FILE_READ_GET_MICROS, "rocksdb.file.read.get.micros"}, + {FILE_READ_MULTIGET_MICROS, "rocksdb.file.read.multiget.micros"}, + {FILE_READ_DB_ITERATOR_MICROS, "rocksdb.file.read.db.iterator.micros"}, + {FILE_READ_VERIFY_DB_CHECKSUM_MICROS, + "rocksdb.file.read.verify.db.checksum.micros"}, + {FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS, + "rocksdb.file.read.verify.file.checksums.micros"}, {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, {BYTES_PER_READ, "rocksdb.bytes.per.read"}, {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, diff --git a/monitoring/thread_status_util_debug.cc b/monitoring/thread_status_util_debug.cc index 6e4fe8a9f..6d3f9be08 100644 --- a/monitoring/thread_status_util_debug.cc +++ b/monitoring/thread_status_util_debug.cc @@ -36,6 +36,16 @@ Env::IOActivity ThreadStatusUtil::TEST_GetExpectedIOActivity( return Env::IOActivity::kCompaction; case ThreadStatus::OperationType::OP_DBOPEN: return Env::IOActivity::kDBOpen; + case ThreadStatus::OperationType::OP_GET: + return Env::IOActivity::kGet; + case ThreadStatus::OperationType::OP_MULTIGET: + return Env::IOActivity::kMultiGet; + case ThreadStatus::OperationType::OP_DBITERATOR: + return Env::IOActivity::kDBIterator; + case ThreadStatus::OperationType::OP_VERIFY_DB_CHECKSUM: + return Env::IOActivity::kVerifyDBChecksum; + case ThreadStatus::OperationType::OP_VERIFY_FILE_CHECKSUMS: + return Env::IOActivity::kVerifyFileChecksums; default: return Env::IOActivity::kUnknown; } diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 57744a587..beccc0112 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -281,7 +281,7 @@ void BlockBasedTableIterator::InitDataBlock() { // Enabled from the very first IO when ReadOptions.readahead_size is set. 
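The HistogramsNameMap entries above expose the new histograms under "rocksdb.file.read.*.micros" names. A small lookup sketch using the public map; the helper function is an illustrative assumption, not part of the patch:

#include <string>
#include "rocksdb/statistics.h"

// Illustrative helper: map a histogram enum to the name it is reported under,
// e.g. FILE_READ_GET_MICROS -> "rocksdb.file.read.get.micros".
std::string HistogramName(ROCKSDB_NAMESPACE::Histograms h) {
  for (const auto& entry : ROCKSDB_NAMESPACE::HistogramsNameMap) {
    if (entry.first == h) {
      return entry.second;
    }
  }
  return "";
}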
block_prefetcher_.PrefetchIfNeeded( rep, data_block_handle, read_options_.readahead_size, is_for_compaction, - /*no_sequential_checking=*/false, read_options_.rate_limiter_priority); + /*no_sequential_checking=*/false, read_options_); Status s; table_->NewDataBlockIterator( read_options_, data_block_handle, &block_iter_, BlockType::kData, @@ -326,7 +326,7 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { block_prefetcher_.PrefetchIfNeeded( rep, data_block_handle, read_options_.readahead_size, is_for_compaction, /*no_sequential_checking=*/read_options_.async_io, - read_options_.rate_limiter_priority); + read_options_); Status s; table_->NewDataBlockIterator( diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 129f87888..bebeece75 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -860,10 +860,11 @@ Status BlockBasedTable::PrefetchTail( &prefetch_off_len_pair); #endif // NDEBUG + IOOptions opts; + Status s = file->PrepareIOOptions(ro, opts); // Try file system prefetch - if (!file->use_direct_io() && !force_direct_prefetch) { - if (!file->Prefetch(prefetch_off, prefetch_len, ro.rate_limiter_priority) - .IsNotSupported()) { + if (s.ok() && !file->use_direct_io() && !force_direct_prefetch) { + if (!file->Prefetch(opts, prefetch_off, prefetch_len).IsNotSupported()) { prefetch_buffer->reset(new FilePrefetchBuffer( 0 /* readahead_size */, 0 /* max_readahead_size */, false /* enable */, true /* track_min_offset */)); @@ -879,12 +880,8 @@ Status BlockBasedTable::PrefetchTail( nullptr /* fs */, nullptr /* clock */, stats, FilePrefetchBufferUsage::kTableOpenPrefetchTail)); - IOOptions opts; - Status s = file->PrepareIOOptions(ro, opts); if (s.ok()) { - s = (*prefetch_buffer) - ->Prefetch(opts, file, prefetch_off, prefetch_len, - ro.rate_limiter_priority); + s = (*prefetch_buffer)->Prefetch(opts, file, prefetch_off, prefetch_len); } return s; } diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 43af02fad..ab3ee01bb 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -144,7 +144,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks) if (file->use_direct_io()) { #endif // WITH_COROUTINES s = file->MultiRead(opts, &read_reqs[0], read_reqs.size(), - &direct_io_buf, options.rate_limiter_priority); + &direct_io_buf); #if defined(WITH_COROUTINES) } else { co_await batch->context()->reader().MultiReadAsync( diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 1734e76d7..08d4cc9e8 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -12,11 +12,12 @@ #include "table/block_based/block_based_table_reader.h" namespace ROCKSDB_NAMESPACE { -void BlockPrefetcher::PrefetchIfNeeded( - const BlockBasedTable::Rep* rep, const BlockHandle& handle, - const size_t readahead_size, bool is_for_compaction, - const bool no_sequential_checking, - const Env::IOPriority rate_limiter_priority) { +void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, + const BlockHandle& handle, + const size_t readahead_size, + bool is_for_compaction, + const bool no_sequential_checking, + const ReadOptions& read_options) { const size_t len = BlockBasedTable::BlockSizeWithTrailer(handle); const size_t 
offset = handle.offset(); @@ -27,8 +28,12 @@ void BlockPrefetcher::PrefetchIfNeeded( if (offset + len <= readahead_limit_) { return; } - Status s = rep->file->Prefetch(offset, len + compaction_readahead_size_, - rate_limiter_priority); + IOOptions opts; + Status s = rep->file->PrepareIOOptions(read_options, opts); + if (!s.ok()) { + return; + } + s = rep->file->Prefetch(opts, offset, len + compaction_readahead_size_); if (s.ok()) { readahead_limit_ = offset + len + compaction_readahead_size_; return; @@ -117,10 +122,14 @@ void BlockPrefetcher::PrefetchIfNeeded( // If prefetch is not supported, fall back to use internal prefetch buffer. // Discarding other return status of Prefetch calls intentionally, as // we can fallback to reading from disk if Prefetch fails. - Status s = rep->file->Prefetch( - handle.offset(), - BlockBasedTable::BlockSizeWithTrailer(handle) + readahead_size_, - rate_limiter_priority); + IOOptions opts; + Status s = rep->file->PrepareIOOptions(read_options, opts); + if (!s.ok()) { + return; + } + s = rep->file->Prefetch( + opts, handle.offset(), + BlockBasedTable::BlockSizeWithTrailer(handle) + readahead_size_); if (s.IsNotSupported()) { rep->CreateFilePrefetchBufferIfNotExists( initial_auto_readahead_size_, max_auto_readahead_size, diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index 774bf85ac..e2032ed9d 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -22,7 +22,7 @@ class BlockPrefetcher { const BlockHandle& handle, size_t readahead_size, bool is_for_compaction, const bool no_sequential_checking, - Env::IOPriority rate_limiter_priority); + const ReadOptions& read_options); FilePrefetchBuffer* prefetch_buffer() { return prefetch_buffer_.get(); } void UpdateReadPattern(const uint64_t& offset, const size_t& len) { diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index faddaeb1e..9b53fe72f 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -503,8 +503,7 @@ Status PartitionedFilterBlockReader::CacheDependencies( s = rep->file->PrepareIOOptions(ro, opts); if (s.ok()) { s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off, - static_cast(prefetch_len), - ro.rate_limiter_priority); + static_cast(prefetch_len)); } if (!s.ok()) { return s; diff --git a/table/block_based/partitioned_index_iterator.cc b/table/block_based/partitioned_index_iterator.cc index b9bc2155a..3b0527033 100644 --- a/table/block_based/partitioned_index_iterator.cc +++ b/table/block_based/partitioned_index_iterator.cc @@ -91,8 +91,7 @@ void PartitionedIndexIterator::InitPartitionedIndexBlock() { // Enabled from the very first IO when ReadOptions.readahead_size is set. 
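The prefetch path in block_prefetcher.cc above now derives IOOptions from ReadOptions before calling Prefetch(). Condensed into a standalone sketch; the wrapper function is an assumption, but the call sequence mirrors the patch:

// Sketch of the pattern used in BlockPrefetcher::PrefetchIfNeeded above.
Status PrefetchWithReadOptions(RandomAccessFileReader* file,
                               const ReadOptions& read_options,
                               uint64_t offset, size_t len) {
  IOOptions opts;
  Status s = file->PrepareIOOptions(read_options, opts);
  if (!s.ok()) {
    return s;  // e.g. the ReadOptions deadline has already expired
  }
  s = file->Prefetch(opts, offset, len);
  return s;
}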
block_prefetcher_.PrefetchIfNeeded( rep, partitioned_index_handle, read_options_.readahead_size, - is_for_compaction, /*no_sequential_checking=*/false, - read_options_.rate_limiter_priority); + is_for_compaction, /*no_sequential_checking=*/false, read_options_); Status s; table_->NewDataBlockIterator( read_options_, partitioned_index_handle, &block_iter_, diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index d1c5591eb..0c862b9b2 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -175,8 +175,7 @@ Status PartitionIndexReader::CacheDependencies( Status s = rep->file->PrepareIOOptions(ro, opts); if (s.ok()) { s = prefetch_buffer->Prefetch(opts, rep->file.get(), prefetch_off, - static_cast(prefetch_len), - ro.rate_limiter_priority); + static_cast(prefetch_len)); } if (!s.ok()) { return s; diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 412ac4bde..34d3e23e9 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -80,11 +80,11 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() { if (read_options_.async_io && !for_compaction_) { read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCacheAsync( opts, file_, handle_.offset(), block_size_with_trailer_, &slice_, - &io_s, read_options_.rate_limiter_priority); + &io_s); } else { read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCache( opts, file_, handle_.offset(), block_size_with_trailer_, &slice_, - &io_s, read_options_.rate_limiter_priority, for_compaction_); + &io_s, for_compaction_); } if (read_from_prefetch_buffer) { ProcessTrailerIfPresent(); @@ -259,18 +259,18 @@ IOStatus BlockFetcher::ReadBlockContents() { if (file_->use_direct_io()) { PERF_TIMER_GUARD(block_read_time); PERF_CPU_TIMER_GUARD(block_read_cpu_time, nullptr); - io_status_ = file_->Read( - opts, handle_.offset(), block_size_with_trailer_, &slice_, nullptr, - &direct_io_buf_, read_options_.rate_limiter_priority); + io_status_ = + file_->Read(opts, handle_.offset(), block_size_with_trailer_, + &slice_, nullptr, &direct_io_buf_); PERF_COUNTER_ADD(block_read_count, 1); used_buf_ = const_cast(slice_.data()); } else { PrepareBufferForBlockFromFile(); PERF_TIMER_GUARD(block_read_time); PERF_CPU_TIMER_GUARD(block_read_cpu_time, nullptr); - io_status_ = file_->Read(opts, handle_.offset(), - block_size_with_trailer_, &slice_, used_buf_, - nullptr, read_options_.rate_limiter_priority); + io_status_ = + file_->Read(opts, handle_.offset(), block_size_with_trailer_, + &slice_, used_buf_, nullptr); PERF_COUNTER_ADD(block_read_count, 1); #ifndef NDEBUG if (slice_.data() == &stack_buf_[0]) { diff --git a/table/cuckoo/cuckoo_table_builder_test.cc b/table/cuckoo/cuckoo_table_builder_test.cc index ceddbf37a..1a0d58c76 100644 --- a/table/cuckoo/cuckoo_table_builder_test.cc +++ b/table/cuckoo/cuckoo_table_builder_test.cc @@ -122,8 +122,7 @@ class CuckooBuilderTest : public testing::Test { for (uint32_t i = 0; i + 1 < table_size + cuckoo_block_size; ++i) { Slice read_slice; ASSERT_OK(file_reader->Read(IOOptions(), i * bucket_size, bucket_size, - &read_slice, nullptr, nullptr, - Env::IO_TOTAL /* rate_limiter_priority */)); + &read_slice, nullptr, nullptr)); size_t key_idx = std::find(expected_locations.begin(), expected_locations.end(), i) - expected_locations.begin(); diff --git a/table/cuckoo/cuckoo_table_reader.cc b/table/cuckoo/cuckoo_table_reader.cc index d64761962..a4479ab60 100644 --- a/table/cuckoo/cuckoo_table_reader.cc +++ 
b/table/cuckoo/cuckoo_table_reader.cc @@ -144,9 +144,8 @@ CuckooTableReader::CuckooTableReader( *reinterpret_cast(cuckoo_block_size->second.data()); cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1; // TODO: rate limit reads of whole cuckoo tables. - status_ = - file_->Read(IOOptions(), 0, static_cast(file_size), &file_data_, - nullptr, nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + status_ = file_->Read(IOOptions(), 0, static_cast(file_size), + &file_data_, nullptr, nullptr); } Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/, diff --git a/table/format.cc b/table/format.cc index 8825384f0..30847554b 100644 --- a/table/format.cc +++ b/table/format.cc @@ -509,18 +509,16 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, // need to pass a timeout at that point // TODO: rate limit footer reads. if (prefetch_buffer == nullptr || - !prefetch_buffer->TryReadFromCache( - opts, file, read_offset, Footer::kMaxEncodedLength, &footer_input, - nullptr, opts.rate_limiter_priority)) { + !prefetch_buffer->TryReadFromCache(opts, file, read_offset, + Footer::kMaxEncodedLength, + &footer_input, nullptr)) { if (file->use_direct_io()) { s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, - &footer_input, nullptr, &internal_buf, - opts.rate_limiter_priority); + &footer_input, nullptr, &internal_buf); } else { footer_buf.reserve(Footer::kMaxEncodedLength); s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, - &footer_input, &footer_buf[0], nullptr, - opts.rate_limiter_priority); + &footer_input, &footer_buf[0], nullptr); } if (!s.ok()) return s; } diff --git a/table/mock_table.cc b/table/mock_table.cc index d6229ef60..1823758e4 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -305,8 +305,7 @@ Status MockTableFactory::GetIDFromFile(RandomAccessFileReader* file, uint32_t* id) const { char buf[4]; Slice result; - Status s = file->Read(IOOptions(), 0, 4, &result, buf, nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + Status s = file->Read(IOOptions(), 0, 4, &result, buf, nullptr); assert(result.size() == 4); *id = DecodeFixed32(buf); return s; diff --git a/table/plain/plain_table_key_coding.cc b/table/plain/plain_table_key_coding.cc index a40968a60..0ac423191 100644 --- a/table/plain/plain_table_key_coding.cc +++ b/table/plain/plain_table_key_coding.cc @@ -215,8 +215,7 @@ bool PlainTableFileReader::ReadNonMmap(uint32_t file_offset, uint32_t len, // TODO: rate limit plain table reads. Status s = file_info_->file->Read(IOOptions(), file_offset, size_to_read, - &read_result, new_buffer->buf.get(), nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + &read_result, new_buffer->buf.get(), nullptr); if (!s.ok()) { status_ = s; return false; diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc index 2f0379f72..a74da1f89 100644 --- a/table/plain/plain_table_reader.cc +++ b/table/plain/plain_table_reader.cc @@ -284,9 +284,9 @@ void PlainTableReader::FillBloom(const std::vector& prefix_hashes) { Status PlainTableReader::MmapDataIfNeeded() { if (file_info_.is_mmap_mode) { // Get mmapped memory. 
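Several call sites above (mock_table.cc, the cuckoo and plain table readers) simply drop the explicit Env::IO_TOTAL argument; the rate limiter bypass now comes from the IOOptions default. A minimal sketch, assuming a RandomAccessFileReader* file_reader and that the default rate_limiter_priority remains Env::IO_TOTAL:

// Sketch: a read that should bypass the rate limiter no longer names
// Env::IO_TOTAL explicitly; a default-constructed IOOptions carries it.
IOOptions opts;  // opts.rate_limiter_priority defaults to Env::IO_TOTAL
Slice result;
char buf[4];
Status s = file_reader->Read(opts, /*offset=*/0, /*n=*/sizeof(buf), &result,
                             buf, /*aligned_buf=*/nullptr);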
- return file_info_.file->Read( - IOOptions(), 0, static_cast(file_size_), &file_info_.file_data, - nullptr, nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + return file_info_.file->Read(IOOptions(), 0, + static_cast(file_size_), + &file_info_.file_data, nullptr, nullptr); } return Status::OK(); } diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 85a264d59..4a9fd3807 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -109,8 +109,7 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) { uint64_t prefetch_off = file_size - prefetch_size; IOOptions opts; s = prefetch_buffer.Prefetch(opts, file_.get(), prefetch_off, - static_cast(prefetch_size), - Env::IO_TOTAL /* rate_limiter_priority */); + static_cast(prefetch_size)); s = ReadFooterFromFile(opts, file_.get(), *fs, &prefetch_buffer, file_size, &footer); diff --git a/table/table_test.cc b/table/table_test.cc index 5b7e4682f..e6f95243e 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -1330,7 +1330,7 @@ class FileChecksumTestHelper { uint64_t offset = 0; Status s; s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); if (!s.ok()) { return s; } @@ -1338,8 +1338,7 @@ class FileChecksumTestHelper { file_checksum_generator->Update(scratch.get(), result.size()); offset += static_cast(result.size()); s = file_reader_->Read(IOOptions(), offset, 2048, &result, scratch.get(), - nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); if (!s.ok()) { return s; } @@ -5396,16 +5395,13 @@ TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) { IOOptions opts; buffer.TryReadFromCache(opts, nullptr /* reader */, 500 /* offset */, 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); + nullptr /* status */); buffer.TryReadFromCache(opts, nullptr /* reader */, 480 /* offset */, 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); + nullptr /* status */); buffer.TryReadFromCache(opts, nullptr /* reader */, 490 /* offset */, 10 /* n */, nullptr /* result */, - nullptr /* status */, - Env::IO_TOTAL /* rate_limiter_priority */); + nullptr /* status */); ASSERT_EQ(480, buffer.min_offset_read()); } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index fd753b90b..add4351da 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -169,6 +169,7 @@ ), "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1), "verify_checksum_one_in": 1000000, + "verify_file_checksums_one_in": 1000000, "verify_db_one_in": 100000, "continuous_verification_interval": 0, "max_key_len": 3, @@ -658,6 +659,8 @@ def finalize_and_sanitize(src_params): dest_params["ingest_external_file_one_in"] = 0 dest_params["use_merge"] = 0 dest_params["use_full_merge_v1"] = 0 + if dest_params["file_checksum_impl"] == "none": + dest_params["verify_file_checksums_one_in"] = 0 return dest_params diff --git a/unreleased_history/behavior_changes/more_sst_read_micros.md b/unreleased_history/behavior_changes/more_sst_read_micros.md new file mode 100644 index 000000000..99be430c8 --- /dev/null +++ b/unreleased_history/behavior_changes/more_sst_read_micros.md @@ -0,0 +1 @@ +Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file diff --git a/unreleased_history/new_features/user_read_io_activity_stats.md b/unreleased_history/new_features/user_read_io_activity_stats.md 
new file mode 100644 index 000000000..7a36aa98f --- /dev/null +++ b/unreleased_history/new_features/user_read_io_activity_stats.md @@ -0,0 +1 @@ +New statistics `rocksdb.file.read.{db.open|get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. diff --git a/util/thread_operation.h b/util/thread_operation.h index b6c106279..c95b7c693 100644 --- a/util/thread_operation.h +++ b/util/thread_operation.h @@ -39,7 +39,14 @@ static OperationInfo global_operation_table[] = { {ThreadStatus::OP_UNKNOWN, ""}, {ThreadStatus::OP_COMPACTION, "Compaction"}, {ThreadStatus::OP_FLUSH, "Flush"}, - {ThreadStatus::OP_DBOPEN, "DBOpen"}}; + {ThreadStatus::OP_DBOPEN, "DBOpen"}, + {ThreadStatus::OP_GET, "Get"}, + {ThreadStatus::OP_MULTIGET, "MultiGet"}, + {ThreadStatus::OP_DBITERATOR, "DBIterator"}, + {ThreadStatus::OP_VERIFY_DB_CHECKSUM, "VerifyDBChecksum"}, + {ThreadStatus::OP_VERIFY_FILE_CHECKSUMS, "VerifyFileChecksums"}, + +}; struct OperationStageInfo { const ThreadStatus::OperationStage stage; diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 7a4b603f2..69efe950b 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -1384,28 +1384,46 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr& bfile, return s; } -std::vector BlobDBImpl::MultiGet(const ReadOptions& read_options, +std::vector BlobDBImpl::MultiGet(const ReadOptions& _read_options, const std::vector& keys, std::vector* values) { StopWatch multiget_sw(clock_, statistics_, BLOB_DB_MULTIGET_MICROS); RecordTick(statistics_, BLOB_DB_NUM_MULTIGET); // Get a snapshot to avoid blob file get deleted between we // fetch and index entry and reading from the file. 
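The display names registered in global_operation_table above are what the thread status API reports for the new operation types. A short sketch, assuming thread status tracking is enabled in the application:

// Sketch: resolving one of the new operation types to its reported name.
const std::string& name =
    ThreadStatus::GetOperationName(ThreadStatus::OP_VERIFY_FILE_CHECKSUMS);
// name == "VerifyFileChecksums"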
- ReadOptions ro(read_options); - bool snapshot_created = SetSnapshotIfNeeded(&ro); - std::vector statuses; - statuses.reserve(keys.size()); + std::size_t num_keys = keys.size(); + statuses.reserve(num_keys); + + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + statuses.push_back(s); + } + return statuses; + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } + bool snapshot_created = SetSnapshotIfNeeded(&read_options); + values->clear(); values->reserve(keys.size()); PinnableSlice value; for (size_t i = 0; i < keys.size(); i++) { - statuses.push_back(Get(ro, DefaultColumnFamily(), keys[i], &value)); + statuses.push_back( + GetImpl(read_options, DefaultColumnFamily(), keys[i], &value)); values->push_back(value.ToString()); value.Reset(); } if (snapshot_created) { - db_->ReleaseSnapshot(ro.snapshot); + db_->ReleaseSnapshot(read_options.snapshot); } return statuses; } @@ -1544,12 +1562,12 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number, if (reader->use_direct_io()) { s = reader->Read(IOOptions(), record_offset, static_cast(record_size), &blob_record, nullptr, - &aligned_buf, Env::IO_TOTAL /* rate_limiter_priority */); + &aligned_buf); } else { buf.reserve(static_cast(record_size)); s = reader->Read(IOOptions(), record_offset, static_cast(record_size), &blob_record, &buf[0], - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); } RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, blob_record.size()); } @@ -1609,16 +1627,36 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number, return Status::OK(); } -Status BlobDBImpl::Get(const ReadOptions& read_options, +Status BlobDBImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { - return Get(read_options, column_family, key, value, - static_cast(nullptr) /*expiration*/); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + return GetImpl(read_options, column_family, key, value); } -Status BlobDBImpl::Get(const ReadOptions& read_options, +Status BlobDBImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, uint64_t* expiration) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + StopWatch get_sw(clock_, statistics_, BLOB_DB_GET_MICROS); RecordTick(statistics_, BLOB_DB_NUM_GET); return 
GetImpl(read_options, column_family, key, value, expiration); @@ -1631,11 +1669,6 @@ Status BlobDBImpl::GetImpl(const ReadOptions& read_options, return Status::NotSupported( "Blob DB doesn't support non-default column family."); } - if (read_options.io_activity != Env::IOActivity::kUnknown) { - return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - } // Get a snapshot to avoid blob file get deleted between we // fetch and index entry and reading from the file. // TODO(yiwu): For Get() retry if file not found would be a simpler strategy. @@ -2040,11 +2073,16 @@ void BlobDBImpl::CopyBlobFiles( } } -Iterator* BlobDBImpl::NewIterator(const ReadOptions& read_options) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { +Iterator* BlobDBImpl::NewIterator(const ReadOptions& _read_options) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; } auto* cfd = static_cast_with_check(DefaultColumnFamily()) diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h index de888c068..2d0afc639 100644 --- a/utilities/blob_db/blob_db_impl.h +++ b/utilities/blob_db/blob_db_impl.h @@ -103,12 +103,13 @@ class BlobDBImpl : public BlobDB { const Slice& value) override; using BlobDB::Get; - Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; - Status Get(const ReadOptions& read_options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value, - uint64_t* expiration) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value, uint64_t* expiration) override; using BlobDB::NewIterator; virtual Iterator* NewIterator(const ReadOptions& read_options) override; @@ -123,7 +124,7 @@ class BlobDBImpl : public BlobDB { using BlobDB::MultiGet; virtual std::vector MultiGet( - const ReadOptions& read_options, const std::vector& keys, + const ReadOptions& _read_options, const std::vector& keys, std::vector* values) override; using BlobDB::Write; diff --git a/utilities/blob_db/blob_dump_tool.cc b/utilities/blob_db/blob_dump_tool.cc index 9b0fa314d..0c2fef5e1 100644 --- a/utilities/blob_db/blob_dump_tool.cc +++ b/utilities/blob_db/blob_dump_tool.cc @@ -102,8 +102,8 @@ Status BlobDumpTool::Read(uint64_t offset, size_t size, Slice* result) { } buffer_.reset(new char[buffer_size_]); } - Status s = reader_->Read(IOOptions(), offset, size, result, buffer_.get(), - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + Status s = + reader_->Read(IOOptions(), offset, size, result, buffer_.get(), nullptr); if (!s.ok()) { return s; } @@ -277,4 +277,3 @@ std::string BlobDumpTool::GetString(std::pair p) { } // namespace blob_db } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/blob_db/blob_file.cc 
b/utilities/blob_db/blob_file.cc index cad89f2e4..5b31d5697 100644 --- a/utilities/blob_db/blob_file.cc +++ b/utilities/blob_db/blob_file.cc @@ -114,13 +114,11 @@ Status BlobFile::ReadFooter(BlobLogFooter* bf) { // TODO: rate limit reading footers from blob files. if (ra_file_reader_->use_direct_io()) { s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize, - &result, nullptr, &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */); + &result, nullptr, &aligned_buf); } else { buf.reserve(BlobLogFooter::kSize + 10); s = ra_file_reader_->Read(IOOptions(), footer_offset, BlobLogFooter::kSize, - &result, &buf[0], nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + &result, &buf[0], nullptr); } if (!s.ok()) return s; if (result.size() != BlobLogFooter::kSize) { @@ -238,13 +236,11 @@ Status BlobFile::ReadMetadata(const std::shared_ptr& fs, // TODO: rate limit reading headers from blob files. if (file_reader->use_direct_io()) { s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice, - nullptr, &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */); + nullptr, &aligned_buf); } else { header_buf.reserve(BlobLogHeader::kSize); s = file_reader->Read(IOOptions(), 0, BlobLogHeader::kSize, &header_slice, - &header_buf[0], nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + &header_buf[0], nullptr); } if (!s.ok()) { ROCKS_LOG_ERROR( @@ -281,13 +277,12 @@ Status BlobFile::ReadMetadata(const std::shared_ptr& fs, if (file_reader->use_direct_io()) { s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize, BlobLogFooter::kSize, &footer_slice, nullptr, - &aligned_buf, - Env::IO_TOTAL /* rate_limiter_priority */); + &aligned_buf); } else { footer_buf.reserve(BlobLogFooter::kSize); s = file_reader->Read(IOOptions(), file_size - BlobLogFooter::kSize, BlobLogFooter::kSize, &footer_slice, &footer_buf[0], - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); } if (!s.ok()) { ROCKS_LOG_ERROR( diff --git a/utilities/cache_dump_load_impl.h b/utilities/cache_dump_load_impl.h index 9811ff2d9..59cabbf3b 100644 --- a/utilities/cache_dump_load_impl.h +++ b/utilities/cache_dump_load_impl.h @@ -249,8 +249,7 @@ class FromFileCacheDumpReader : public CacheDumpReader { while (to_read > 0) { io_s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, - buffer_, nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + buffer_, nullptr); if (!io_s.ok()) { return io_s; } diff --git a/utilities/persistent_cache/block_cache_tier_file.cc b/utilities/persistent_cache/block_cache_tier_file.cc index 9a667c1fd..ff01c1abc 100644 --- a/utilities/persistent_cache/block_cache_tier_file.cc +++ b/utilities/persistent_cache/block_cache_tier_file.cc @@ -236,7 +236,7 @@ bool RandomAccessCacheFile::Read(const LBA& lba, Slice* key, Slice* val, Slice result; Status s = freader_->Read(IOOptions(), lba.off_, lba.size_, &result, scratch, - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); if (!s.ok()) { Error(log_, "Error reading from file %s. 
%s", Path().c_str(), s.ToString().c_str()); @@ -605,4 +605,3 @@ void ThreadedWriter::DispatchIO(const IO& io) { } } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/trace/file_trace_reader_writer.cc b/utilities/trace/file_trace_reader_writer.cc index 5886d3539..f2ca74144 100644 --- a/utilities/trace/file_trace_reader_writer.cc +++ b/utilities/trace/file_trace_reader_writer.cc @@ -42,8 +42,7 @@ Status FileTraceReader::Reset() { Status FileTraceReader::Read(std::string* data) { assert(file_reader_ != nullptr); Status s = file_reader_->Read(IOOptions(), offset_, kTraceMetadataSize, - &result_, buffer_, nullptr, - Env::IO_TOTAL /* rate_limiter_priority */); + &result_, buffer_, nullptr); if (!s.ok()) { return s; } @@ -68,7 +67,7 @@ Status FileTraceReader::Read(std::string* data) { bytes_to_read > kBufferSize ? kBufferSize : bytes_to_read; while (to_read > 0) { s = file_reader_->Read(IOOptions(), offset_, to_read, &result_, buffer_, - nullptr, Env::IO_TOTAL /* rate_limiter_priority */); + nullptr); if (!s.ok()) { return s; } diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index 5963f7429..d51dafa3d 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -232,27 +232,56 @@ Status TransactionBaseImpl::PopSavePoint() { return write_batch_.PopSavePoint(); } -Status TransactionBaseImpl::Get(const ReadOptions& read_options, +Status TransactionBaseImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) { - if (read_options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + auto s = GetImpl(read_options, column_family, key, value); + return s; +} + +Status TransactionBaseImpl::GetImpl(const ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, std::string* value) { assert(value != nullptr); PinnableSlice pinnable_val(value); assert(!pinnable_val.IsPinned()); - auto s = Get(read_options, column_family, key, &pinnable_val); + auto s = GetImpl(read_options, column_family, key, &pinnable_val); if (s.ok() && pinnable_val.IsPinned()) { value->assign(pinnable_val.data(), pinnable_val.size()); } // else value is already assigned return s; } -Status TransactionBaseImpl::Get(const ReadOptions& read_options, +Status TransactionBaseImpl::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { + return Status::InvalidArgument( + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + return GetImpl(read_options, column_family, key, pinnable_val); +} + +Status TransactionBaseImpl::GetImpl(const 
ReadOptions& read_options, + ColumnFamilyHandle* column_family, + const Slice& key, + PinnableSlice* pinnable_val) { return write_batch_.GetFromBatchAndDB(db_, read_options, column_family, key, pinnable_val); } @@ -279,7 +308,7 @@ Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, assert(value != nullptr); PinnableSlice pinnable_val(value); assert(!pinnable_val.IsPinned()); - s = Get(read_options, column_family, key, &pinnable_val); + s = GetImpl(read_options, column_family, key, &pinnable_val); if (s.ok() && pinnable_val.IsPinned()) { value->assign(pinnable_val.data(), pinnable_val.size()); } // else value is already assigned @@ -307,39 +336,63 @@ Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options, TryLock(column_family, key, true /* read_only */, exclusive, do_validate); if (s.ok() && pinnable_val != nullptr) { - s = Get(read_options, column_family, key, pinnable_val); + s = GetImpl(read_options, column_family, key, pinnable_val); } return s; } std::vector TransactionBaseImpl::MultiGet( - const ReadOptions& read_options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values) { size_t num_keys = keys.size(); - if (read_options.io_activity != Env::IOActivity::kUnknown) { + std::vector stat_list(num_keys); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { Status s = Status::InvalidArgument( - "Cannot call MultiGet with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); - return std::vector(num_keys, s); + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + stat_list[i] = s; + } + return stat_list; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; } values->resize(num_keys); - - std::vector stat_list(num_keys); for (size_t i = 0; i < num_keys; ++i) { - stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]); + stat_list[i] = + GetImpl(read_options, column_family[i], keys[i], &(*values)[i]); } return stat_list; } -void TransactionBaseImpl::MultiGet(const ReadOptions& read_options, +void TransactionBaseImpl::MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input) { - assert(read_options.io_activity == Env::IOActivity::kUnknown); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family, num_keys, keys, values, statuses, sorted_input); @@ -349,7 +402,6 @@ std::vector TransactionBaseImpl::MultiGetForUpdate( const ReadOptions& read_options, const std::vector& column_family, const std::vector& keys, std::vector* values) { - // Regardless of whether the 
MultiGet succeeded, track these keys. size_t num_keys = keys.size(); if (read_options.io_activity != Env::IOActivity::kUnknown) { Status s = Status::InvalidArgument( @@ -357,6 +409,7 @@ std::vector TransactionBaseImpl::MultiGetForUpdate( "`Env::IOActivity::kUnknown`"); return std::vector(num_keys, s); } + // Regardless of whether the MultiGet succeeded, track these keys. values->resize(num_keys); // Lock all keys @@ -372,7 +425,8 @@ std::vector TransactionBaseImpl::MultiGetForUpdate( // TODO(agiardullo): optimize multiget? std::vector stat_list(num_keys); for (size_t i = 0; i < num_keys; ++i) { - stat_list[i] = Get(read_options, column_family[i], keys[i], &(*values)[i]); + stat_list[i] = + GetImpl(read_options, column_family[i], keys[i], &(*values)[i]); } return stat_list; diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index bde09b699..be363b473 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -53,11 +53,13 @@ class TransactionBaseImpl : public Transaction { Status PopSavePoint() override; using Transaction::Get; - Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, std::string* value) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + std::string* value) override; - Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value) override; + Status Get(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; Status Get(const ReadOptions& options, const Slice& key, std::string* value) override { @@ -84,7 +86,7 @@ class TransactionBaseImpl : public Transaction { using Transaction::MultiGet; std::vector MultiGet( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values) override; @@ -98,9 +100,10 @@ class TransactionBaseImpl : public Transaction { keys, values); } - void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family, - const size_t num_keys, const Slice* keys, PinnableSlice* values, - Status* statuses, const bool sorted_input = false) override; + void MultiGet(const ReadOptions& _read_options, + ColumnFamilyHandle* column_family, const size_t num_keys, + const Slice* keys, PinnableSlice* values, Status* statuses, + const bool sorted_input = false) override; using Transaction::MultiGetForUpdate; std::vector MultiGetForUpdate( @@ -260,6 +263,13 @@ class TransactionBaseImpl : public Transaction { LockTracker& GetTrackedLocks() { return *tracked_locks_; } protected: + Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, std::string* value) override; + + virtual Status GetImpl(const ReadOptions& options, + ColumnFamilyHandle* column_family, const Slice& key, + PinnableSlice* value) override; + // Add a key to the list of tracked keys. // // seqno is the earliest seqno this key was involved with this transaction. 
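The transaction read paths above all follow the same shape: reject a caller-supplied io_activity that does not match the API, stamp kUnknown with the API's activity, then delegate to the *Impl overload so per-key loops skip re-validation. A condensed sketch; the helper name is an assumption, not part of the patch:

// Illustrative condensation of the validate-and-stamp pattern used above.
Status CheckAndStampIOActivity(const ReadOptions& in, Env::IOActivity expected,
                               ReadOptions* out) {
  if (in.io_activity != Env::IOActivity::kUnknown &&
      in.io_activity != expected) {
    return Status::InvalidArgument(
        "ReadOptions::io_activity does not match this API");
  }
  *out = in;
  if (out->io_activity == Env::IOActivity::kUnknown) {
    out->io_activity = expected;  // attribute file reads to this activity
  }
  return Status::OK();
}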
@@ -379,4 +389,3 @@ class TransactionBaseImpl : public Transaction { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc index c27a679e4..aa5091b95 100644 --- a/utilities/transactions/write_prepared_txn.cc +++ b/utilities/transactions/write_prepared_txn.cc @@ -39,20 +39,37 @@ void WritePreparedTxn::Initialize(const TransactionOptions& txn_options) { prepare_batch_cnt_ = 0; } -void WritePreparedTxn::MultiGet(const ReadOptions& options, +void WritePreparedTxn::MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input) { - assert(options.io_activity == Env::IOActivity::kUnknown); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } + SequenceNumber min_uncommitted, snap_seq; - const SnapshotBackup backed_by_snapshot = - wpt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); + const SnapshotBackup backed_by_snapshot = wpt_db_->AssignMinMaxSeqs( + read_options.snapshot, &min_uncommitted, &snap_seq); WritePreparedTxnReadCallback callback(wpt_db_, snap_seq, min_uncommitted, backed_by_snapshot); - write_batch_.MultiGetFromBatchAndDB(db_, options, column_family, num_keys, - keys, values, statuses, sorted_input, - &callback); + write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family, + num_keys, keys, values, statuses, + sorted_input, &callback); if (UNLIKELY(!callback.valid() || !wpt_db_->ValidateSnapshot(snap_seq, backed_by_snapshot))) { wpt_db_->WPRecordTick(TXN_GET_TRY_AGAIN); @@ -62,14 +79,27 @@ void WritePreparedTxn::MultiGet(const ReadOptions& options, } } -Status WritePreparedTxn::Get(const ReadOptions& options, +Status WritePreparedTxn::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val) { - if (options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; } + + return GetImpl(read_options, column_family, key, pinnable_val); +} + +Status WritePreparedTxn::GetImpl(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, + PinnableSlice* pinnable_val) { SequenceNumber min_uncommitted, snap_seq; const SnapshotBackup backed_by_snapshot = wpt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); diff --git a/utilities/transactions/write_prepared_txn.h b/utilities/transactions/write_prepared_txn.h index 3faf0c9b8..9a0fb81d1 100644 
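From the caller's side, the effect of the WritePreparedTxn changes above is that a mismatched io_activity is rejected up front while kUnknown is accepted and stamped internally. A usage sketch, assuming an already-created Transaction* txn and ColumnFamilyHandle* cfh:

// Usage sketch only; txn and cfh come from application code outside this patch.
ReadOptions ro;  // io_activity stays kUnknown; the API stamps it to kGet
PinnableSlice value;
Status s = txn->Get(ro, cfh, "key", &value);

ReadOptions mismatched;
mismatched.io_activity = Env::IOActivity::kCompaction;
s = txn->Get(mismatched, cfh, "key", &value);
// s.IsInvalidArgument() is true with this patch applied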
--- a/utilities/transactions/write_prepared_txn.h +++ b/utilities/transactions/write_prepared_txn.h @@ -51,12 +51,12 @@ class WritePreparedTxn : public PessimisticTransaction { // seq in the WAL that is also published, LastPublishedSequence, as opposed to // the last seq in the memtable. using Transaction::Get; - virtual Status Get(const ReadOptions& options, + virtual Status Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; using Transaction::MultiGet; - virtual void MultiGet(const ReadOptions& options, + virtual void MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, @@ -86,6 +86,10 @@ class WritePreparedTxn : public PessimisticTransaction { friend class WriteUnpreparedTxnDB; friend class WriteUnpreparedTxn; + using Transaction::GetImpl; + Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) override; + Status PrepareInternal() override; Status CommitWithoutPrepareInternal() override; diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index 6118c3549..5f17247e4 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -247,14 +247,26 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig, return s; } -Status WritePreparedTxnDB::Get(const ReadOptions& options, +Status WritePreparedTxnDB::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { - if (options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; + } + + return GetImpl(read_options, column_family, key, value); +} + +Status WritePreparedTxnDB::GetImpl(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) { SequenceNumber min_uncommitted, snap_seq; const SnapshotBackup backed_by_snapshot = AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); @@ -314,16 +326,35 @@ void WritePreparedTxnDB::UpdateCFComparatorMap(ColumnFamilyHandle* h) { } std::vector WritePreparedTxnDB::MultiGet( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values) { assert(values); size_t num_keys = keys.size(); + std::vector stat_list(num_keys); + + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + stat_list[i] = s; + } + return stat_list; + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = 
Env::IOActivity::kMultiGet; + } + values->resize(num_keys); - std::vector stat_list(num_keys); for (size_t i = 0; i < num_keys; ++i) { - stat_list[i] = this->Get(options, column_family[i], keys[i], &(*values)[i]); + stat_list[i] = + this->GetImpl(read_options, column_family[i], keys[i], &(*values)[i]); } return stat_list; } @@ -346,22 +377,27 @@ static void CleanupWritePreparedTxnDBIterator(void* arg1, void* /*arg2*/) { } } // anonymous namespace -Iterator* WritePreparedTxnDB::NewIterator(const ReadOptions& options, +Iterator* WritePreparedTxnDB::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) { - if (options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; } constexpr bool expose_blob_index = false; constexpr bool allow_refresh = false; std::shared_ptr own_snapshot = nullptr; SequenceNumber snapshot_seq = kMaxSequenceNumber; SequenceNumber min_uncommitted = 0; - if (options.snapshot != nullptr) { - snapshot_seq = options.snapshot->GetSequenceNumber(); + if (read_options.snapshot != nullptr) { + snapshot_seq = read_options.snapshot->GetSequenceNumber(); min_uncommitted = - static_cast_with_check(options.snapshot) + static_cast_with_check(read_options.snapshot) ->min_uncommitted_; } else { auto* snapshot = GetSnapshot(); @@ -377,26 +413,37 @@ Iterator* WritePreparedTxnDB::NewIterator(const ReadOptions& options, static_cast_with_check(column_family)->cfd(); auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted); - auto* db_iter = - db_impl_->NewIteratorImpl(options, cfd, snapshot_seq, &state->callback, - expose_blob_index, allow_refresh); + auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, snapshot_seq, + &state->callback, expose_blob_index, + allow_refresh); db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr); return db_iter; } Status WritePreparedTxnDB::NewIterators( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { + return Status::InvalidArgument( + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`"); + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } constexpr bool expose_blob_index = false; constexpr bool allow_refresh = false; std::shared_ptr own_snapshot = nullptr; SequenceNumber snapshot_seq = kMaxSequenceNumber; SequenceNumber min_uncommitted = 0; - if (options.snapshot != nullptr) { - snapshot_seq = options.snapshot->GetSequenceNumber(); + if (read_options.snapshot != nullptr) { + snapshot_seq = read_options.snapshot->GetSequenceNumber(); min_uncommitted = - static_cast_with_check(options.snapshot) + 
static_cast_with_check(read_options.snapshot) ->min_uncommitted_; } else { auto* snapshot = GetSnapshot(); @@ -414,9 +461,9 @@ Status WritePreparedTxnDB::NewIterators( static_cast_with_check(column_family)->cfd(); auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted); - auto* db_iter = - db_impl_->NewIteratorImpl(options, cfd, snapshot_seq, &state->callback, - expose_blob_index, allow_refresh); + auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, snapshot_seq, + &state->callback, + expose_blob_index, allow_refresh); db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr); iterators->push_back(db_iter); } diff --git a/utilities/transactions/write_prepared_txn_db.h b/utilities/transactions/write_prepared_txn_db.h index f5b641160..6c0e292b0 100644 --- a/utilities/transactions/write_prepared_txn_db.h +++ b/utilities/transactions/write_prepared_txn_db.h @@ -83,24 +83,24 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { size_t batch_cnt, WritePreparedTxn* txn); using DB::Get; - virtual Status Get(const ReadOptions& options, + virtual Status Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; using DB::MultiGet; virtual std::vector MultiGet( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_family, const std::vector& keys, std::vector* values) override; using DB::NewIterator; - virtual Iterator* NewIterator(const ReadOptions& options, + virtual Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) override; using DB::NewIterators; virtual Status NewIterators( - const ReadOptions& options, + const ReadOptions& _read_options, const std::vector& column_families, std::vector* iterators) override; @@ -520,6 +520,21 @@ class WritePreparedTxnDB : public PessimisticTransactionDB { RecordTick(db_impl_->immutable_db_options_.statistics.get(), ticker_type); } + Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, std::string* value) { + assert(value != nullptr); + PinnableSlice pinnable_val(value); + assert(!pinnable_val.IsPinned()); + auto s = GetImpl(options, column_family, key, &pinnable_val); + if (s.ok() && pinnable_val.IsPinned()) { + value->assign(pinnable_val.data(), pinnable_val.size()); + } // else value is already assigned + return s; + } + + Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value); + // A heap with the amortized O(1) complexity for erase. It uses one extra heap // to keep track of erased entries that are not yet on top of the main heap. 
class PreparedHeap { diff --git a/utilities/transactions/write_unprepared_txn.cc b/utilities/transactions/write_unprepared_txn.cc index 845b117cf..4c9c2a3dd 100644 --- a/utilities/transactions/write_unprepared_txn.cc +++ b/utilities/transactions/write_unprepared_txn.cc @@ -943,20 +943,36 @@ Status WriteUnpreparedTxn::PopSavePoint() { return Status::NotFound(); } -void WriteUnpreparedTxn::MultiGet(const ReadOptions& options, +void WriteUnpreparedTxn::MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, const bool sorted_input) { - assert(options.io_activity == Env::IOActivity::kUnknown); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGet) { + Status s = Status::InvalidArgument( + "Can only call MultiGet with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`"); + + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGet; + } SequenceNumber min_uncommitted, snap_seq; - const SnapshotBackup backed_by_snapshot = - wupt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); + const SnapshotBackup backed_by_snapshot = wupt_db_->AssignMinMaxSeqs( + read_options.snapshot, &min_uncommitted, &snap_seq); WriteUnpreparedTxnReadCallback callback(wupt_db_, snap_seq, min_uncommitted, unprep_seqs_, backed_by_snapshot); - write_batch_.MultiGetFromBatchAndDB(db_, options, column_family, num_keys, - keys, values, statuses, sorted_input, - &callback); + write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family, + num_keys, keys, values, statuses, + sorted_input, &callback); if (UNLIKELY(!callback.valid() || !wupt_db_->ValidateSnapshot(snap_seq, backed_by_snapshot))) { wupt_db_->WPRecordTick(TXN_GET_TRY_AGAIN); @@ -966,14 +982,26 @@ void WriteUnpreparedTxn::MultiGet(const ReadOptions& options, } } -Status WriteUnpreparedTxn::Get(const ReadOptions& options, +Status WriteUnpreparedTxn::Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) { - if (options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGet) { return Status::InvalidArgument( - "Cannot call Get with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "Can only call Get with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGet; } + + return GetImpl(read_options, column_family, key, value); +} + +Status WriteUnpreparedTxn::GetImpl(const ReadOptions& options, + ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) { SequenceNumber min_uncommitted, snap_seq; const SnapshotBackup backed_by_snapshot = wupt_db_->AssignMinMaxSeqs(options.snapshot, &min_uncommitted, &snap_seq); diff --git a/utilities/transactions/write_unprepared_txn.h b/utilities/transactions/write_unprepared_txn.h index 63c65f00a..fe47c8cd8 100644 --- a/utilities/transactions/write_unprepared_txn.h +++ 
b/utilities/transactions/write_unprepared_txn.h @@ -184,12 +184,12 @@ class WriteUnpreparedTxn : public WritePreparedTxn { // Get and GetIterator needs to be overridden so that a ReadCallback to // handle read-your-own-write is used. using Transaction::Get; - virtual Status Get(const ReadOptions& options, + virtual Status Get(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value) override; using Transaction::MultiGet; - virtual void MultiGet(const ReadOptions& options, + virtual void MultiGet(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, @@ -211,6 +211,10 @@ class WriteUnpreparedTxn : public WritePreparedTxn { friend class WriteUnpreparedTxnDB; const std::map& GetUnpreparedSequenceNumbers(); + using Transaction::GetImpl; + Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, + const Slice& key, PinnableSlice* value) override; + Status WriteRollbackKeys(const LockTracker& tracked_keys, WriteBatchWithIndex* rollback_batch, ReadCallback* callback, const ReadOptions& roptions); @@ -336,4 +340,3 @@ class WriteUnpreparedTxn : public WritePreparedTxn { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/transactions/write_unprepared_txn_db.cc b/utilities/transactions/write_unprepared_txn_db.cc index fd0ba0aed..973aa49fd 100644 --- a/utilities/transactions/write_unprepared_txn_db.cc +++ b/utilities/transactions/write_unprepared_txn_db.cc @@ -385,13 +385,19 @@ static void CleanupWriteUnpreparedTxnDBIterator(void* arg1, void* /*arg2*/) { } } // anonymous namespace -Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& options, +Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, WriteUnpreparedTxn* txn) { - if (options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); + } + + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; } // TODO(lth): Refactor so that this logic is shared with WritePrepared. constexpr bool expose_blob_index = false; @@ -431,11 +437,11 @@ Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& options, // max_visible_seq, and then return the last visible value, so that this // restriction can be lifted. 
const Snapshot* snapshot = nullptr; - if (options.snapshot == nullptr) { + if (read_options.snapshot == nullptr) { snapshot = GetSnapshot(); own_snapshot = std::make_shared(db_impl_, snapshot); } else { - snapshot = options.snapshot; + snapshot = read_options.snapshot; } snapshot_seq = snapshot->GetSequenceNumber(); @@ -467,8 +473,8 @@ Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& options, auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted, txn); auto* db_iter = db_impl_->NewIteratorImpl( - options, cfd, state->MaxVisibleSeq(), &state->callback, expose_blob_index, - allow_refresh); + read_options, cfd, state->MaxVisibleSeq(), &state->callback, + expose_blob_index, allow_refresh); db_iter->RegisterCleanup(CleanupWriteUnpreparedTxnDBIterator, state, nullptr); return db_iter; } diff --git a/utilities/transactions/write_unprepared_txn_db.h b/utilities/transactions/write_unprepared_txn_db.h index a7b10f153..409d73a0a 100644 --- a/utilities/transactions/write_unprepared_txn_db.h +++ b/utilities/transactions/write_unprepared_txn_db.h @@ -27,7 +27,7 @@ class WriteUnpreparedTxnDB : public WritePreparedTxnDB { struct IteratorState; using WritePreparedTxnDB::NewIterator; - Iterator* NewIterator(const ReadOptions& options, + Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, WriteUnpreparedTxn* txn); diff --git a/utilities/ttl/db_ttl_impl.cc b/utilities/ttl/db_ttl_impl.cc index 2b261ec6f..f45a6f0c1 100644 --- a/utilities/ttl/db_ttl_impl.cc +++ b/utilities/ttl/db_ttl_impl.cc @@ -594,14 +594,19 @@ Status DBWithTTLImpl::Write(const WriteOptions& opts, WriteBatch* updates) { } } -Iterator* DBWithTTLImpl::NewIterator(const ReadOptions& opts, +Iterator* DBWithTTLImpl::NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) { - if (opts.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kDBIterator) { return NewErrorIterator(Status::InvalidArgument( - "Cannot call NewIterator with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`")); + "Can only call NewIterator with `ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kDBIterator`")); } - return new TtlIterator(db_->NewIterator(opts, column_family)); + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kDBIterator; + } + return new TtlIterator(db_->NewIterator(read_options, column_family)); } void DBWithTTLImpl::SetTtl(ColumnFamilyHandle* h, int32_t ttl) { diff --git a/utilities/ttl/db_ttl_impl.h b/utilities/ttl/db_ttl_impl.h index 6ac662467..b125d79b0 100644 --- a/utilities/ttl/db_ttl_impl.h +++ b/utilities/ttl/db_ttl_impl.h @@ -78,7 +78,7 @@ class DBWithTTLImpl : public DBWithTTL { virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override; using StackableDB::NewIterator; - virtual Iterator* NewIterator(const ReadOptions& opts, + virtual Iterator* NewIterator(const ReadOptions& _read_options, ColumnFamilyHandle* column_family) override; virtual DB* GetBaseDB() override { return db_; } diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index f20b7e5e3..208eeb44b 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ 
-531,7 +531,8 @@ Status WriteBatchWithIndex::GetFromBatchAndDB( // Did not find key in batch OR could not resolve Merges. Try DB. if (!callback) { - s = db->Get(read_options, column_family, key, pinnable_val); + s = static_cast_with_check(db->GetRootDB()) + ->GetImpl(read_options, column_family, key, pinnable_val); } else { DBImpl::GetImplOptions get_impl_options; get_impl_options.column_family = column_family; From c751583c03c93a0dfca31714cfe64b9ee8bd511c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 9 Aug 2023 13:49:42 -0700 Subject: [PATCH 029/386] Set default cf ts sz for a reused transaction (#11685) Summary: Set up the default column family timestamp size for a reused write committed transaction. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11685 Test Plan: Added unit test. Reviewed By: ltamasi Differential Revision: D48195129 Pulled By: jowlyzhang fbshipit-source-id: 54faa900c123fc6daa412c01490e36c10a24a678 --- db/write_batch.cc | 5 +++++ db/write_batch_internal.h | 3 +++ utilities/transactions/transaction_base.cc | 2 ++ .../transactions/write_committed_transaction_ts_test.cc | 7 +++++++ 4 files changed, 17 insertions(+) diff --git a/db/write_batch.cc b/db/write_batch.cc index 47e34e6f9..78a137d21 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -746,6 +746,11 @@ size_t WriteBatchInternal::GetFirstOffset(WriteBatch* /*b*/) { return WriteBatchInternal::kHeader; } +void WriteBatchInternal::SetDefaultColumnFamilyTimestampSize( + WriteBatch* wb, size_t default_cf_ts_sz) { + wb->default_cf_ts_sz_ = default_cf_ts_sz; +} + std::tuple WriteBatchInternal::GetColumnFamilyIdAndTimestampSize( WriteBatch* b, ColumnFamilyHandle* column_family) { diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h index 1be0bd140..52bbe4545 100644 --- a/db/write_batch_internal.h +++ b/db/write_batch_internal.h @@ -224,6 +224,9 @@ class WriteBatchInternal { static void SetAsLatestPersistentState(WriteBatch* b); static bool IsLatestPersistentState(const WriteBatch* b); + static void SetDefaultColumnFamilyTimestampSize(WriteBatch* wb, + size_t default_cf_ts_sz); + static std::tuple GetColumnFamilyIdAndTimestampSize( WriteBatch* b, ColumnFamilyHandle* column_family); diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index d51dafa3d..b232736cf 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -110,6 +110,8 @@ void TransactionBaseImpl::Reinitialize(DB* db, start_time_ = dbimpl_->GetSystemClock()->NowMicros(); indexing_enabled_ = true; cmp_ = GetColumnFamilyUserComparator(db_->DefaultColumnFamily()); + WriteBatchInternal::SetDefaultColumnFamilyTimestampSize( + write_batch_.GetWriteBatch(), cmp_->timestamp_size()); WriteBatchInternal::UpdateProtectionInfo( write_batch_.GetWriteBatch(), write_options_.protection_bytes_per_key) .PermitUncheckedError(); diff --git a/utilities/transactions/write_committed_transaction_ts_test.cc b/utilities/transactions/write_committed_transaction_ts_test.cc index e23851a31..dc25b9da8 100644 --- a/utilities/transactions/write_committed_transaction_ts_test.cc +++ b/utilities/transactions/write_committed_transaction_ts_test.cc @@ -564,6 +564,13 @@ TEST_P(WriteCommittedTxnWithTsTest, CheckKeysForConflicts) { ASSERT_TRUE(txn1->GetForUpdate(ReadOptions(), "foo", &dontcare).IsBusy()); ASSERT_TRUE(called); + Transaction* reused_txn = + db->BeginTransaction(WriteOptions(), TransactionOptions(), txn1.get()); + ASSERT_EQ(reused_txn, txn1.get()); + 
ASSERT_OK(reused_txn->Put("foo", "v1")); + ASSERT_OK(reused_txn->SetCommitTimestamp(40)); + ASSERT_OK(reused_txn->Commit()); + SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->ClearAllCallBacks(); } From 76ed9a3990066b6cd089cb64d1a32d9fd970e076 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 9 Aug 2023 15:46:44 -0700 Subject: [PATCH 030/386] Add missing status check when compiling with `ASSERT_STATUS_CHECKED=1` (#11686) Summary: It seems the flag `-fno-elide-constructors` is incorrectly overwritten in Makefile by https://github.com/facebook/rocksdb/blob/9c2ebcc2c365bb89af566b3076f813d7bf11146b/Makefile#L243 Applying the change in PR https://github.com/facebook/rocksdb/issues/11675 shows a lot of missing status checks. This PR adds the missing status checks. Most of changes are just adding asserts in unit tests. I'll add pr comment around more interesting changes that need review. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11686 Test Plan: change Makefile as in https://github.com/facebook/rocksdb/issues/11675, and run `ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 J=24 check` Reviewed By: hx235 Differential Revision: D48176132 Pulled By: cbi42 fbshipit-source-id: 6758946cfb1c6ff84c4c1e0ca540d05e6fc390bd --- db/compact_files_test.cc | 2 +- db/comparator_db_test.cc | 3 +- db/db_bloom_filter_test.cc | 28 +++++++++---------- db/db_compaction_test.cc | 2 +- db/db_flush_test.cc | 2 +- db/db_impl/db_impl_secondary.cc | 2 ++ db/db_options_test.cc | 2 +- db/db_statistics_test.cc | 2 +- db/db_test.cc | 26 ++++++++--------- db/db_test2.cc | 14 +++++----- db/db_test_util.cc | 12 ++++---- db/db_write_buffer_manager_test.cc | 12 ++++---- db/db_write_test.cc | 4 +-- db/error_handler_fs_test.cc | 8 +++--- db/external_sst_file_basic_test.cc | 8 +++--- db/external_sst_file_test.cc | 4 +-- db/fault_injection_test.cc | 2 +- db/listener_test.cc | 2 +- db/perf_context_test.cc | 4 +-- db/version_set_test.cc | 9 +++--- file/delete_scheduler_test.cc | 6 ++-- file/prefetch_test.cc | 6 ++-- options/options_settable_test.cc | 3 ++ utilities/backup/backup_engine_test.cc | 8 +++--- utilities/blob_db/blob_db_impl.cc | 10 +++++-- utilities/blob_db/blob_db_listener.h | 2 +- utilities/checkpoint/checkpoint_test.cc | 4 +-- .../option_change_migration_test.cc | 6 ++-- .../lock/point/point_lock_manager_test.cc | 4 +-- .../lock/point/point_lock_manager_test.h | 2 +- .../optimistic_transaction_test.cc | 15 +++++----- utilities/transactions/transaction_test.cc | 28 +++++++++---------- utilities/transactions/transaction_test.h | 6 ++-- .../write_prepared_transaction_test.cc | 2 +- utilities/ttl/ttl_test.cc | 2 +- .../write_batch_with_index_test.cc | 12 ++++---- 36 files changed, 139 insertions(+), 125 deletions(-) diff --git a/db/compact_files_test.cc b/db/compact_files_test.cc index 5ca80d9aa..2d53f2b99 100644 --- a/db/compact_files_test.cc +++ b/db/compact_files_test.cc @@ -345,7 +345,7 @@ TEST_F(CompactFilesTest, CompactionFilterWithGetSv) { return true; } std::string res; - db_->Get(ReadOptions(), "", &res); + EXPECT_TRUE(db_->Get(ReadOptions(), "", &res).IsNotFound()); return true; } diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index e5e3493b3..d75851083 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -77,7 +77,7 @@ void DoRandomIteraratorTest(DB* db, std::vector source_strings, for (int i = 0; i < num_writes; i++) { if 
(num_trigger_flush > 0 && i != 0 && i % num_trigger_flush == 0) { - db->Flush(FlushOptions()); + ASSERT_OK(db->Flush(FlushOptions())); } int type = rnd->Uniform(2); @@ -156,6 +156,7 @@ void DoRandomIteraratorTest(DB* db, std::vector source_strings, if (map.find(key) == map.end()) { ASSERT_TRUE(status.IsNotFound()); } else { + ASSERT_OK(status); ASSERT_EQ(map[key], result); } break; diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 69face563..bdeb5706d 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -654,7 +654,7 @@ TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { for (i = 0; i < maxKey; i++) { ASSERT_OK(Put(Key(i), Key(i))); } - Flush(); + ASSERT_OK(Flush()); }; auto GetFn = [&]() { int i; @@ -792,7 +792,7 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) { } // Add a large key to make the file contain wide range ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); + ASSERT_OK(Flush(1)); // Check if they can be found for (int i = 0; i < maxKey; i++) { @@ -1696,7 +1696,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { table_options.format_version = 5; options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - TryReopen(options); + ASSERT_OK(TryReopen(options)); CreateAndReopenWithCF({fifo ? "abe" : "bob"}, options); const int maxKey = 10000; @@ -1705,7 +1705,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { } // Add a large key to make the file contain wide range ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - Flush(1); + ASSERT_OK(Flush(1)); EXPECT_EQ(policy->DumpTestReport(), fifo ? "cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n" : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); @@ -1713,7 +1713,7 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { for (int i = maxKey / 2; i < maxKey; i++) { ASSERT_OK(Put(1, Key(i), Key(i))); } - Flush(1); + ASSERT_OK(Flush(1)); EXPECT_EQ(policy->DumpTestReport(), fifo ? 
"cf=abe,s=kCompactionStyleFIFO,n=7,l=0,b=0,r=kFlush\n" : "cf=bob,s=kCompactionStyleLevel,n=7,l=0,b=0,r=kFlush\n"); @@ -2261,7 +2261,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTest) { ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); ASSERT_EQ(0, get_perf_context()->bloom_sst_miss_count); - Flush(); + ASSERT_OK(Flush()); // sanity checks ASSERT_EQ(0, get_perf_context()->bloom_sst_hit_count); @@ -2311,7 +2311,7 @@ TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) { ASSERT_EQ(1, get_perf_context()->bloom_memtable_miss_count); ASSERT_EQ(2, get_perf_context()->bloom_memtable_hit_count); - Flush(); + ASSERT_OK(Flush()); iter.reset(dbfull()->NewIterator(ReadOptions())); @@ -2379,7 +2379,7 @@ void PrefixScanInit(DBBloomFilterTest* dbtest) { snprintf(buf, sizeof(buf), "%02d______:end", i + 1); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); + ASSERT_OK(dbtest->Flush()); } // GROUP 2 @@ -2390,7 +2390,7 @@ void PrefixScanInit(DBBloomFilterTest* dbtest) { snprintf(buf, sizeof(buf), "%02d______:end", small_range_sstfiles + i + 1); keystr = std::string(buf); ASSERT_OK(dbtest->Put(keystr, keystr)); - dbtest->Flush(); + ASSERT_OK(dbtest->Flush()); } } } // anonymous namespace @@ -2853,7 +2853,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterMultipleSST) { ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foq1", "bar1")); ASSERT_OK(Put("fpa", "0")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); std::unique_ptr iter_old(db_->NewIterator(read_options)); ASSERT_EQ(CountIter(iter_old, "foo"), 4); EXPECT_EQ(PopTicker(options, NON_LAST_LEVEL_SEEK_FILTERED), 0); @@ -2981,7 +2981,7 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterNewColumnFamily) { ASSERT_OK(Put(2, "foo5", "bar5")); ASSERT_OK(Put(2, "foq6", "bar6")); ASSERT_OK(Put(2, "fpq7", "bar7")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); { std::unique_ptr iter( db_->NewIterator(read_options, handles_[2])); @@ -3031,17 +3031,17 @@ TEST_F(DBBloomFilterTest, DynamicBloomFilterOptions) { ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Put("foo1", "bar1")); ASSERT_OK(Put("fpa", "0")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); ASSERT_OK(Put("foo3", "bar3")); ASSERT_OK(Put("foo4", "bar4")); ASSERT_OK(Put("foo5", "bar5")); ASSERT_OK(Put("fpb", "1")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); ASSERT_OK(Put("foo6", "bar6")); ASSERT_OK(Put("foo7", "bar7")); ASSERT_OK(Put("foo8", "bar8")); ASSERT_OK(Put("fpc", "2")); - dbfull()->Flush(FlushOptions()); + ASSERT_OK(dbfull()->Flush(FlushOptions())); ReadOptions read_options; read_options.prefix_same_as_start = true; diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 3e565108a..d71561283 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -9792,7 +9792,7 @@ TEST_F(DBCompactionTest, NumberOfSubcompactions) { SubCompactionEventListener* listener = new SubCompactionEventListener(); options.listeners.clear(); options.listeners.emplace_back(listener); - TryReopen(options); + ASSERT_OK(TryReopen(options)); for (int file = 0; file < kLevel0CompactTrigger; ++file) { for (int key = file; key < 2 * kNumKeyPerFile; key += 2) { diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc index 0b2e7abb1..acf9723e9 100644 --- a/db/db_flush_test.cc +++ b/db/db_flush_test.cc @@ -222,7 +222,7 @@ TEST_F(DBFlushTest, CloseDBWhenFlushInLowPri) { sleeping_task_low.WaitUntilDone(); ASSERT_EQ(0, num_flushes); - 
TryReopenWithColumnFamilies({"default", "cf1", "cf2"}, options); + ASSERT_OK(TryReopenWithColumnFamilies({"default", "cf1", "cf2"}, options)); ASSERT_OK(Put(0, "key3", DummyString(8192))); ASSERT_OK(Flush(0)); ASSERT_EQ(1, num_flushes); diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index ebab44cca..ad8783042 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -986,6 +986,8 @@ Status DB::OpenAndCompact( delete db; if (s.ok()) { return serialization_status; + } else { + serialization_status.PermitUncheckedError(); } return s; } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index df6e10850..b7c132aee 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1308,7 +1308,7 @@ TEST_F(DBOptionsTest, TempOptionsFailTest) { [&](void* /*arg*/) { fs->SetFilesystemActive(true); }); SyncPoint::GetInstance()->EnableProcessing(); - TryReopen(options); + ASSERT_NOK(TryReopen(options)); SyncPoint::GetInstance()->DisableProcessing(); std::vector filenames; diff --git a/db/db_statistics_test.cc b/db/db_statistics_test.cc index 8b291acda..054fbc56c 100644 --- a/db/db_statistics_test.cc +++ b/db/db_statistics_test.cc @@ -219,7 +219,7 @@ TEST_F(DBStatisticsTest, VerifyChecksumReadStat) { ASSERT_OK(Flush()); std::unordered_map table_files; uint64_t table_files_size = 0; - GetAllDataFiles(kTableFile, &table_files, &table_files_size); + ASSERT_OK(GetAllDataFiles(kTableFile, &table_files, &table_files_size)); { // Scenario 1: Table verified in `VerifyFileChecksums()`. This should read diff --git a/db/db_test.cc b/db/db_test.cc index 1301c2f3b..d5c5c4413 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -697,7 +697,7 @@ TEST_F(DBTest, ReadFromPersistedTier) { // 3rd round: delete and flush ASSERT_OK(db_->Delete(wopt, handles_[1], "foo")); - Flush(1); + ASSERT_OK(Flush(1)); ASSERT_OK(db_->Delete(wopt, handles_[1], "bar")); ASSERT_TRUE(db_->Get(ropt, handles_[1], "foo", &value).IsNotFound()); @@ -860,7 +860,7 @@ TEST_F(DBTest, DISABLED_VeryLargeValue) { ASSERT_EQ('w', value[0]); // Compact all files. - Flush(); + ASSERT_OK(Flush()); db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); // Check DB is not in read-only state. 
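From here on, most hunks in this change apply one mechanical recipe: a call whose `Status` used to be dropped on the floor (`Flush()`, `TryReopen()`, `GetCurrentTime()`, a bare `db->Get()`) is wrapped in `ASSERT_OK`, and calls that are expected to fail are pinned down with `ASSERT_NOK` or a specific predicate such as `IsNotFound()`. The condensed test below illustrates the same style; it is a hypothetical example, not one of the tests touched by the patch, and it assumes the usual RocksDB test harness (`ASSERT_OK` and friends come from `test_util/testharness.h`).

```cpp
#include "rocksdb/db.h"
#include "test_util/testharness.h"

namespace ROCKSDB_NAMESPACE {

TEST(StatusCheckedExample, ConsumeEveryStatus) {
  std::string dbname = test::PerThreadDBPath("status_checked_example");
  Options options;
  options.create_if_missing = true;

  DB* db = nullptr;
  ASSERT_OK(DB::Open(options, dbname, &db));       // was: DB::Open(...);
  ASSERT_OK(db->Put(WriteOptions(), "k", "v"));
  ASSERT_OK(db->Flush(FlushOptions()));            // was: db->Flush(...);

  std::string value;
  ASSERT_OK(db->Get(ReadOptions(), "k", &value));  // success must be OK
  ASSERT_EQ(value, "v");
  // Expected failures are still consumed, just with a different assertion.
  ASSERT_TRUE(db->Get(ReadOptions(), "missing", &value).IsNotFound());

  delete db;
  ASSERT_OK(DestroyDB(dbname, options));           // was: DestroyDB(...);
}

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
```

In an `ASSERT_STATUS_CHECKED=1` build, a `Status` whose destructor runs before anything inspects it trips an assertion, so even results a test does not care about have to be consumed one way or another.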
@@ -1300,7 +1300,7 @@ TEST_F(DBTest, MetaDataTest) { options.disable_auto_compactions = true; int64_t temp_time = 0; - options.env->GetCurrentTime(&temp_time); + ASSERT_OK(options.env->GetCurrentTime(&temp_time)); uint64_t start_time = static_cast(temp_time); DestroyAndReopen(options); @@ -1329,7 +1329,7 @@ TEST_F(DBTest, MetaDataTest) { std::vector> files_by_level; dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files_by_level); - options.env->GetCurrentTime(&temp_time); + ASSERT_OK(options.env->GetCurrentTime(&temp_time)); uint64_t end_time = static_cast(temp_time); ColumnFamilyMetaData cf_meta; @@ -3648,7 +3648,7 @@ TEST_F(DBTest, BlockBasedTablePrefixHashIndexTest) { ASSERT_OK(Put("kk2", "v2")); ASSERT_OK(Put("kk", "v3")); ASSERT_OK(Put("k", "v4")); - Flush(); + ASSERT_OK(Flush()); ASSERT_EQ("v1", Get("kk1")); ASSERT_EQ("v2", Get("kk2")); @@ -4280,8 +4280,8 @@ TEST_F(DBTest, ConcurrentMemtableNotSupported) { options.soft_pending_compaction_bytes_limit = 0; options.hard_pending_compaction_bytes_limit = 100; options.create_if_missing = true; - - DestroyDB(dbname_, options); + Close(); + ASSERT_OK(DestroyDB(dbname_, options)); options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true, 4)); ASSERT_NOK(TryReopen(options)); @@ -4622,7 +4622,7 @@ TEST_F(DBTest, GetThreadStatus) { Options options; options.env = env_; options.enable_thread_tracking = true; - TryReopen(options); + ASSERT_OK(TryReopen(options)); std::vector thread_list; Status s = env_->GetThreadList(&thread_list); @@ -4693,7 +4693,7 @@ TEST_F(DBTest, DisableThreadStatus) { Options options; options.env = env_; options.enable_thread_tracking = false; - TryReopen(options); + ASSERT_OK(TryReopen(options)); CreateAndReopenWithCF({"pikachu", "about-to-remove"}, options); // Verify non of the column family info exists env_->GetThreadStatusUpdater()->TEST_VerifyColumnFamilyInfoMap(handles_, @@ -4902,7 +4902,7 @@ TEST_P(DBTestWithParam, PreShutdownMultipleCompaction) { options.level0_slowdown_writes_trigger = 1 << 10; options.max_subcompactions = max_subcompactions_; - TryReopen(options); + ASSERT_OK(TryReopen(options)); Random rnd(301); std::vector thread_list; @@ -4991,7 +4991,7 @@ TEST_P(DBTestWithParam, PreShutdownCompactionMiddle) { options.level0_slowdown_writes_trigger = 1 << 10; options.max_subcompactions = max_subcompactions_; - TryReopen(options); + ASSERT_OK(TryReopen(options)); Random rnd(301); std::vector thread_list; @@ -7206,14 +7206,14 @@ TEST_F(DBTest, CreationTimeOfOldestFile) { int idx = 0; int64_t time_1 = 0; - env_->GetCurrentTime(&time_1); + ASSERT_OK(env_->GetCurrentTime(&time_1)); const uint64_t uint_time_1 = static_cast(time_1); // Add 50 hours env_->MockSleepForSeconds(50 * 60 * 60); int64_t time_2 = 0; - env_->GetCurrentTime(&time_2); + ASSERT_OK(env_->GetCurrentTime(&time_2)); const uint64_t uint_time_2 = static_cast(time_2); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( diff --git a/db/db_test2.cc b/db/db_test2.cc index 52d64a900..f03444647 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -266,7 +266,7 @@ TEST_F(DBTest2, CacheIndexAndFilterWithDBRestart) { ASSERT_OK(Put(1, "a", "begin")); ASSERT_OK(Put(1, "z", "end")); ASSERT_OK(Flush(1)); - TryReopenWithColumnFamilies({"default", "pikachu"}, options); + ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); std::string value; value = Get(1, "a"); @@ -357,10 +357,10 @@ TEST_P(DBTestSharedWriteBufferAcrossCFs, SharedWriteBufferAcrossCFs) { // are newer CFs created. 
flush_listener->expected_flush_reason = FlushReason::kManualFlush; ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); + ASSERT_OK(Flush(3)); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); + ASSERT_OK(Flush(0)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "default"), static_cast(1)); ASSERT_EQ(GetNumberOfSstFilesForColumnFamily(db_, "nikitich"), @@ -2068,7 +2068,7 @@ class PinL0IndexAndFilterBlocksTest // reset block cache table_options.block_cache = NewLRUCache(64 * 1024); options->table_factory.reset(NewBlockBasedTableFactory(table_options)); - TryReopenWithColumnFamilies({"default", "pikachu"}, *options); + ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, *options)); // create new table at L0 ASSERT_OK(Put(1, "a2", "begin2")); ASSERT_OK(Put(1, "z2", "end2")); @@ -2188,7 +2188,7 @@ TEST_P(PinL0IndexAndFilterBlocksTest, DisablePrefetchingNonL0IndexAndFilter) { // Reopen database. If max_open_files is set as -1, table readers will be // preloaded. This will trigger a BlockBasedTable::Open() and prefetch // L0 index and filter. Level 1's prefetching is disabled in DB::Open() - TryReopenWithColumnFamilies({"default", "pikachu"}, options); + ASSERT_OK(TryReopenWithColumnFamilies({"default", "pikachu"}, options)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); @@ -3804,7 +3804,7 @@ TEST_F(DBTest2, MemtableOnlyIterator) { ASSERT_EQ(2, count); delete it; - Flush(1); + ASSERT_OK(Flush(1)); // After flushing // point lookups @@ -4112,7 +4112,7 @@ TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) { ASSERT_OK(Put("key", "val")); FlushOptions flush_opts; flush_opts.wait = false; - db_->Flush(flush_opts); + ASSERT_OK(db_->Flush(flush_opts)); TEST_SYNC_POINT("DBTest2::LiveFilesOmitObsoleteFiles:FlushTriggered"); ASSERT_OK(db_->DisableFileDeletions()); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 5a64b2f3f..01d934bd1 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -259,7 +259,7 @@ bool DBTestBase::ChangeFilterOptions() { auto options = CurrentOptions(); options.create_if_missing = true; - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } @@ -270,34 +270,34 @@ bool DBTestBase::ChangeOptionsForFileIngestionTest() { Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } else if (option_config_ == kUniversalCompaction) { option_config_ = kUniversalCompactionMultiLevel; Destroy(last_options_); auto options = CurrentOptions(); options.create_if_missing = true; - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } else if (option_config_ == kUniversalCompactionMultiLevel) { option_config_ = kLevelSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } else if (option_config_ == kLevelSubcompactions) { option_config_ = kUniversalSubcompactions; Destroy(last_options_); auto options = CurrentOptions(); assert(options.max_subcompactions > 1); - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } else if (option_config_ == kUniversalSubcompactions) { option_config_ = kDirectIO; Destroy(last_options_); auto options = CurrentOptions(); - TryReopen(options); + EXPECT_OK(TryReopen(options)); return true; } else { return false; diff --git a/db/db_write_buffer_manager_test.cc b/db/db_write_buffer_manager_test.cc index 
82704e194..eb33ec41e 100644 --- a/db/db_write_buffer_manager_test.cc +++ b/db/db_write_buffer_manager_test.cc @@ -42,10 +42,10 @@ TEST_P(DBWriteBufferManagerTest, SharedBufferAcrossCFs1) { CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); + ASSERT_OK(Flush(3)); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); + ASSERT_OK(Flush(0)); // Write to "Default", "cf2" and "cf3". ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); @@ -84,10 +84,10 @@ TEST_P(DBWriteBufferManagerTest, SharedWriteBufferAcrossCFs2) { CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); + ASSERT_OK(Flush(3)); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); + ASSERT_OK(Flush(0)); // Write to "Default", "cf2" and "cf3". No flush will be triggered. ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); @@ -471,10 +471,10 @@ TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsSingleDB) { CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); - Flush(3); + ASSERT_OK(Flush(3)); ASSERT_OK(Put(3, Key(1), DummyString(1), wo)); ASSERT_OK(Put(0, Key(1), DummyString(1), wo)); - Flush(0); + ASSERT_OK(Flush(0)); // Write to "Default", "cf2" and "cf3". No flush will be triggered. ASSERT_OK(Put(3, Key(1), DummyString(30000), wo)); diff --git a/db/db_write_test.cc b/db/db_write_test.cc index d1e3d53f6..59c26eaaa 100644 --- a/db/db_write_test.cc +++ b/db/db_write_test.cc @@ -495,7 +495,7 @@ TEST_P(DBWriteTest, UnflushedPutRaceWithTrackedWalSync) { // Simulate full loss of unsynced data. This drops "key2" -> "val2" from the // DB WAL. - fault_env->DropUnsyncedFileData(); + ASSERT_OK(fault_env->DropUnsyncedFileData()); Reopen(options); @@ -536,7 +536,7 @@ TEST_P(DBWriteTest, InactiveWalFullySyncedBeforeUntracked) { // Simulate full loss of unsynced data. This should drop nothing since we did // `FlushWAL(true /* sync */)` before `Close()`. 
- fault_env->DropUnsyncedFileData(); + ASSERT_OK(fault_env->DropUnsyncedFileData()); Reopen(options); diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index 442631ded..bbff8c7fe 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -2565,8 +2565,8 @@ TEST_F(DBErrorHandlingFSTest, AtomicFlushReadError) { s = dbfull()->TEST_GetBGError(); ASSERT_OK(s); - TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, - GetDefaultOptions()); + ASSERT_OK(TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, + GetDefaultOptions())); ASSERT_EQ("val", Get(Key(0))); } @@ -2606,8 +2606,8 @@ TEST_F(DBErrorHandlingFSTest, AtomicFlushNoSpaceError) { s = dbfull()->TEST_GetBGError(); ASSERT_OK(s); - TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, - GetDefaultOptions()); + ASSERT_OK(TryReopenWithColumnFamilies({kDefaultColumnFamilyName, "pikachu"}, + GetDefaultOptions())); ASSERT_EQ("val", Get(Key(0))); } diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 39334994a..cd1d7dd0e 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -1346,7 +1346,7 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { // range del [0, 50) in L6 file, [50, 100) in L0 file, [100, 150) in memtable for (int i = 0; i < 3; i++) { if (i != 0) { - db_->Flush(FlushOptions()); + ASSERT_OK(db_->Flush(FlushOptions())); if (i == 1) { MoveFilesToLevel(kNumLevels - 1); } @@ -1747,11 +1747,11 @@ TEST_F(ExternalSSTFileBasicTest, IngestFileAfterDBPut) { Options options = CurrentOptions(); ASSERT_OK(Put("k", "a")); - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(Put("k", "a")); - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(Put("k", "a")); - Flush(); + ASSERT_OK(Flush()); SstFileWriter sst_file_writer(EnvOptions(), options); // Current file size should be 0 after sst_file_writer init and before open a diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 4249b2fc6..89fb0419a 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -2223,7 +2223,7 @@ TEST_P(ExternalSSTFileTest, IngestBehind) { // Trigger compaction if size amplification exceeds 110%. options.compaction_options_universal.max_size_amplification_percent = 110; options.level0_file_num_compaction_trigger = 4; - TryReopen(options); + ASSERT_OK(TryReopen(options)); Random rnd(301); for (int i = 0; i < 4; ++i) { for (int j = 0; j < 10; j++) { @@ -2239,7 +2239,7 @@ TEST_P(ExternalSSTFileTest, IngestBehind) { // Turning off the option allows DB to compact ingested files. 
options.allow_ingest_behind = false; - TryReopen(options); + ASSERT_OK(TryReopen(options)); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files); ASSERT_EQ(1, level_to_files[2].size()); diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc index ddd4b47cc..d888dfde1 100644 --- a/db/fault_injection_test.cc +++ b/db/fault_injection_test.cc @@ -443,7 +443,7 @@ TEST_P(FaultInjectionTest, UninstalledCompaction) { options_.level0_stop_writes_trigger = 1 << 10; options_.level0_slowdown_writes_trigger = 1 << 10; options_.max_background_compactions = 1; - OpenDB(); + ASSERT_OK(OpenDB()); if (!sequential_order_) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ diff --git a/db/listener_test.cc b/db/listener_test.cc index 206dba973..41577b92c 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -582,7 +582,7 @@ TEST_F(EventListenerTest, CompactionReasonLevel) { for (int k = 1; k <= 30; k++) { ASSERT_OK(Put(Key(k), Key(k))); if (k % 10 == 0) { - Flush(); + ASSERT_OK(Flush()); } } diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index bb8691b96..eb51bcfbd 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -262,7 +262,7 @@ void ProfileQueries(bool enabled_time = false) { for (const int i : keys) { if (i == kFlushFlag) { FlushOptions fo; - db->Flush(fo); + ASSERT_OK(db->Flush(fo)); continue; } @@ -1111,7 +1111,7 @@ TEST_F(PerfContextTest, MergeOperandCount) { verify(); // Verify counters when reading from table files - db->Flush(FlushOptions()); + ASSERT_OK(db->Flush(FlushOptions())); verify(); } diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 135b2d64f..86f9798c8 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1221,7 +1221,7 @@ class VersionSetTestBase { tmp_db_options.env = env_; std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); + ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); new_db.SetDBId(db_id); } new_db.SetLogNumber(0); @@ -1277,7 +1277,7 @@ class VersionSetTestBase { void NewDB() { SequenceNumber last_seqno; std::unique_ptr log_writer; - SetIdentityFile(env_, dbname_); + ASSERT_OK(SetIdentityFile(env_, dbname_)); PrepareManifest(&column_families_, &last_seqno, &log_writer); log_writer.reset(); // Make "CURRENT" file point to the new manifest file. 
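A related detail, visible in the `db/db_test_util.cc` and `file/delete_scheduler_test.cc` hunks of this patch, is the use of `EXPECT_OK` rather than `ASSERT_OK` inside helper functions. That choice is forced by gtest: the fatal `ASSERT_*` macros expand to a bare `return;` on failure, so they only compile in functions returning `void`, while `EXPECT_*` records the failure and keeps going. A small sketch with hypothetical helpers (`DoSetupStep` and `ChangeSomeOptions` are illustrative names, not code from the patch):

```cpp
#include "rocksdb/status.h"
#include "test_util/testharness.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical stand-in for a setup step such as TryReopen(options).
Status DoSetupStep() { return Status::OK(); }

// A bool-returning helper cannot use ASSERT_OK: the macro's failure branch is
// a plain `return;`, which does not compile here. EXPECT_OK records the
// failure and lets the helper return normally.
bool ChangeSomeOptions() {
  EXPECT_OK(DoSetupStep());
  return true;
}

// In a void test body, ASSERT_OK is preferred because it stops the test at
// the first failure instead of continuing in a bad state.
TEST(StatusCheckedExample, AssertVersusExpect) {
  ASSERT_TRUE(ChangeSomeOptions());
  ASSERT_OK(DoSetupStep());
}

}  // namespace ROCKSDB_NAMESPACE
```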
@@ -2862,11 +2862,12 @@ class VersionSetTestEmptyDb assert(nullptr != log_writer); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { + ASSERT_OK(SetIdentityFile(env_, dbname_)); DBOptions tmp_db_options; tmp_db_options.env = env_; std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); + ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); new_db.SetDBId(db_id); } const std::string manifest_path = DescriptorFileName(dbname_, 1); @@ -3196,7 +3197,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, tmp_db_options.env = env_; std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); std::string db_id; - impl->GetDbIdentityFromIdentityFile(&db_id); + ASSERT_OK(impl->GetDbIdentityFromIdentityFile(&db_id)); new_db.SetDBId(db_id); } { diff --git a/file/delete_scheduler_test.cc b/file/delete_scheduler_test.cc index 62de5d27c..46e834879 100644 --- a/file/delete_scheduler_test.cc +++ b/file/delete_scheduler_test.cc @@ -40,7 +40,7 @@ class DeleteSchedulerTest : public testing::Test { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({}); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); for (const auto& dummy_files_dir : dummy_files_dirs_) { - DestroyDir(env_, dummy_files_dir); + EXPECT_OK(DestroyDir(env_, dummy_files_dir)); } } @@ -82,11 +82,11 @@ class DeleteSchedulerTest : public testing::Test { std::string file_path = dummy_files_dirs_[dummy_files_dirs_idx] + "/" + file_name; std::unique_ptr f; - env_->NewWritableFile(file_path, &f, EnvOptions()); + EXPECT_OK(env_->NewWritableFile(file_path, &f, EnvOptions())); std::string data(size, 'A'); EXPECT_OK(f->Append(data)); EXPECT_OK(f->Close()); - sst_file_mgr_->OnAddFile(file_path); + EXPECT_OK(sst_file_mgr_->OnAddFile(file_path)); return file_path; } diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index fe3d2cf70..7cb2d3876 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -550,7 +550,7 @@ TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) { } Close(); std::vector buff_prefectch_level_count = {0, 0, 0}; - TryReopen(options); + ASSERT_OK(TryReopen(options)); { auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); fs->ClearPrefetchCount(); @@ -678,7 +678,7 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) { } Close(); - TryReopen(options); + ASSERT_OK(TryReopen(options)); { auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); fs->ClearPrefetchCount(); @@ -793,7 +793,7 @@ TEST_P(PrefetchTest, ConfigureNumFilesReadsForReadaheadSize) { ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); Close(); - TryReopen(options); + ASSERT_OK(TryReopen(options)); fs->ClearPrefetchCount(); buff_prefetch_count = 0; diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index c2bf8640b..60a25dc42 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -378,6 +378,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { delete[] new_options_ptr; } +// status check adds CXX flag -fno-elide-constructors which fails this test. +#ifndef ROCKSDB_ASSERT_STATUS_CHECKED // If the test fails, likely a new option is added to ColumnFamilyOptions // but it cannot be set through GetColumnFamilyOptionsFromString(), or the // test is not updated accordingly. 
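The `#ifndef ROCKSDB_ASSERT_STATUS_CHECKED` guard added above opts the settable-options test out of these builds, which, per the summary, also apply `-fno-elide-constructors` and break that test. The enforcement mechanism itself is easier to see in isolation: a result type that remembers whether anyone inspected it and complains in its destructor when nobody did. The class below is only a toy illustration of that idea, not RocksDB's `Status` (which keeps similar bookkeeping in assertion-enabled builds).

```cpp
#include <cstdio>
#include <cstdlib>

// Toy "must be checked" result type, illustrating the failure mode that the
// ASSERT_STATUS_CHECKED build hunts for.
class MustCheckStatus {
 public:
  static MustCheckStatus OK() { return MustCheckStatus(true); }
  static MustCheckStatus IOError() { return MustCheckStatus(false); }

  MustCheckStatus(MustCheckStatus&& other) noexcept
      : ok_(other.ok_), checked_(other.checked_) {
    other.checked_ = true;  // a moved-from value no longer needs checking
  }

  ~MustCheckStatus() {
    if (!checked_) {
      std::fprintf(stderr, "Status was never checked!\n");
      std::abort();  // mirrors the assertion failure seen in such builds
    }
  }

  bool ok() const {
    checked_ = true;
    return ok_;
  }
  void PermitUncheckedError() const { checked_ = true; }

 private:
  explicit MustCheckStatus(bool ok) : ok_(ok) {}
  bool ok_;
  mutable bool checked_ = false;
};

MustCheckStatus DoWork() { return MustCheckStatus::OK(); }

int main() {
  if (DoWork().ok()) {              // fine: checked before it is destroyed
    std::puts("worked");
  }
  DoWork().PermitUncheckedError();  // fine: explicitly opting out
  DoWork();                         // aborts: nobody consumed the status
  return 0;
}
```

Marking the moved-from object as checked matters precisely when copy elision is disabled: every return then materializes an extra temporary whose destructor also runs.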
@@ -641,6 +643,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { delete[] mcfo2_ptr; delete[] cfo_clean_ptr; } +#endif // !ROCKSDB_ASSERT_STATUS_CHECKED #endif // !ROCKSDB_UBSAN_RUN #endif // !__clang__ #endif // OS_LINUX || OS_WIN diff --git a/utilities/backup/backup_engine_test.cc b/utilities/backup/backup_engine_test.cc index dd41937d4..5ed6ae895 100644 --- a/utilities/backup/backup_engine_test.cc +++ b/utilities/backup/backup_engine_test.cc @@ -589,7 +589,7 @@ void AssertExists(DB* db, int from, int to) { for (int i = from; i < to; ++i) { std::string key = "testkey" + std::to_string(i); std::string value; - Status s = db->Get(ReadOptions(), Slice(key), &value); + ASSERT_OK(db->Get(ReadOptions(), Slice(key), &value)); ASSERT_EQ(value, "testvalue" + std::to_string(i)); } } @@ -4308,13 +4308,13 @@ TEST_F(BackupEngineTest, ExcludeFiles) { for (auto be_pair : {std::make_pair(backup_engine_.get(), alt_backup_engine), std::make_pair(alt_backup_engine, backup_engine_.get())}) { - DestroyDB(dbname_, options_); + ASSERT_OK(DestroyDB(dbname_, options_)); RestoreOptions ro; // Fails without alternate dir ASSERT_TRUE(be_pair.first->RestoreDBFromLatestBackup(dbname_, dbname_, ro) .IsInvalidArgument()); - DestroyDB(dbname_, options_); + ASSERT_OK(DestroyDB(dbname_, options_)); // Works with alternate dir ro.alternate_dirs.push_front(be_pair.second); ASSERT_OK(be_pair.first->RestoreDBFromLatestBackup(dbname_, dbname_, ro)); @@ -4332,7 +4332,7 @@ TEST_F(BackupEngineTest, ExcludeFiles) { for (auto be_pair : {std::make_pair(backup_engine_.get(), alt_backup_engine), std::make_pair(alt_backup_engine, backup_engine_.get())}) { - DestroyDB(dbname_, options_); + ASSERT_OK(DestroyDB(dbname_, options_)); RestoreOptions ro; ro.alternate_dirs.push_front(be_pair.second); ASSERT_OK(be_pair.first->RestoreDBFromLatestBackup(dbname_, dbname_, ro)); diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 69efe950b..f610b9ec4 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -269,7 +269,13 @@ Status BlobDBImpl::Open(std::vector* handles) { // Add trash files in blob dir to file delete scheduler. 
SstFileManagerImpl* sfm = static_cast( db_impl_->immutable_db_options().sst_file_manager.get()); - DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_); + s = DeleteScheduler::CleanupDirectory(env_, sfm, blob_dir_); + if (!s.ok()) { + ROCKS_LOG_ERROR(db_options_.info_log, + "Failed to clean up directory %s, status: %s", + blob_dir_.c_str(), s.ToString().c_str()); + return s; + } UpdateLiveSSTSize(); @@ -1915,7 +1921,7 @@ std::pair BlobDBImpl::EvictExpiredFiles(bool aborted) { } if (!blob_file->Immutable()) { - CloseBlobFile(blob_file); + CloseBlobFile(blob_file).PermitUncheckedError(); } assert(blob_file->Immutable()); diff --git a/utilities/blob_db/blob_db_listener.h b/utilities/blob_db/blob_db_listener.h index 16aed3340..c95740c50 100644 --- a/utilities/blob_db/blob_db_listener.h +++ b/utilities/blob_db/blob_db_listener.h @@ -22,7 +22,7 @@ class BlobDBListener : public EventListener { void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*info*/) override { assert(blob_db_impl_ != nullptr); - blob_db_impl_->SyncBlobFiles(); + blob_db_impl_->SyncBlobFiles().PermitUncheckedError(); } void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*info*/) override { diff --git a/utilities/checkpoint/checkpoint_test.cc b/utilities/checkpoint/checkpoint_test.cc index 1cc062667..a9cea1c05 100644 --- a/utilities/checkpoint/checkpoint_test.cc +++ b/utilities/checkpoint/checkpoint_test.cc @@ -925,7 +925,7 @@ TEST_F(CheckpointTest, CheckpointWithDbPath) { options.db_paths.emplace_back(dbname_ + "_2", 0); Reopen(options); ASSERT_OK(Put("key1", "val1")); - Flush(); + ASSERT_OK(Flush()); Checkpoint* checkpoint; ASSERT_OK(Checkpoint::Create(db_, &checkpoint)); // Currently not supported @@ -968,7 +968,7 @@ TEST_F(CheckpointTest, PutRaceWithCheckpointTrackedWalSync) { // Simulate full loss of unsynced data. This drops "key2" -> "val2" from the // DB WAL. - fault_env->DropUnsyncedFileData(); + ASSERT_OK(fault_env->DropUnsyncedFileData()); // Before the bug fix, reopening the DB would fail because the MANIFEST's // AddWal entry indicated the WAL should be synced through "key2" -> "val2". 
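Not every call site in this patch gains an assertion. The BlobDB hunks above show the two other remedies: the status of `DeleteScheduler::CleanupDirectory` is now logged and propagated out of `BlobDBImpl::Open`, while `SyncBlobFiles()` in the flush listener and `CloseBlobFile()` during eviction are consumed with `PermitUncheckedError()` because those callers have no way to act on a failure. A sketch of that decision at a call site, using hypothetical helpers (`RequiredCleanup`, `BestEffortCleanup`, and `OpenLikeFlow` are illustrative names, not code from the patch):

```cpp
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical step whose failure should stop the caller, standing in for
// DeleteScheduler::CleanupDirectory() in BlobDBImpl::Open().
Status RequiredCleanup() { return Status::OK(); }

// Hypothetical best-effort maintenance step, standing in for calls like
// SyncBlobFiles() or CloseBlobFile() in the hunks above.
Status BestEffortCleanup() { return Status::OK(); }

Status OpenLikeFlow() {
  // Remedy 1: check, surface, and propagate the failure (the real change
  // adds a ROCKS_LOG_ERROR before returning).
  Status s = RequiredCleanup();
  if (!s.ok()) {
    return s;
  }
  // Remedy 2: the call is genuinely fire-and-forget (for example, invoked
  // from an event listener that has no way to fail), so the status is
  // consumed explicitly instead of silently dropped.
  BestEffortCleanup().PermitUncheckedError();
  return Status::OK();
}

}  // namespace ROCKSDB_NAMESPACE
```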
diff --git a/utilities/option_change_migration/option_change_migration_test.cc b/utilities/option_change_migration/option_change_migration_test.cc index e1f9edc89..9fa718cfc 100644 --- a/utilities/option_change_migration/option_change_migration_test.cc +++ b/utilities/option_change_migration/option_change_migration_test.cc @@ -229,7 +229,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate3) { for (int i = 0; i < 50; i++) { ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900))); } - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForCompact()); if (num == 9) { // Issue a full compaction to generate some zero-out files @@ -313,7 +313,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate4) { for (int i = 0; i < 50; i++) { ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900))); } - Flush(); + ASSERT_OK(Flush()); ASSERT_OK(dbfull()->TEST_WaitForCompact()); if (num == 9) { // Issue a full compaction to generate some zero-out files @@ -496,7 +496,7 @@ TEST_F(DBOptionChangeMigrationTest, CompactedSrcToUniversal) { ASSERT_OK(Put(Key(num * 100 + i), rnd.RandomString(900))); } } - Flush(); + ASSERT_OK(Flush()); CompactRangeOptions cro; cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; ASSERT_OK(dbfull()->CompactRange(cro, nullptr, nullptr)); diff --git a/utilities/transactions/lock/point/point_lock_manager_test.cc b/utilities/transactions/lock/point/point_lock_manager_test.cc index 00cb37d93..28ce5275d 100644 --- a/utilities/transactions/lock/point/point_lock_manager_test.cc +++ b/utilities/transactions/lock/point/point_lock_manager_test.cc @@ -128,14 +128,14 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) { port::Thread t1 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() { ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true)); // block because txn1 is holding a lock on k1. - locker_->TryLock(txn2, 1, "k1", env_, true); + ASSERT_OK(locker_->TryLock(txn2, 1, "k1", env_, true)); }); ASSERT_OK(locker_->TryLock(txn3, 1, "k3", env_, true)); port::Thread t2 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() { // block because txn3 is holding a lock on k1. - locker_->TryLock(txn4, 1, "k3", env_, true); + ASSERT_OK(locker_->TryLock(txn4, 1, "k3", env_, true)); }); auto s = locker_->TryLock(txn3, 1, "k2", env_, true); diff --git a/utilities/transactions/lock/point/point_lock_manager_test.h b/utilities/transactions/lock/point/point_lock_manager_test.h index ca9f46bf9..51d9076b2 100644 --- a/utilities/transactions/lock/point/point_lock_manager_test.h +++ b/utilities/transactions/lock/point/point_lock_manager_test.h @@ -244,7 +244,7 @@ TEST_P(AnyLockManagerTest, Deadlock) { // txn1 tries to lock k2, will block forever. port::Thread t = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() { // block because txn2 is holding a lock on k2. 
- locker_->TryLock(txn1, 1, "k2", env_, true); + ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true)); }); auto s = locker_->TryLock(txn2, 1, "k1", env_, true); diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 04e443a74..835b9f097 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -704,6 +704,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { s = txn_db->Get(read_options, "AAA", &value); ASSERT_TRUE(s.IsNotFound()); s = txn_db->Get(read_options, handles[2], "AAAZZZ", &value); + ASSERT_OK(s); ASSERT_EQ(value, "barbar"); Slice key_slices[3] = {Slice("AAA"), Slice("ZZ"), Slice("Z")}; @@ -830,7 +831,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { cur_seen = {}; txn = txn_db->BeginTransaction(write_options, txn_options); for (const auto& key : keys) { - txn->Put(handles[0], key, "blah"); + ASSERT_OK(txn->Put(handles[0], key, "blah")); } ASSERT_OK(txn->Commit()); // Sufficiently large hash coverage of the space @@ -843,7 +844,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { cur_seen = {}; txn = txn_db->BeginTransaction(write_options, txn_options, txn); for (const auto& key : keys) { - txn->Put(handles[0], key, "moo"); + ASSERT_OK(txn->Put(handles[0], key, "moo")); } ASSERT_OK(txn->Commit()); ASSERT_EQ(cur_seen.rolling_hash, base_seen.rolling_hash); @@ -854,7 +855,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { cur_seen = {}; txn = txn_db->BeginTransaction(write_options, txn_options, txn); for (const auto& key : keys) { - txn->Put(handles[1], key, "blah"); + ASSERT_OK(txn->Put(handles[1], key, "blah")); } ASSERT_OK(txn->Commit()); // Different access pattern (different hash seed) @@ -871,7 +872,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { cur_seen = {}; txn = txn_db->BeginTransaction(write_options, txn_options, txn); for (const auto& key : keys) { - txn->Put(handles[2], key, "blah"); + ASSERT_OK(txn->Put(handles[2], key, "blah")); } ASSERT_OK(txn->Commit()); // Different access pattern (different hash seed) @@ -888,7 +889,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { delete txn; txn = shared_txn_db->BeginTransaction(write_options, txn_options); for (const auto& key : keys) { - txn->Put(key, "blah"); + ASSERT_OK(txn->Put(key, "blah")); } ASSERT_OK(txn->Commit()); // Different access pattern (different hash seed) @@ -905,7 +906,7 @@ TEST_P(OptimisticTransactionTest, ColumnFamiliesTest) { delete txn; txn = nonshared_txn_db->BeginTransaction(write_options, txn_options); for (const auto& key : keys) { - txn->Put(key, "blah"); + ASSERT_OK(txn->Put(key, "blah")); } ASSERT_OK(txn->Commit()); // Different access pattern (different hash seed) @@ -1422,7 +1423,7 @@ TEST_P(OptimisticTransactionTest, UndoGetForUpdateTest) { txn1->UndoGetForUpdate("A"); Transaction* txn2 = txn_db->BeginTransaction(write_options); - txn2->Put("A", "x"); + ASSERT_OK(txn2->Put("A", "x")); ASSERT_OK(txn2->Commit()); delete txn2; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index b46bac0d9..4552835aa 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -1382,7 +1382,7 @@ TEST_P(TransactionTest, PersistentTwoPhaseTransactionTest) { ASSERT_OK(db_impl->TEST_FlushMemTable(true)); // regular db read - db->Get(read_options, "foo2", &value); + ASSERT_OK(db->Get(read_options, "foo2", &value)); 
ASSERT_EQ(value, "bar2"); // nothing has been prepped yet @@ -1430,7 +1430,7 @@ TEST_P(TransactionTest, PersistentTwoPhaseTransactionTest) { ASSERT_OK(s); // value is now available - db->Get(read_options, "foo", &value); + ASSERT_OK(db->Get(read_options, "foo", &value)); ASSERT_EQ(value, "bar"); // we already committed @@ -1601,10 +1601,10 @@ TEST_P(TransactionStressTest, TwoPhaseLongPrepareTest) { // crash fault_fs->SetFilesystemActive(false); reinterpret_cast(db)->TEST_Crash(); - ReOpenNoDelete(); + ASSERT_OK(ReOpenNoDelete()); } else if (i % 37 == 0) { // close - ReOpenNoDelete(); + ASSERT_OK(ReOpenNoDelete()); } } @@ -1668,7 +1668,7 @@ TEST_P(TransactionTest, TwoPhaseSequenceTest) { // kill and reopen fault_fs->SetFilesystemActive(false); - ReOpenNoDelete(); + ASSERT_OK(ReOpenNoDelete()); assert(db != nullptr); // value is now available @@ -1706,7 +1706,7 @@ TEST_P(TransactionTest, TwoPhaseDoubleRecoveryTest) { // kill and reopen fault_fs->SetFilesystemActive(false); reinterpret_cast(db)->TEST_Crash(); - ReOpenNoDelete(); + ASSERT_OK(ReOpenNoDelete()); // commit old txn assert(db != nullptr); // Make clang analyze happy. @@ -2186,9 +2186,9 @@ TEST_P(TransactionTest, WriteConflictTest) { s = txn->Commit(); ASSERT_OK(s); - db->Get(read_options, "foo", &value); + ASSERT_OK(db->Get(read_options, "foo", &value)); ASSERT_EQ(value, "A2"); - db->Get(read_options, "foo2", &value); + ASSERT_OK(db->Get(read_options, "foo2", &value)); ASSERT_EQ(value, "B2"); delete txn; @@ -2230,13 +2230,13 @@ TEST_P(TransactionTest, WriteConflictTest2) { ASSERT_OK(s); // Txn should commit, but only write foo2 and foo3 // Verify that transaction wrote foo2 and foo3 but not foo - db->Get(read_options, "foo", &value); + ASSERT_OK(db->Get(read_options, "foo", &value)); ASSERT_EQ(value, "barz"); - db->Get(read_options, "foo2", &value); + ASSERT_OK(db->Get(read_options, "foo2", &value)); ASSERT_EQ(value, "X"); - db->Get(read_options, "foo3", &value); + ASSERT_OK(db->Get(read_options, "foo3", &value)); ASSERT_EQ(value, "Y"); delete txn; @@ -2328,13 +2328,13 @@ TEST_P(TransactionTest, FlushTest) { // force a memtable flush FlushOptions flush_ops; - db->Flush(flush_ops); + ASSERT_OK(db->Flush(flush_ops)); s = txn->Commit(); // txn should commit since the flushed table is still in MemtableList History ASSERT_OK(s); - db->Get(read_options, "foo", &value); + ASSERT_OK(db->Get(read_options, "foo", &value)); ASSERT_EQ(value, "bar2"); delete txn; @@ -6023,7 +6023,7 @@ TEST_P(TransactionTest, DuplicateKeys) { cf_options.max_successive_merges = 2; cf_options.merge_operator = MergeOperators::CreateStringAppendOperator(); ASSERT_OK(ReOpen()); - db->CreateColumnFamily(cf_options, cf_name, &cf_handle); + ASSERT_OK(db->CreateColumnFamily(cf_options, cf_name, &cf_handle)); WriteOptions write_options; // Ensure one value for the key ASSERT_OK(db->Put(write_options, cf_handle, Slice("key"), Slice("value"))); diff --git a/utilities/transactions/transaction_test.h b/utilities/transactions/transaction_test.h index e9f53f250..3b72f21c9 100644 --- a/utilities/transactions/transaction_test.h +++ b/utilities/transactions/transaction_test.h @@ -109,7 +109,7 @@ class TransactionTestBase : public ::testing::Test { delete db; db = nullptr; fault_fs->AssertNoOpenFile(); - fault_fs->DropUnsyncedFileData(); + EXPECT_OK(fault_fs->DropUnsyncedFileData()); fault_fs->ResetState(); Status s; if (use_stackable_db_ == false) { @@ -130,7 +130,7 @@ class TransactionTestBase : public ::testing::Test { delete db; db = nullptr; fault_fs->AssertNoOpenFile(); - 
fault_fs->DropUnsyncedFileData(); + EXPECT_OK(fault_fs->DropUnsyncedFileData()); fault_fs->ResetState(); Status s; if (use_stackable_db_ == false) { @@ -146,7 +146,7 @@ class TransactionTestBase : public ::testing::Test { Status ReOpen() { delete db; db = nullptr; - DestroyDB(dbname, options); + EXPECT_OK(DestroyDB(dbname, options)); Status s; if (use_stackable_db_ == false) { s = TransactionDB::Open(options, txn_db_options, dbname, &db); diff --git a/utilities/transactions/write_prepared_transaction_test.cc b/utilities/transactions/write_prepared_transaction_test.cc index 4041b6ad8..8427e2d7c 100644 --- a/utilities/transactions/write_prepared_transaction_test.cc +++ b/utilities/transactions/write_prepared_transaction_test.cc @@ -1345,7 +1345,7 @@ TEST_P(WritePreparedTransactionTest, NewSnapshotLargerThanMax) { // Check that the new max has not advanced the last seq ASSERT_LT(wp_db->max_evicted_seq_.load(), last_seq); for (auto txn : txns) { - txn->Rollback(); + ASSERT_OK(txn->Rollback()); delete txn; } } diff --git a/utilities/ttl/ttl_test.cc b/utilities/ttl/ttl_test.cc index 225db59b5..723de25e0 100644 --- a/utilities/ttl/ttl_test.cc +++ b/utilities/ttl/ttl_test.cc @@ -652,7 +652,7 @@ TEST_F(TtlTest, ColumnFamiliesTest) { options.create_if_missing = true; options.env = env_.get(); - DB::Open(options, dbname_, &db); + ASSERT_OK(DB::Open(options, dbname_, &db)); ColumnFamilyHandle* handle; ASSERT_OK(db->CreateColumnFamily(ColumnFamilyOptions(options), "ttl_column_family", &handle)); diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index 7330c82ca..b438d7d23 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -261,14 +261,14 @@ class WBWIBaseTest : public testing::Test { std::string result; for (size_t i = 0; i < key.size(); i++) { if (key[i] == 'd') { - batch_->Delete(cf, key); + EXPECT_OK(batch_->Delete(cf, key)); result = ""; } else if (key[i] == 'p') { result = key + std::to_string(i); - batch_->Put(cf, key, result); + EXPECT_OK(batch_->Put(cf, key, result)); } else if (key[i] == 'm') { std::string value = key + std::to_string(i); - batch_->Merge(cf, key, value); + EXPECT_OK(batch_->Merge(cf, key, value)); if (result.empty()) { result = value; } else { @@ -1243,7 +1243,7 @@ TEST_F(WBWIOverwriteTest, TestGetFromBatchMerge2) { s = batch_->GetFromBatch(column_family, options_, "X", &value); ASSERT_TRUE(s.IsNotFound()); - batch_->Merge(column_family, "X", "ddd"); + ASSERT_OK(batch_->Merge(column_family, "X", "ddd")); ASSERT_OK(batch_->GetFromBatch(column_family, options_, "X", &value)); ASSERT_EQ("ddd", value); } @@ -2100,8 +2100,8 @@ TEST_P(WriteBatchWithIndexTest, GetFromBatchAfterMerge) { ASSERT_OK(OpenDB()); ASSERT_OK(db_->Put(write_opts_, "o", "aa")); - batch_->Merge("o", "bb"); // Merging bb under key "o" - batch_->Merge("m", "cc"); // Merging bc under key "m" + ASSERT_OK(batch_->Merge("o", "bb")); // Merging bb under key "o" + ASSERT_OK(batch_->Merge("m", "cc")); // Merging bc under key "m" s = batch_->GetFromBatch(options_, "m", &value); ASSERT_EQ(s.code(), Status::Code::kMergeInProgress); s = batch_->GetFromBatch(options_, "o", &value); From a85eccc6d6837f5ffb69427eb4074e13fa0dde10 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 10 Aug 2023 13:05:45 -0700 Subject: [PATCH 031/386] Adjust db_stress handling of TryAgain from optimistic txn (#11691) Summary: We're still getting some rare 
cases of 5x TryAgains in a row. Here I'm boosting the failure threshold to 10 in a row and adding more info in the output, to help us manually verify whether there's anything suspicous about the sequence of TryAgains, such as if Rollback failed to reset to new sequence numbers. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11691 Test Plan: By lowering the threshold to 2 and adjusting some other db_crashtest parameters, I was able to hit my new code and saw fresh sequence number on the subsequent TryAgain. Reviewed By: cbi42 Differential Revision: D48236153 Pulled By: pdillinger fbshipit-source-id: c0530e969ddcf8de7348e5cf7daf5d6d5dec24f4 --- db_stress_tool/db_stress_test_base.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 66a630b3d..2c3d89c80 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -695,6 +695,7 @@ Status StressTest::ExecuteTransaction( std::function&& ops) { std::unique_ptr txn; Status s = NewTxn(write_opts, &txn); + std::string try_again_messages; if (s.ok()) { for (int tries = 1;; ++tries) { s = ops(*txn); @@ -705,11 +706,21 @@ Status StressTest::ExecuteTransaction( } } // Optimistic txn might return TryAgain, in which case rollback - // and try again. But that shouldn't happen too many times in a row. + // and try again. if (!s.IsTryAgain() || !FLAGS_use_optimistic_txn) { break; } - if (tries >= 5) { + // Record and report historical TryAgain messages for debugging + try_again_messages += + std::to_string(SystemClock::Default()->NowMicros() / 1000); + try_again_messages += "ms "; + try_again_messages += s.getState(); + try_again_messages += "\n"; + // In theory, each Rollback after TryAgain should have an independent + // chance of success, so too many retries could indicate something is + // not working properly. + if (tries >= 10) { + s = Status::TryAgain(try_again_messages); break; } s = txn->Rollback(); From 36f48d16a8a14fb3bb22c6d9ed5ba4ad1f01bc50 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 10 Aug 2023 15:37:28 -0700 Subject: [PATCH 032/386] Add consistent ways to access the builtin UDT comparators (#11690) Summary: Expose the functions that creates these UDT aware comparators so that users can create all the RocksDB builtin comparators in consistent ways. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11690 Reviewed By: ltamasi Differential Revision: D48212021 Pulled By: jowlyzhang fbshipit-source-id: a17a9a11e36e4267551e193f1b22647414acf467 --- include/rocksdb/comparator.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/include/rocksdb/comparator.h b/include/rocksdb/comparator.h index ad1e71a11..d0ac9f1f4 100644 --- a/include/rocksdb/comparator.h +++ b/include/rocksdb/comparator.h @@ -155,10 +155,24 @@ class Comparator : public Customizable, public CompareInterface { // Return a builtin comparator that uses lexicographic byte-wise // ordering. The result remains the property of this module and // must not be deleted. -extern const Comparator* BytewiseComparator(); +const Comparator* BytewiseComparator(); // Return a builtin comparator that uses reverse lexicographic byte-wise // ordering. 
-extern const Comparator* ReverseBytewiseComparator(); +const Comparator* ReverseBytewiseComparator(); + +// Returns a builtin comparator that enables user-defined timestamps (formatted +// as uint64_t) while ordering the user key part without UDT with a +// BytewiseComparator. +// For the same user key with different timestamps, larger (newer) timestamp +// comes first. +const Comparator* BytewiseComparatorWithU64Ts(); + +// Returns a builtin comparator that enables user-defined timestamps (formatted +// as uint64_t) while ordering the user key part without UDT with a +// ReverseBytewiseComparator. +// For the same user key with different timestamps, larger (newer) timestamp +// comes first. +const Comparator* ReverseBytewiseComparatorWithU64Ts(); } // namespace ROCKSDB_NAMESPACE From 66643b8106ba72f5a266d9b06b65aaafb507911c Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Thu, 10 Aug 2023 18:16:10 -0700 Subject: [PATCH 033/386] PutEntity Support in SST File Writer (#11688) Summary: RocksDB provides APIs that enable creating SST files offline and then bulk loading them into the LSM tree quickly using metadata operations. Namely, clients can use the `SstFileWriter` class for the offline data preparation and then the IngestExternalFile family of APIs to perform the bulk loading. However, `SstFileWriter` currently does not support creating files with wide-column data in them. This PR adds `PutEntity` API implementation to `SstFileWriter` to support creating files with wide-column data. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11688 Test Plan: - `BasicWideColumn` test added in external_sst_file_test Reviewed By: ltamasi Differential Revision: D48243779 Pulled By: jaykorean fbshipit-source-id: 1697e5bd67121a648c03946f867416a94be0cadf --- db/external_sst_file_test.cc | 50 +++++++++++++++++++ include/rocksdb/sst_file_writer.h | 6 ++- table/block_based/block.cc | 9 ++-- table/sst_file_writer.cc | 27 +++++++++- .../put_entity_support_in_sst_file_writer.md | 1 + 5 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 unreleased_history/new_features/put_entity_support_in_sst_file_writer.md diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 89fb0419a..4507b16c2 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -538,6 +538,56 @@ TEST_F(ExternalSSTFileTest, Basic) { kRangeDelSkipConfigs)); } +TEST_F(ExternalSSTFileTest, BasicWideColumn) { + do { + Options options = CurrentOptions(); + + SstFileWriter sst_file_writer(EnvOptions(), options); + + // Current file size should be 0 after sst_file_writer init and before open + // a file. + ASSERT_EQ(sst_file_writer.FileSize(), 0); + + std::string file = sst_files_dir_ + "wide_column_file.sst"; + ASSERT_OK(sst_file_writer.Open(file)); + for (int k = 0; k < 10; k++) { + std::string val1 = Key(k) + "_attr_1_val"; + std::string val2 = Key(k) + "_attr_2_val"; + WideColumns columns{{"attr_1", val1}, {"attr_2", val2}}; + ASSERT_OK(sst_file_writer.PutEntity(Key(k), columns)); + } + ExternalSstFileInfo file_info; + ASSERT_OK(sst_file_writer.Finish(&file_info)); + + // Current file size should be non-zero after success write. 
+ ASSERT_GT(sst_file_writer.FileSize(), 0); + + ASSERT_EQ(file_info.file_path, file); + ASSERT_EQ(file_info.num_entries, 10); + ASSERT_EQ(file_info.smallest_key, Key(0)); + ASSERT_EQ(file_info.largest_key, Key(9)); + ASSERT_EQ(file_info.num_range_del_entries, 0); + ASSERT_EQ(file_info.smallest_range_del_key, ""); + ASSERT_EQ(file_info.largest_range_del_key, ""); + + DestroyAndReopen(options); + // Add file using file path + ASSERT_OK(DeprecatedAddFile({file})); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + for (int k = 0; k < 10; k++) { + PinnableWideColumns result; + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), + Key(k), &result)); + std::string val1 = Key(k) + "_attr_1_val"; + std::string val2 = Key(k) + "_attr_2_val"; + WideColumns expected_columns{{"attr_1", val1}, {"attr_2", val2}}; + ASSERT_EQ(result.columns(), expected_columns); + } + + } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | + kRangeDelSkipConfigs)); +} + class SstFileWriterCollector : public TablePropertiesCollector { public: explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) { diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index fb2806865..a17e575b7 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -13,6 +13,7 @@ #include "rocksdb/options.h" #include "rocksdb/table_properties.h" #include "rocksdb/types.h" +#include "rocksdb/wide_columns.h" #if defined(__GNUC__) || defined(__clang__) #define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__)) @@ -127,6 +128,10 @@ class SstFileWriter { // REQUIRES: timestamp's size is equal to what is expected by the comparator. Status Put(const Slice& user_key, const Slice& timestamp, const Slice& value); + // Add a PutEntity (key with the wide-column entity defined by "columns") to + // the currently opened file + Status PutEntity(const Slice& user_key, const WideColumns& columns); + // Add a Merge key with value to currently opened file // REQUIRES: user_key is after any previously added point (Put/Merge/Delete) // key according to the comparator. @@ -186,4 +191,3 @@ class SstFileWriter { std::unique_ptr rep_; }; } // namespace ROCKSDB_NAMESPACE - diff --git a/table/block_based/block.cc b/table/block_based/block.cc index 13e3397a1..9bebdfbdc 100644 --- a/table/block_based/block.cc +++ b/table/block_based/block.cc @@ -671,7 +671,8 @@ bool DataBlockIter::ParseNextDataKey(bool* is_shared) { // If we are reading a file with a global sequence number we should // expect that all encoded sequence numbers are zeros and any value // type is kTypeValue, kTypeMerge, kTypeDeletion, - // kTypeDeletionWithTimestamp, or kTypeRangeDeletion. + // kTypeDeletionWithTimestamp, kTypeRangeDeletion, or + // kTypeWideColumnEntity. 
uint64_t packed = ExtractInternalKeyFooter(raw_key_.GetKey()); SequenceNumber seqno; ValueType value_type; @@ -680,7 +681,8 @@ bool DataBlockIter::ParseNextDataKey(bool* is_shared) { value_type == ValueType::kTypeMerge || value_type == ValueType::kTypeDeletion || value_type == ValueType::kTypeDeletionWithTimestamp || - value_type == ValueType::kTypeRangeDeletion); + value_type == ValueType::kTypeRangeDeletion || + value_type == ValueType::kTypeWideColumnEntity); assert(seqno == 0); } #endif // NDEBUG @@ -736,7 +738,8 @@ void IndexBlockIter::DecodeCurrentValue(bool is_shared) { assert(value_type == ValueType::kTypeValue || value_type == ValueType::kTypeMerge || value_type == ValueType::kTypeDeletion || - value_type == ValueType::kTypeRangeDeletion); + value_type == ValueType::kTypeRangeDeletion || + value_type == ValueType::kTypeWideColumnEntity); first_internal_key.UpdateInternalKey(global_seqno_state_->global_seqno, value_type); diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index d187b741e..b929a7e28 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -9,6 +9,7 @@ #include "db/db_impl/db_impl.h" #include "db/dbformat.h" +#include "db/wide/wide_column_serialization.h" #include "file/writable_file_writer.h" #include "rocksdb/file_system.h" #include "rocksdb/table.h" @@ -81,7 +82,8 @@ struct SstFileWriter::Rep { assert(value_type == kTypeValue || value_type == kTypeMerge || value_type == kTypeDeletion || - value_type == kTypeDeletionWithTimestamp); + value_type == kTypeDeletionWithTimestamp || + value_type == kTypeWideColumnEntity); constexpr SequenceNumber sequence_number = 0; @@ -130,6 +132,24 @@ struct SstFileWriter::Rep { return AddImpl(user_key_with_ts, value, value_type); } + Status AddEntity(const Slice& user_key, const WideColumns& columns) { + WideColumns sorted_columns(columns); + std::sort(sorted_columns.begin(), sorted_columns.end(), + [](const WideColumn& lhs, const WideColumn& rhs) { + return lhs.name().compare(rhs.name()) < 0; + }); + + std::string entity; + const Status s = WideColumnSerialization::Serialize(sorted_columns, entity); + if (!s.ok()) { + return s; + } + if (entity.size() > size_t{std::numeric_limits::max()}) { + return Status::InvalidArgument("wide column entity is too large"); + } + return Add(user_key, entity, kTypeWideColumnEntity); + } + Status DeleteRangeImpl(const Slice& begin_key, const Slice& end_key) { if (!builder) { return Status::InvalidArgument("File is not opened"); @@ -371,6 +391,11 @@ Status SstFileWriter::Put(const Slice& user_key, const Slice& timestamp, return rep_->Add(user_key, timestamp, value, ValueType::kTypeValue); } +Status SstFileWriter::PutEntity(const Slice& user_key, + const WideColumns& columns) { + return rep_->AddEntity(user_key, columns); +} + Status SstFileWriter::Merge(const Slice& user_key, const Slice& value) { return rep_->Add(user_key, value, ValueType::kTypeMerge); } diff --git a/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md b/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md new file mode 100644 index 000000000..7420a67fa --- /dev/null +++ b/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md @@ -0,0 +1 @@ +Add PutEntity API in sst_file_writer From 17b33c8b2f4d3485aa7fc461e468997e0d52b581 Mon Sep 17 00:00:00 2001 From: nikoPLP <57370730+nikoPLP@users.noreply.github.com> Date: Fri, 11 Aug 2023 10:59:49 -0700 Subject: [PATCH 034/386] fix CXX not initialized early enough in Makefile on openbsd + platform version 
10.14 on macos (#11675) Summary: fixes https://github.com/facebook/rocksdb/issues/11220 fixes https://github.com/facebook/rocksdb/issues/11594 CXX is not initialized early enough in Makefile. On OpenBSD its value is `g++` at first, and this results in several `command not found`, notably during the tests for HAVE_POWER8 and HAS_ALTIVEC which results in the build problem mentionned in https://github.com/facebook/rocksdb/issues/11594 reordering the Makefile fixes the issue, by placing the creation of make_config.mk and its import before any use of `$(CXX)` Also, fixes the platofrm version for macos. it must be 10.14 now that rocksdb is using the C++17 standard Pull Request resolved: https://github.com/facebook/rocksdb/pull/11675 Reviewed By: cbi42 Differential Revision: D48101615 Pulled By: ajkr fbshipit-source-id: 1f1b4d4604480b31675140b92c6fe97dc55b8c75 --- Makefile | 38 +++++++++++++++---------------- build_tools/build_detect_platform | 12 +++++----- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index a1ea379d7..41ca0ee28 100644 --- a/Makefile +++ b/Makefile @@ -88,6 +88,25 @@ endif $(info $$DEBUG_LEVEL is $(DEBUG_LEVEL), $$LIB_MODE is $(LIB_MODE)) +# Detect what platform we're building on. +# Export some common variables that might have been passed as Make variables +# instead of environment variables. +dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ + export CXXFLAGS="$(EXTRA_CXXFLAGS)"; \ + export LDFLAGS="$(EXTRA_LDFLAGS)"; \ + export COMPILE_WITH_ASAN="$(COMPILE_WITH_ASAN)"; \ + export COMPILE_WITH_TSAN="$(COMPILE_WITH_TSAN)"; \ + export COMPILE_WITH_UBSAN="$(COMPILE_WITH_UBSAN)"; \ + export PORTABLE="$(PORTABLE)"; \ + export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \ + export USE_CLANG="$(USE_CLANG)"; \ + export LIB_MODE="$(LIB_MODE)"; \ + export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \ + export USE_FOLLY="$(USE_FOLLY)"; \ + "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) +# this file is generated by the previous line to set build flags and sources +include make_config.mk + # Figure out optimize level. ifneq ($(DEBUG_LEVEL), 2) OPTIMIZE_LEVEL ?= -O2 @@ -223,25 +242,6 @@ am__v_AR_1 = AM_LINK = $(AM_V_CCLD)$(CXX) -L. $(patsubst lib%.a, -l%, $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^)) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) AM_SHARE = $(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$@ -L. $(patsubst lib%.$(PLATFORM_SHARED_EXT), -l%, $^) $(EXEC_LDFLAGS) $(LDFLAGS) -o $@ -# Detect what platform we're building on. -# Export some common variables that might have been passed as Make variables -# instead of environment variables. 
-dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ - export CXXFLAGS="$(EXTRA_CXXFLAGS)"; \ - export LDFLAGS="$(EXTRA_LDFLAGS)"; \ - export COMPILE_WITH_ASAN="$(COMPILE_WITH_ASAN)"; \ - export COMPILE_WITH_TSAN="$(COMPILE_WITH_TSAN)"; \ - export COMPILE_WITH_UBSAN="$(COMPILE_WITH_UBSAN)"; \ - export PORTABLE="$(PORTABLE)"; \ - export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \ - export USE_CLANG="$(USE_CLANG)"; \ - export LIB_MODE="$(LIB_MODE)"; \ - export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \ - export USE_FOLLY="$(USE_FOLLY)"; \ - "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) -# this file is generated by the previous line to set build flags and sources -include make_config.mk - ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), plugin/$(plugin)/*.mk) include $(ROCKSDB_PLUGIN_MKS) ROCKSDB_PLUGIN_PROTO =ROCKSDB_NAMESPACE::ObjectLibrary\&, const std::string\& diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index f7e9c866b..a9a49e23a 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -674,13 +674,13 @@ else fi if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then - # For portability compile for macOS 10.13 (2017) or newer - COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.13" - PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.13" + # For portability compile for macOS 10.14 or newer + COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.14" + PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.14" # -mmacosx-version-min must come first here. - PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.13 $PLATFORM_SHARED_LDFLAGS" - PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13" - JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.13" + PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.14 $PLATFORM_SHARED_LDFLAGS" + PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.14" + JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.14" JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS" From 7cdbce4564fd7b9522f7f23021cc947c788e419c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 11 Aug 2023 11:26:38 -0700 Subject: [PATCH 035/386] Add UDT support in API DB::GetApproximateMemTableStats (#11689) Summary: This API should consider the case when user-defined timestamp is enabled. Also added some documentation to some related API to clarify the usage in the case when user-defined timestamp is enabled. 
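A minimal usage sketch (not part of this patch), assuming a column family opened with a user-defined-timestamp comparator such as BytewiseComparatorWithU64Ts(); the key bounds are illustrative and, per the documentation added here, are plain user keys without the timestamp part:

```
#include <cstdint>
#include "rocksdb/db.h"

// Sketch only: `db` and `cf` are assumed to come from a DB whose column
// family comparator enables user-defined timestamps (e.g. u64 timestamps).
void ApproxMemTableStatsSketch(rocksdb::DB* db,
                               rocksdb::ColumnFamilyHandle* cf) {
  // Bounds are user keys without a timestamp suffix; the implementation
  // appends maximum timestamps internally (see MaybeAddTimestampsToRange).
  rocksdb::Range r("k0500", "k0600");
  uint64_t count = 0;
  uint64_t size = 0;
  db->GetApproximateMemTableStats(cf, r, &count, &size);
}
```

The same no-timestamp convention is documented in this patch for `Range`, `RangePtr`, and `CompactRange`.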
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11689 Test Plan: Unit test added ``` make check ./db_with_timestamp_basic_test --gtest_filter=*GetApproximateSizes* ``` Reviewed By: ltamasi Differential Revision: D48208568 Pulled By: jowlyzhang fbshipit-source-id: c5baa4a2923441f8ea3a3672c98223a43a3428dc --- db/db_impl/db_impl.cc | 44 ++++++++++++++++++++---------- db/db_with_timestamp_basic_test.cc | 7 +++++ include/rocksdb/db.h | 6 ++++ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 859f3144a..0567c6eff 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -149,6 +149,24 @@ void DumpSupportInfo(Logger* logger) { ROCKS_LOG_HEADER(logger, "DMutex implementation: %s", DMutex::kName()); } + +// `start` is the inclusive lower user key bound without user-defined timestamp +// `limit` is the exclusive upper user key bound without user-defined timestamp +std::tuple MaybeAddTimestampsToRange(const Slice& start, + const Slice& limit, + size_t ts_sz, + std::string* start_with_ts, + std::string* limit_with_ts) { + if (ts_sz == 0) { + return std::make_tuple(start, limit); + } + // Maximum timestamp means including all key with any timestamp + AppendKeyWithMaxTimestamp(start_with_ts, start, ts_sz); + // Append a maximum timestamp as the range limit is exclusive: + // [start, limit) + AppendKeyWithMaxTimestamp(limit_with_ts, limit, ts_sz); + return std::make_tuple(Slice(*start_with_ts), Slice(*limit_with_ts)); +} } // namespace DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, @@ -4275,9 +4293,17 @@ void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family, ColumnFamilyData* cfd = cfh->cfd(); SuperVersion* sv = GetAndRefSuperVersion(cfd); + const Comparator* const ucmp = column_family->GetComparator(); + assert(ucmp); + size_t ts_sz = ucmp->timestamp_size(); + + // Add timestamp if needed + std::string start_with_ts, limit_with_ts; + auto [start, limit] = MaybeAddTimestampsToRange( + range.start, range.limit, ts_sz, &start_with_ts, &limit_with_ts); // Convert user_key into a corresponding internal key. - InternalKey k1(range.start, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey k2(range.limit, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); MemTable::MemTableStats memStats = sv->mem->ApproximateStats(k1.Encode(), k2.Encode()); MemTable::MemTableStats immStats = @@ -4308,20 +4334,10 @@ Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, // TODO: plumb Env::IOActivity const ReadOptions read_options; for (int i = 0; i < n; i++) { - Slice start = range[i].start; - Slice limit = range[i].limit; - // Add timestamp if needed std::string start_with_ts, limit_with_ts; - if (ts_sz > 0) { - // Maximum timestamp means including all key with any timestamp - AppendKeyWithMaxTimestamp(&start_with_ts, start, ts_sz); - // Append a maximum timestamp as the range limit is exclusive: - // [start, limit) - AppendKeyWithMaxTimestamp(&limit_with_ts, limit, ts_sz); - start = start_with_ts; - limit = limit_with_ts; - } + auto [start, limit] = MaybeAddTimestampsToRange( + range[i].start, range[i].limit, ts_sz, &start_with_ts, &limit_with_ts); // Convert user_key into a corresponding internal key. 
InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 202c4c345..2572bb892 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -459,6 +459,13 @@ TEST_F(DBBasicTestWithTimestamp, GetApproximateSizes) { db_->GetApproximateSizes(size_approx_options, default_cf, &r, 1, &size)); ASSERT_GT(size, 0); + uint64_t total_mem_count; + uint64_t total_mem_size; + db_->GetApproximateMemTableStats(default_cf, r, &total_mem_count, + &total_mem_size); + ASSERT_GT(total_mem_count, 0); + ASSERT_GT(total_mem_size, 0); + // Should exclude end key start = Key(900); end = Key(1000); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 436b430f1..25f745356 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -100,6 +100,8 @@ static const int kMinorVersion = __ROCKSDB_MINOR__; // A range of keys struct Range { + // In case of user_defined timestamp, if enabled, `start` and `limit` should + // point to key without timestamp part. Slice start; Slice limit; @@ -108,6 +110,8 @@ struct Range { }; struct RangePtr { + // In case of user_defined timestamp, if enabled, `start` and `limit` should + // point to key without timestamp part. const Slice* start; const Slice* limit; @@ -1352,6 +1356,8 @@ class DB { // the files. In this case, client could set options.change_level to true, to // move the files back to the minimum level capable of holding the data set // or a given level (specified by non-negative options.target_level). + // In case of user_defined timestamp, if enabled, `start` and `end` should + // point to key without timestamp part. virtual Status CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) = 0; From 52816ff64d8623c9263a9b99ffdae26f42785b35 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 11 Aug 2023 12:30:48 -0700 Subject: [PATCH 036/386] Close DB option in WaitForCompact() (#11497) Summary: Context: As mentioned in https://github.com/facebook/rocksdb/issues/11436, introducing `close_db` option in `WaitForCompactOptions` to close DB after waiting for compactions to finish. Must be set to true to close the DB upon compactions finishing. 1. `bool close_db = false` added to `WaitForCompactOptions` 2. Introduced `CancelPeriodicTaskSchedulers()` and moved unregistering PeriodicTaskSchedulers to it.`CancelAllBackgroundWork()` calls it now. 3. When close_db option is on, unpersisted data (data in memtable when WAL is disabled) will be flushed in `WaitForCompact()` if flush option is not on (and `mutable_db_options_.avoid_flush_during_shutdown` is not true). The unpersisted data flush in `CancelAllBackgroundWork()` will be skipped because `shutting_down_` flag will be set true before calling `Close()`. 4. Atomic boolean `reject_new_background_jobs_` is introduced to prevent new background jobs from being added during the short period of time after waiting is done and before `shutting_down_` is set by `Close()`. 5. `WaitForCompact()` now waits for recovery in progress to complete as well. (flush operations from WAL -> L0 files) 6. Added `close_db_` cases to all existing `WaitForCompactTests` 7. 
Added a scenario to `DBBasicTest::DBClose` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11497 Test Plan: - Existing DBCompactionTests - `WaitForCompactWithOptionToFlushAndCloseDB` added - Added a scenario to `DBBasicTest::DBClose` Reviewed By: pdillinger, jowlyzhang Differential Revision: D46337560 Pulled By: jaykorean fbshipit-source-id: 0f8c7ee09394847f2af5ea4bdd331b47bcdef0b0 --- db/db_basic_test.cc | 20 +++- db/db_compaction_test.cc | 104 +++++++++++++++--- db/db_impl/db_impl.cc | 63 ++++------- db/db_impl/db_impl.h | 8 ++ db/db_impl/db_impl_compaction_flush.cc | 28 ++++- include/rocksdb/db.h | 5 + include/rocksdb/options.h | 7 ++ .../wait_for_compact_close_db_option.md | 1 + 8 files changed, 175 insertions(+), 61 deletions(-) create mode 100644 unreleased_history/new_features/wait_for_compact_close_db_option.md diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 063b99839..15cc5d3f5 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1204,9 +1204,23 @@ TEST_F(DBBasicTest, DBClose) { delete db; ASSERT_EQ(env->GetCloseCount(), 2); + // close by WaitForCompact() with close_db option + options.create_if_missing = false; + s = DB::Open(options, dbname, &db); + ASSERT_OK(s); + ASSERT_TRUE(db != nullptr); + WaitForCompactOptions wait_for_compact_options = WaitForCompactOptions(); + wait_for_compact_options.close_db = true; + s = db->WaitForCompact(wait_for_compact_options); + ASSERT_EQ(env->GetCloseCount(), 3); + // see TestLogger::CloseHelper() + ASSERT_EQ(s, Status::IOError()); + + delete db; + ASSERT_EQ(env->GetCloseCount(), 3); + // Provide our own logger and ensure DB::Close() does not close it options.info_log.reset(new TestEnv::TestLogger(env)); - options.create_if_missing = false; s = DB::Open(options, dbname, &db); ASSERT_OK(s); ASSERT_TRUE(db != nullptr); @@ -1214,9 +1228,9 @@ TEST_F(DBBasicTest, DBClose) { s = db->Close(); ASSERT_EQ(s, Status::OK()); delete db; - ASSERT_EQ(env->GetCloseCount(), 2); - options.info_log.reset(); ASSERT_EQ(env->GetCloseCount(), 3); + options.info_log.reset(); + ASSERT_EQ(env->GetCloseCount(), 4); } TEST_F(DBBasicTest, DBCloseAllDirectoryFDs) { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index d71561283..d6f9f25fb 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -155,15 +155,17 @@ class DBCompactionDirectIOTest : public DBCompactionTest, class DBCompactionWaitForCompactTest : public DBTestBase, - public testing::WithParamInterface> { + public testing::WithParamInterface> { public: DBCompactionWaitForCompactTest() : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { abort_on_pause_ = std::get<0>(GetParam()); flush_ = std::get<1>(GetParam()); + close_db_ = std::get<2>(GetParam()); } bool abort_on_pause_; bool flush_; + bool close_db_; Options options_; WaitForCompactOptions wait_for_compact_options_; @@ -179,6 +181,7 @@ class DBCompactionWaitForCompactTest wait_for_compact_options_ = WaitForCompactOptions(); wait_for_compact_options_.abort_on_pause = abort_on_pause_; wait_for_compact_options_.flush = flush_; + wait_for_compact_options_.close_db = close_db_; DestroyAndReopen(options_); @@ -3333,10 +3336,8 @@ TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) { INSTANTIATE_TEST_CASE_P(DBCompactionWaitForCompactTest, DBCompactionWaitForCompactTest, - ::testing::Values(std::make_tuple(false, false), - std::make_tuple(false, true), - std::make_tuple(true, false), - std::make_tuple(true, true))); + ::testing::Combine(testing::Bool(), testing::Bool(), + 
testing::Bool())); TEST_P(DBCompactionWaitForCompactTest, WaitForCompactWaitsOnCompactionToFinish) { @@ -3476,19 +3477,19 @@ TEST_P(DBCompactionWaitForCompactTest, WaitForCompactWithOptionToFlush) { ASSERT_EQ("2", FilesPerLevel()); ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_)); - if (flush_) { - ASSERT_EQ("1,2", FilesPerLevel()); - ASSERT_EQ(1, compaction_finished); - ASSERT_EQ(1, flush_finished); - } else { - ASSERT_EQ(0, compaction_finished); - ASSERT_EQ(0, flush_finished); - ASSERT_EQ("2", FilesPerLevel()); + ASSERT_EQ(flush_, compaction_finished); + ASSERT_EQ(flush_, flush_finished); + + if (!close_db_) { + std::string expected_files_per_level = flush_ ? "1,2" : "2"; + ASSERT_EQ(expected_files_per_level, FilesPerLevel()); } compaction_finished = 0; flush_finished = 0; - Close(); + if (!close_db_) { + Close(); + } Reopen(options_); ASSERT_EQ(0, flush_finished); @@ -3503,7 +3504,80 @@ TEST_P(DBCompactionWaitForCompactTest, WaitForCompactWithOptionToFlush) { ASSERT_EQ(1, compaction_finished); } - ASSERT_EQ("1,2", FilesPerLevel()); + if (!close_db_) { + ASSERT_EQ("1,2", FilesPerLevel()); + } + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +TEST_P(DBCompactionWaitForCompactTest, + WaitForCompactWithOptionToFlushAndCloseDB) { + // After creating enough L0 files that one more file will trigger the + // compaction, write some data in memtable (WAL disabled). Calls + // WaitForCompact. If flush option is true, WaitForCompact will flush the + // memtable to a new L0 file which will trigger compaction. We expect the + // no-op second flush upon closing because WAL is disabled + // (has_unpersisted_data_ true) Check to make sure there's no extra L0 file + // created from WAL. Re-opening DB won't trigger any flush or compaction + + int compaction_finished = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:Finish", + [&](void*) { compaction_finished++; }); + + int flush_finished = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "FlushJob::End", [&](void*) { flush_finished++; }); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_FALSE(options_.avoid_flush_during_shutdown); + + // write to memtable, but no flush is needed at this point. + WriteOptions write_without_wal; + write_without_wal.disableWAL = true; + ASSERT_OK(Put(Key(0), "some random string", write_without_wal)); + ASSERT_EQ(0, compaction_finished); + ASSERT_EQ(0, flush_finished); + ASSERT_EQ("2", FilesPerLevel()); + + ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_)); + + int expected_flush_count = flush_ || close_db_; + ASSERT_EQ(expected_flush_count, flush_finished); + + if (!close_db_) { + // During CancelAllBackgroundWork(), a flush can be initiated due to + // unpersisted data (data that's still in the memtable when WAL is off). + // This results in an additional L0 file which can trigger a compaction. + // However, the compaction may not complete if the background thread's + // execution is slow enough for the front thread to set the 'shutting_down_' + // flag to true before the compaction job even starts. + ASSERT_EQ(expected_flush_count, compaction_finished); + Close(); + } + + // Because we had has_unpersisted_data_ = true, flush must have been triggered + // upon closing regardless of WaitForCompact. Reopen should have no flush + // debt. 
+ flush_finished = 0; + Reopen(options_); + ASSERT_EQ(0, flush_finished); + + // However, if db was closed directly by calling Close(), instead + // of WaitForCompact with close_db option or we are in the scenario commented + // above, it's possible that the last compaction triggered by flushing + // unpersisted data was cancelled. Call WaitForCompact() here again to finish + // the compaction + if (compaction_finished == 0) { + ASSERT_OK(dbfull()->WaitForCompact(wait_for_compact_options_)); + } + ASSERT_EQ(1, compaction_finished); + if (!close_db_) { + ASSERT_EQ("1,2", FilesPerLevel()); + } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0567c6eff..66d9be161 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -201,6 +201,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, batch_per_txn_(batch_per_txn), next_job_id_(1), shutting_down_(false), + reject_new_background_jobs_(false), db_lock_(nullptr), manual_compaction_paused_(false), bg_cv_(&mutex_), @@ -410,22 +411,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { FlushOptions flush_opts; // We allow flush to stall write since we are trying to resume from error. flush_opts.allow_write_stall = true; - if (immutable_db_options_.atomic_flush) { - mutex_.Unlock(); - s = AtomicFlushMemTables(flush_opts, context.flush_reason); - mutex_.Lock(); - } else { - for (auto cfd : versions_->GetRefedColumnFamilySet()) { - if (cfd->IsDropped()) { - continue; - } - InstrumentedMutexUnlock u(&mutex_); - s = FlushMemTable(cfd, flush_opts, context.flush_reason); - if (!s.ok()) { - break; - } - } - } + s = FlushAllColumnFamilies(flush_opts, context.flush_reason); if (!s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB resume requested but failed due to Flush failure [%s]", @@ -512,36 +498,14 @@ void DBImpl::WaitForBackgroundWork() { void DBImpl::CancelAllBackgroundWork(bool wait) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown: canceling all background work"); - - for (uint8_t task_type = 0; - task_type < static_cast(PeriodicTaskType::kMax); task_type++) { - Status s = periodic_task_scheduler_.Unregister( - static_cast(task_type)); - if (!s.ok()) { - ROCKS_LOG_WARN(immutable_db_options_.info_log, - "Failed to unregister periodic task %d, status: %s", - task_type, s.ToString().c_str()); - } - } - + CancelPeriodicTaskScheduler(); InstrumentedMutexLock l(&mutex_); if (!shutting_down_.load(std::memory_order_acquire) && has_unpersisted_data_.load(std::memory_order_relaxed) && !mutable_db_options_.avoid_flush_during_shutdown) { - if (immutable_db_options_.atomic_flush) { - mutex_.Unlock(); - Status s = AtomicFlushMemTables(FlushOptions(), FlushReason::kShutDown); - s.PermitUncheckedError(); //**TODO: What to do on error? - mutex_.Lock(); - } else { - for (auto cfd : versions_->GetRefedColumnFamilySet()) { - if (!cfd->IsDropped() && cfd->initialized() && !cfd->mem()->IsEmpty()) { - InstrumentedMutexUnlock u(&mutex_); - Status s = FlushMemTable(cfd, FlushOptions(), FlushReason::kShutDown); - s.PermitUncheckedError(); //**TODO: What to do on error? - } - } - } + Status s = + DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown); + s.PermitUncheckedError(); //**TODO: What to do on error? 
} shutting_down_.store(true, std::memory_order_release); @@ -890,6 +854,21 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() { return s; } +Status DBImpl::CancelPeriodicTaskScheduler() { + Status s = Status::OK(); + for (uint8_t task_type = 0; + task_type < static_cast(PeriodicTaskType::kMax); task_type++) { + s = periodic_task_scheduler_.Unregister( + static_cast(task_type)); + if (!s.ok()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Failed to unregister periodic task %d, status: %s", + task_type, s.ToString().c_str()); + } + } + return s; +} + // esitmate the total size of stats_history_ size_t DBImpl::EstimateInMemoryStatsHistorySize() const { size_t size_total = diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 3711f66a3..b3a884783 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1349,6 +1349,11 @@ class DBImpl : public DB { std::atomic shutting_down_; + // No new background jobs can be queued if true. This is used to prevent new + // background jobs from being queued after WaitForCompact() completes waiting + // all background jobs then attempts to close when close_db_ option is true. + bool reject_new_background_jobs_; + // RecoveryContext struct stores the context about version edits along // with corresponding column_family_data and column_family_options. class RecoveryContext { @@ -2126,6 +2131,9 @@ class DBImpl : public DB { // Schedule background tasks Status StartPeriodicTaskScheduler(); + // Cancel scheduled periodic tasks + Status CancelPeriodicTaskScheduler(); + Status RegisterRecordSeqnoTimeWorker(); void PrintStatistics(); diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 9cde56061..157db28a1 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -2850,6 +2850,9 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue( void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) { mutex_.AssertHeld(); + if (reject_new_background_jobs_) { + return; + } if (flush_req.cfd_to_max_mem_id_to_persist.empty()) { return; } @@ -2879,6 +2882,9 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) { void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) { mutex_.AssertHeld(); + if (reject_new_background_jobs_) { + return; + } if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) { AddToCompactionQueue(cfd); ++unscheduled_compactions_; @@ -2888,6 +2894,9 @@ void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) { void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync, FileType type, uint64_t number, int job_id) { mutex_.AssertHeld(); + if (reject_new_background_jobs_) { + return; + } PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id); purge_files_.insert({{number, std::move(file_info)}}); } @@ -4095,6 +4104,14 @@ Status DBImpl::WaitForCompact( if (!s.ok()) { return s; } + } else if (wait_for_compact_options.close_db && + has_unpersisted_data_.load(std::memory_order_relaxed) && + !mutable_db_options_.avoid_flush_during_shutdown) { + Status s = + DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown); + if (!s.ok()) { + return s; + } } TEST_SYNC_POINT("DBImpl::WaitForCompact:StartWaiting"); for (;;) { @@ -4106,9 +4123,18 @@ Status DBImpl::WaitForCompact( } if ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ || bg_flush_scheduled_ || unscheduled_compactions_ || - unscheduled_flushes_) && + unscheduled_flushes_ || error_handler_.IsRecoveryInProgress()) && 
(error_handler_.GetBGError().ok())) { bg_cv_.Wait(); + } else if (wait_for_compact_options.close_db) { + reject_new_background_jobs_ = true; + mutex_.Unlock(); + Status s = Close(); + mutex_.Lock(); + if (!s.ok()) { + reject_new_background_jobs_ = false; + } + return s; } else { return error_handler_.GetBGError(); } diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 25f745356..6c15213fa 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -326,12 +326,17 @@ class DB { // If syncing is required, the caller must first call SyncWAL(), or Write() // using an empty write batch with WriteOptions.sync=true. // Regardless of the return status, the DB must be freed. + // // If the return status is Aborted(), closing fails because there is // unreleased snapshot in the system. In this case, users can release // the unreleased snapshots and try again and expect it to succeed. For // other status, re-calling Close() will be no-op and return the original // close status. If the return status is NotSupported(), then the DB // implementation does cleanup in the destructor + // + // WaitForCompact() with WaitForCompactOptions.close_db=true will be a good + // choice for users who want to wait for background work before closing + // (rather than aborting and potentially redoing some work on re-open) virtual Status Close() { return Status::NotSupported(); } // ListColumnFamilies will open the DB specified by argument name diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 7fac24335..be42806f1 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -2125,6 +2125,13 @@ struct WaitForCompactOptions { // A boolean to flush all column families before starting to wait. bool flush = false; + + // A boolean to call Close() after waiting is done. By the time Close() is + // called here, there should be no background jobs in progress and no new + // background jobs should be added. DB may not have been closed if Close() + // returned Aborted status due to unreleased snapshots in the system. See + // comments in DB::Close() for details. + bool close_db = false; }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/new_features/wait_for_compact_close_db_option.md b/unreleased_history/new_features/wait_for_compact_close_db_option.md new file mode 100644 index 000000000..7fd48ff2e --- /dev/null +++ b/unreleased_history/new_features/wait_for_compact_close_db_option.md @@ -0,0 +1 @@ +Add close_db option to `WaitForCompactOptions` to call Close() after waiting is done. From 38ecfabed2db16799322c40f7e5c596b539cf2bc Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 11 Aug 2023 13:14:04 -0700 Subject: [PATCH 037/386] Remove comment about locking about TestIterateAgainstExpected (#11695) Summary: **Context/Summary** After https://github.com/facebook/rocksdb/pull/11058, we no longer lock the key range to iterate in TestIterateAgainstExpected, except for working with timestamp feature. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11695 Test Plan: no code change Reviewed By: ajkr Differential Revision: D48276668 Pulled By: hx235 fbshipit-source-id: dc92a3708b2281dc737c0877fb755548bf03a9fc --- db_stress_tool/no_batched_ops_stress.cc | 2 -- tools/db_crashtest.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index ed1b88b7b..c0159df4f 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1589,8 +1589,6 @@ class NonBatchedOpsStressTest : public StressTest { const int64_t ub = lb + num_iter; - // Lock the whole range over which we might iterate to ensure it doesn't - // change under us. const int rand_column_family = rand_column_families[0]; // Testing parallel read and write to the same key with user timestamp diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index add4351da..8ec54c9e6 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -346,7 +346,7 @@ def is_direct_io_supported(dbname): "write_buffer_size": 32 * 1024 * 1024, "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1), "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), - "verify_iterator_with_expected_state_one_in": 5, # this locks a range of keys + "verify_iterator_with_expected_state_one_in": 5, } blackbox_simple_default_params = { From ef6f0255634827b5a7172e99efe2534b8d4ebd2e Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 11 Aug 2023 16:27:38 -0700 Subject: [PATCH 038/386] Placeholder for AutoHyperClockCache, more (#11692) Summary: * The plan is for AutoHyperClockCache to be selected when HyperClockCacheOptions::estimated_entry_charge == 0, and in that case to use a new configuration option min_avg_entry_charge for determining an extreme case maximum size for the hash table. For the placeholder, a hack is in place in HyperClockCacheOptions::MakeSharedCache() to make the unit tests happy despite the new options not really making sense with the current implementation. * Mostly updating and refactoring tests to test both the current HCC (internal name FixedHyperClockCache) and a placeholder for the new version (internal name AutoHyperClockCache). * Simplify some existing tests not to depend directly on cache type. * Type-parameterize the shard-level unit tests, which unfortunately requires more syntax like `this->` in places for disambiguation. * Added means of choosing auto_hyper_clock_cache to cache_bench, db_bench, and db_stress, including add to crash test. * Add another templated class BaseHyperClockCache to reduce future copy-paste * Added ReportProblems support to cache_bench * Added a DEBUG-level diagnostic to ReportProblems for the variance in load factor throughout the table, which will become more of a concern with linear hashing to be used in the Auto implementation. Example with current Fixed HCC: ``` 2023/08/10-13:41:41.602450 6ac36 [DEBUG] [che/clock_cache.cc:1507] Slot occupancy stats: Overall 49% (129008/262144), Min/Max/Window = 39%/60%/500, MaxRun{Pos/Neg} = 18/17 ``` In other words, with overall occupancy of 49%, the lowest across any 500 contiguous cells is 39% and highest 60%. Longest run of occupied is 18 and longest run of unoccupied is 17. This seems consistent with random samples from a uniform distribution. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11692 Test Plan: Shouldn't be any meaningful changes yet to production code or to what is tested, but there is temporary redundancy in testing until the new implementation is plugged in. Reviewed By: jowlyzhang Differential Revision: D48247413 Pulled By: pdillinger fbshipit-source-id: 11541f996d97af403c2e43c92fb67ff22dd0b5da --- cache/cache_bench_tool.cc | 37 ++- cache/cache_test.cc | 104 ++----- cache/clock_cache.cc | 138 +++++++-- cache/clock_cache.h | 65 +++- cache/lru_cache_test.cc | 412 ++++++++++++++------------ cache/sharded_cache.h | 8 + db/db_block_cache_test.cc | 5 + db_stress_tool/db_stress_test_base.cc | 18 +- include/rocksdb/cache.h | 3 + test_util/secondary_cache_test_util.h | 18 +- tools/db_bench_tool.cc | 35 ++- tools/db_crashtest.py | 5 +- 12 files changed, 511 insertions(+), 337 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index f3992e43e..cb37862a2 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -31,6 +31,7 @@ #include "util/hash.h" #include "util/mutexlock.h" #include "util/random.h" +#include "util/stderr_logger.h" #include "util/stop_watch.h" #include "util/string_util.h" @@ -49,6 +50,9 @@ DEFINE_double(resident_ratio, 0.25, "Ratio of keys fitting in cache to keyspace."); DEFINE_uint64(ops_per_thread, 2000000U, "Number of operations per thread."); DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added."); +DEFINE_uint32(value_bytes_estimate, 0, + "If > 0, overrides estimated_entry_charge or " + "min_avg_entry_charge depending on cache_type."); DEFINE_uint32(skew, 5, "Degree of skew in key selection. 0 = no skew"); DEFINE_bool(populate_cache, true, "Populate cache before operations"); @@ -83,6 +87,8 @@ DEFINE_bool(early_exit, false, DEFINE_bool(histograms, true, "Whether to track and print histogram statistics."); +DEFINE_bool(report_problems, true, "Whether to ReportProblems() at the end."); + DEFINE_uint32(seed, 0, "Hashing/random seed to use. 0 = choose at random"); DEFINE_string(secondary_cache_uri, "", @@ -299,11 +305,23 @@ class CacheBench { if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache" || - FLAGS_cache_type == "fixed_hyper_clock_cache") { - HyperClockCacheOptions opts(FLAGS_cache_size, FLAGS_value_bytes, - FLAGS_num_shard_bits); + } else if (EndsWith(FLAGS_cache_type, "hyper_clock_cache")) { + HyperClockCacheOptions opts( + FLAGS_cache_size, /*estimated_entry_charge=*/0, FLAGS_num_shard_bits); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); + if (FLAGS_cache_type == "fixed_hyper_clock_cache" || + FLAGS_cache_type == "hyper_clock_cache") { + opts.estimated_entry_charge = FLAGS_value_bytes_estimate > 0 + ? 
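A hedged configuration sketch (not part of this patch) of the selection behavior described above: `estimated_entry_charge == 0` is planned to select the Auto variant, while a nonzero value keeps the Fixed implementation; capacity and charge values are illustrative, and at this point the Auto variant is only a placeholder behind `MakeSharedCache()`:

```
#include <memory>
#include "rocksdb/cache.h"

std::shared_ptr<rocksdb::Cache> MakeHccSketch(bool want_auto) {
  rocksdb::HyperClockCacheOptions opts(
      /*capacity=*/size_t{1} << 30,
      /*estimated_entry_charge=*/want_auto ? 0 : 8 * 1024);
  if (want_auto) {
    // New option from this change, bounding the extreme-case hash table
    // size; the value here is purely illustrative.
    opts.min_avg_entry_charge = 64;
  }
  return opts.MakeSharedCache();
}
```

The benchmark and stress tools expose the same choice by name, e.g. `-cache_type=auto_hyper_clock_cache` versus `fixed_hyper_clock_cache`, as shown in the cache_bench changes below.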
FLAGS_value_bytes_estimate + : FLAGS_value_bytes; + } else if (FLAGS_cache_type == "auto_hyper_clock_cache") { + if (FLAGS_value_bytes_estimate > 0) { + opts.min_avg_entry_charge = FLAGS_value_bytes_estimate; + } + } else { + fprintf(stderr, "Cache type not supported."); + exit(1); + } cache_ = opts.MakeSharedCache(); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, @@ -454,7 +472,14 @@ class CacheBench { printf("%s", stats_hist.ToString().c_str()); } } - printf("\n%s", stats_report.c_str()); + + if (FLAGS_report_problems) { + printf("\n"); + std::shared_ptr logger = + std::make_shared(InfoLogLevel::DEBUG_LEVEL); + cache_->ReportProblems(logger); + } + printf("%s", stats_report.c_str()); return true; } @@ -499,7 +524,7 @@ class CacheBench { for (;;) { if (shared->AllDone()) { std::ostringstream ostr; - ostr << "Most recent cache entry stats:\n" + ostr << "\nMost recent cache entry stats:\n" << "Number of entries: " << total_entry_count << "\n" << "Table occupancy: " << table_occupancy << " / " << table_size << " = " diff --git a/cache/cache_test.cc b/cache/cache_test.cc index f65698873..969ab597b 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -70,18 +70,11 @@ const Cache::CacheItemHelper kDumbHelper{ CacheEntryRole::kMisc, [](Cache::ObjectPtr /*value*/, MemoryAllocator* /*alloc*/) {}}; -const Cache::CacheItemHelper kEraseOnDeleteHelper1{ +const Cache::CacheItemHelper kInvokeOnDeleteHelper{ CacheEntryRole::kMisc, [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - Cache* cache = static_cast(value); - cache->Erase("foo"); - }}; - -const Cache::CacheItemHelper kEraseOnDeleteHelper2{ - CacheEntryRole::kMisc, - [](Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - Cache* cache = static_cast(value); - cache->Erase(EncodeKey16Bytes(1234)); + auto& fn = *static_cast*>(value); + fn(); }}; } // anonymous namespace @@ -180,8 +173,6 @@ std::string CacheTest::type_; class LRUCacheTest : public CacheTest {}; TEST_P(CacheTest, UsageTest) { - auto type = GetParam(); - // cache is std::shared_ptr and will be automatically cleaned up. const size_t kCapacity = 100000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); @@ -196,12 +187,7 @@ TEST_P(CacheTest, UsageTest) { char value[10] = "abcdef"; // make sure everything will be cached for (int i = 1; i < 100; ++i) { - std::string key; - if (type == kLRU) { - key = std::string(i, 'a'); - } else { - key = EncodeKey(i); - } + std::string key = EncodeKey(i); auto kv_size = key.size() + 5; ASSERT_OK(cache->Insert(key, value, &kDumbHelper, kv_size)); ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size)); @@ -221,12 +207,7 @@ TEST_P(CacheTest, UsageTest) { // make sure the cache will be overloaded for (size_t i = 1; i < kCapacity; ++i) { - std::string key; - if (type == kLRU) { - key = std::to_string(i); - } else { - key = EncodeKey(static_cast(1000 + i)); - } + std::string key = EncodeKey(static_cast(1000 + i)); ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5)); ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5)); } @@ -246,16 +227,14 @@ TEST_P(CacheTest, UsageTest) { } } -// TODO: This test takes longer than expected on ClockCache. This is -// because the values size estimate at construction is too sloppy. +// TODO: This test takes longer than expected on FixedHyperClockCache. +// This is because the values size estimate at construction is too sloppy. // Fix this. // Why is it so slow? 
The cache is constructed with an estimate of 1, but // then the charge is claimed to be 21. This will cause the hash table // to be extremely sparse, which in turn means clock needs to scan too // many slots to find victims. TEST_P(CacheTest, PinnedUsageTest) { - auto type = GetParam(); - // cache is std::shared_ptr and will be automatically cleaned up. const size_t kCapacity = 200000; auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); @@ -274,12 +253,7 @@ TEST_P(CacheTest, PinnedUsageTest) { // Add entries. Unpin some of them after insertion. Then, pin some of them // again. Check GetPinnedUsage(). for (int i = 1; i < 100; ++i) { - std::string key; - if (type == kLRU) { - key = std::string(i, 'a'); - } else { - key = EncodeKey(i); - } + std::string key = EncodeKey(i); auto kv_size = key.size() + 5; Cache::Handle* handle; Cache::Handle* handle_in_precise_cache; @@ -320,12 +294,7 @@ TEST_P(CacheTest, PinnedUsageTest) { // check that overloading the cache does not change the pinned usage for (size_t i = 1; i < 2 * kCapacity; ++i) { - std::string key; - if (type == kLRU) { - key = std::to_string(i); - } else { - key = EncodeKey(static_cast(1000 + i)); - } + std::string key = EncodeKey(static_cast(1000 + i)); ASSERT_OK(cache->Insert(key, value, &kDumbHelper, key.size() + 5)); ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, key.size() + 5)); } @@ -515,20 +484,20 @@ TEST_P(CacheTest, EvictionPolicyRef) { // Check whether the entries inserted in the beginning // are evicted. Ones without extra ref are evicted and // those with are not. - ASSERT_EQ(-1, Lookup(100)); - ASSERT_EQ(-1, Lookup(101)); - ASSERT_EQ(-1, Lookup(102)); - ASSERT_EQ(-1, Lookup(103)); + EXPECT_EQ(-1, Lookup(100)); + EXPECT_EQ(-1, Lookup(101)); + EXPECT_EQ(-1, Lookup(102)); + EXPECT_EQ(-1, Lookup(103)); - ASSERT_EQ(-1, Lookup(300)); - ASSERT_EQ(-1, Lookup(301)); - ASSERT_EQ(-1, Lookup(302)); - ASSERT_EQ(-1, Lookup(303)); + EXPECT_EQ(-1, Lookup(300)); + EXPECT_EQ(-1, Lookup(301)); + EXPECT_EQ(-1, Lookup(302)); + EXPECT_EQ(-1, Lookup(303)); - ASSERT_EQ(101, Lookup(200)); - ASSERT_EQ(102, Lookup(201)); - ASSERT_EQ(103, Lookup(202)); - ASSERT_EQ(104, Lookup(203)); + EXPECT_EQ(101, Lookup(200)); + EXPECT_EQ(102, Lookup(201)); + EXPECT_EQ(103, Lookup(202)); + EXPECT_EQ(104, Lookup(203)); // Cleaning up all the handles cache_->Release(h201); @@ -538,37 +507,22 @@ TEST_P(CacheTest, EvictionPolicyRef) { } TEST_P(CacheTest, EvictEmptyCache) { - auto type = GetParam(); - // Insert item large than capacity to trigger eviction on empty cache. auto cache = NewCache(1, 0, false); - if (type == kLRU) { - ASSERT_OK(cache->Insert("foo", nullptr, &kDumbHelper, 10)); - } else { - ASSERT_OK(cache->Insert(EncodeKey(1000), nullptr, &kDumbHelper, 10)); - } + ASSERT_OK(cache->Insert(EncodeKey(1000), nullptr, &kDumbHelper, 10)); } TEST_P(CacheTest, EraseFromDeleter) { - auto type = GetParam(); - // Have deleter which will erase item from cache, which will re-enter // the cache at that point. 
std::shared_ptr cache = NewCache(10, 0, false); - std::string foo, bar; - const Cache::CacheItemHelper* erase_helper; - if (type == kLRU) { - foo = "foo"; - bar = "bar"; - erase_helper = &kEraseOnDeleteHelper1; - } else { - foo = EncodeKey(1234); - bar = EncodeKey(5678); - erase_helper = &kEraseOnDeleteHelper2; - } + std::string foo = EncodeKey(1234); + std::string bar = EncodeKey(5678); + + std::function erase_fn = [&]() { cache->Erase(foo); }; ASSERT_OK(cache->Insert(foo, nullptr, &kDumbHelper, 1)); - ASSERT_OK(cache->Insert(bar, cache.get(), erase_helper, 1)); + ASSERT_OK(cache->Insert(bar, &erase_fn, &kInvokeOnDeleteHelper, 1)); cache->Erase(bar); ASSERT_EQ(nullptr, cache->Lookup(foo)); @@ -676,10 +630,10 @@ using TypedHandle = SharedCache::TypedHandle; } // namespace TEST_P(CacheTest, SetCapacity) { - auto type = GetParam(); if (IsHyperClock()) { + // TODO: update test & code for limited supoort ROCKSDB_GTEST_BYPASS( - "FastLRUCache and HyperClockCache don't support arbitrary capacity " + "HyperClockCache doesn't support arbitrary capacity " "adjustments."); return; } diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 159ab4624..e648da9fa 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -9,9 +9,18 @@ #include "cache/clock_cache.h" +#include +#include +#include #include +#include +#include +#include #include #include +#include +#include +#include #include "cache/cache_key.h" #include "cache/secondary_cache_adapter.h" @@ -92,8 +101,6 @@ inline bool ClockUpdate(ClockHandle& h) { (meta >> ClockHandle::kAcquireCounterShift) & ClockHandle::kCounterMask; uint64_t release_count = (meta >> ClockHandle::kReleaseCounterShift) & ClockHandle::kCounterMask; - // fprintf(stderr, "ClockUpdate @ %p: %lu %lu %u\n", &h, acquire_count, - // release_count, (unsigned)(meta >> ClockHandle::kStateShift)); if (acquire_count != release_count) { // Only clock update entries with no outstanding refs return false; @@ -1361,35 +1368,39 @@ size_t ClockCacheShard
::GetTableAddressCount() const { // Explicit instantiation template class ClockCacheShard; +template class ClockCacheShard; -FixedHyperClockCache::FixedHyperClockCache(const HyperClockCacheOptions& opts) - : ShardedCache(opts) { - assert(opts.estimated_entry_charge > 0 || - opts.metadata_charge_policy != kDontChargeCacheMetadata); +template +BaseHyperClockCache
::BaseHyperClockCache( + const HyperClockCacheOptions& opts) + : ShardedCache>(opts) { // TODO: should not need to go through two levels of pointer indirection to // get to table entries - size_t per_shard = GetPerShardCapacity(); + size_t per_shard = this->GetPerShardCapacity(); MemoryAllocator* alloc = this->memory_allocator(); - InitShards([&](Shard* cs) { - FixedHyperClockTable::Opts table_opts; - table_opts.estimated_value_size = opts.estimated_entry_charge; + this->InitShards([&](Shard* cs) { + typename Table::Opts table_opts{opts}; new (cs) Shard(per_shard, opts.strict_capacity_limit, - opts.metadata_charge_policy, alloc, &eviction_callback_, - &hash_seed_, table_opts); + opts.metadata_charge_policy, alloc, + &this->eviction_callback_, &this->hash_seed_, table_opts); }); } -Cache::ObjectPtr FixedHyperClockCache::Value(Handle* handle) { - return reinterpret_cast(handle)->value; +template +Cache::ObjectPtr BaseHyperClockCache
::Value(Handle* handle) { + return reinterpret_cast(handle)->value; } -size_t FixedHyperClockCache::GetCharge(Handle* handle) const { - return reinterpret_cast(handle)->GetTotalCharge(); +template +size_t BaseHyperClockCache
::GetCharge(Handle* handle) const { + return reinterpret_cast(handle) + ->GetTotalCharge(); } -const Cache::CacheItemHelper* FixedHyperClockCache::GetCacheItemHelper( +template +const Cache::CacheItemHelper* BaseHyperClockCache
::GetCacheItemHelper( Handle* handle) const { - auto h = reinterpret_cast(handle); + auto h = reinterpret_cast(handle); return h->helper; } @@ -1428,17 +1439,87 @@ void AddShardEvaluation(const FixedHyperClockCache::Shard& shard, min_recommendation = std::min(min_recommendation, recommendation); } +bool IsSlotOccupied(const ClockHandle& h) { + return (h.meta.load(std::memory_order_relaxed) >> ClockHandle::kStateShift) != + 0; +} } // namespace +// NOTE: GCC might warn about subobject linkage if this is in anon namespace +template +class LoadVarianceStats { + public: + std::string Report() const { + return "Overall " + PercentStr(positive_count_, samples_) + " (" + + std::to_string(positive_count_) + "/" + std::to_string(samples_) + + "), Min/Max/Window = " + PercentStr(min_, N) + "/" + + PercentStr(max_, N) + "/" + std::to_string(N) + + ", MaxRun{Pos/Neg} = " + std::to_string(max_pos_run_) + "/" + + std::to_string(max_neg_run_) + "\n"; + } + + void Add(bool positive) { + recent_[samples_ % N] = positive; + if (positive) { + ++positive_count_; + ++cur_pos_run_; + max_pos_run_ = std::max(max_pos_run_, cur_pos_run_); + cur_neg_run_ = 0; + } else { + ++cur_neg_run_; + max_neg_run_ = std::max(max_neg_run_, cur_neg_run_); + cur_pos_run_ = 0; + } + ++samples_; + if (samples_ >= N) { + size_t count_set = recent_.count(); + max_ = std::max(max_, count_set); + min_ = std::min(min_, count_set); + } + } + + private: + size_t max_ = 0; + size_t min_ = N; + size_t positive_count_ = 0; + size_t samples_ = 0; + size_t max_pos_run_ = 0; + size_t cur_pos_run_ = 0; + size_t max_neg_run_ = 0; + size_t cur_neg_run_ = 0; + std::bitset recent_; + + static std::string PercentStr(size_t a, size_t b) { + return std::to_string(uint64_t{100} * a / b) + "%"; + } +}; + +template +void BaseHyperClockCache
::ReportProblems( + const std::shared_ptr& info_log) const { + if (info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { + LoadVarianceStats slot_stats; + this->ForEachShard([&](const BaseHyperClockCache
::Shard* shard) { + size_t count = shard->GetTableAddressCount(); + for (size_t i = 0; i < count; ++i) { + slot_stats.Add(IsSlotOccupied(*shard->GetTable().HandlePtr(i))); + } + }); + ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, + "Slot occupancy stats: %s", slot_stats.Report().c_str()); + } +} + void FixedHyperClockCache::ReportProblems( const std::shared_ptr& info_log) const { + BaseHyperClockCache::ReportProblems(info_log); + uint32_t shard_count = GetNumShards(); std::vector predicted_load_factors; size_t min_recommendation = SIZE_MAX; - const_cast(this)->ForEachShard( - [&](FixedHyperClockCache::Shard* shard) { - AddShardEvaluation(*shard, predicted_load_factors, min_recommendation); - }); + ForEachShard([&](const FixedHyperClockCache::Shard* shard) { + AddShardEvaluation(*shard, predicted_load_factors, min_recommendation); + }); if (predicted_load_factors.empty()) { // None operating "at limit" -> nothing to report @@ -1549,8 +1630,17 @@ std::shared_ptr HyperClockCacheOptions::MakeSharedCache() const { opts.num_shard_bits = GetDefaultCacheShardBits(opts.capacity, min_shard_size); } - std::shared_ptr cache = - std::make_shared(opts); + std::shared_ptr cache; + if (opts.estimated_entry_charge == 0) { + // BEGIN placeholder logic to be removed + // This is sufficient to get the placeholder Auto working in unit tests + // much like the Fixed version. + opts.estimated_entry_charge = opts.min_avg_entry_charge; + // END placeholder logic to be removed + cache = std::make_shared(opts); + } else { + cache = std::make_shared(opts); + } if (opts.secondary_cache) { cache = std::make_shared(cache, opts.secondary_cache); diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 65993cbb0..851dab759 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -31,6 +31,7 @@ namespace ROCKSDB_NAMESPACE { namespace clock_cache { // Forward declaration of friend class. +template class ClockCacheTest; // HyperClockCache is an alternative to LRUCache specifically tailored for @@ -488,6 +489,12 @@ class FixedHyperClockTable : public BaseClockTable { }; // struct HandleImpl struct Opts { + explicit Opts(size_t _estimated_value_size) + : estimated_value_size(_estimated_value_size) {} + explicit Opts(const HyperClockCacheOptions& opts) { + assert(opts.estimated_entry_charge > 0); + estimated_value_size = opts.estimated_entry_charge; + } size_t estimated_value_size; }; @@ -530,7 +537,7 @@ class FixedHyperClockTable : public BaseClockTable { const HandleImpl* HandlePtr(size_t idx) const { return &array_[idx]; } #ifndef NDEBUG - size_t& TEST_MutableOccupancyLimit() const { + size_t& TEST_MutableOccupancyLimit() { return const_cast(occupancy_limit_); } @@ -614,10 +621,18 @@ class FixedHyperClockTable : public BaseClockTable { const std::unique_ptr array_; }; // class FixedHyperClockTable +// Placeholder for future automatic table variant +// For now, just use FixedHyperClockTable. +class AutoHyperClockTable : public FixedHyperClockTable { + public: + using FixedHyperClockTable::FixedHyperClockTable; +}; // class AutoHyperClockTable + // A single shard of sharded cache. 
-template +template class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { public: + using Table = TableT; ClockCacheShard(size_t capacity, bool strict_capacity_limit, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, @@ -710,8 +725,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { return Lookup(key, hashed_key); } + Table& GetTable() { return table_; } + const Table& GetTable() const { return table_; } + #ifndef NDEBUG - size_t& TEST_MutableOccupancyLimit() const { + size_t& TEST_MutableOccupancyLimit() { return table_.TEST_MutableOccupancyLimit(); } // Acquire/release N references @@ -729,17 +747,14 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { std::atomic strict_capacity_limit_; }; // class ClockCacheShard -class FixedHyperClockCache -#ifdef NDEBUG - final -#endif - : public ShardedCache> { +template +class BaseHyperClockCache : public ShardedCache> { public: - using Shard = ClockCacheShard; - - explicit FixedHyperClockCache(const HyperClockCacheOptions& opts); + using Shard = ClockCacheShard
; + using Handle = Cache::Handle; + using CacheItemHelper = Cache::CacheItemHelper; - const char* Name() const override { return "FixedHyperClockCache"; } + explicit BaseHyperClockCache(const HyperClockCacheOptions& opts); Cache::ObjectPtr Value(Handle* handle) override; @@ -747,10 +762,36 @@ class FixedHyperClockCache const CacheItemHelper* GetCacheItemHelper(Handle* handle) const override; + void ReportProblems( + const std::shared_ptr& /*info_log*/) const override; +}; + +class FixedHyperClockCache +#ifdef NDEBUG + final +#endif + : public BaseHyperClockCache { + public: + using BaseHyperClockCache::BaseHyperClockCache; + + const char* Name() const override { return "FixedHyperClockCache"; } + void ReportProblems( const std::shared_ptr& /*info_log*/) const override; }; // class FixedHyperClockCache +// Placeholder for future automatic HCC variant +class AutoHyperClockCache +#ifdef NDEBUG + final +#endif + : public BaseHyperClockCache { + public: + using BaseHyperClockCache::BaseHyperClockCache; + + const char* Name() const override { return "AutoHyperClockCache"; } +}; // class AutoHyperClockCache + } // namespace clock_cache } // namespace ROCKSDB_NAMESPACE diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 7bd2048dc..7519a7b91 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -371,11 +371,12 @@ TEST_F(LRUCacheTest, EntriesWithPriority) { namespace clock_cache { +template class ClockCacheTest : public testing::Test { public: - using Shard = FixedHyperClockCache::Shard; - using Table = FixedHyperClockTable; - using HandleImpl = Shard::HandleImpl; + using Shard = typename ClockCache::Shard; + using Table = typename Shard::Table; + using TableOpts = typename Table::Opts; ClockCacheTest() {} ~ClockCacheTest() override { DeleteShard(); } @@ -393,8 +394,7 @@ class ClockCacheTest : public testing::Test { shard_ = reinterpret_cast(port::cacheline_aligned_alloc(sizeof(Shard))); - Table::Opts opts; - opts.estimated_value_size = 1; + TableOpts opts{1 /*value_size*/}; new (shard_) Shard(capacity, strict_capacity_limit, kDontChargeCacheMetadata, /*allocator*/ nullptr, &eviction_callback_, &hash_seed_, opts); @@ -458,43 +458,53 @@ class ClockCacheTest : public testing::Test { uint32_t hash_seed_ = 0; }; -TEST_F(ClockCacheTest, Misc) { - NewShard(3); +using ClockCacheTypes = + ::testing::Types; +TYPED_TEST_CASE(ClockCacheTest, ClockCacheTypes); + +TYPED_TEST(ClockCacheTest, Misc) { + this->NewShard(3); + // NOTE: templated base class prevents simple naming of inherited members, + // so lots of `this->` + auto& shard = *this->shard_; // Key size stuff - EXPECT_OK(InsertWithLen('a', 16)); - EXPECT_NOK(InsertWithLen('b', 15)); - EXPECT_OK(InsertWithLen('b', 16)); - EXPECT_NOK(InsertWithLen('c', 17)); - EXPECT_NOK(InsertWithLen('d', 1000)); - EXPECT_NOK(InsertWithLen('e', 11)); - EXPECT_NOK(InsertWithLen('f', 0)); + EXPECT_OK(this->InsertWithLen('a', 16)); + EXPECT_NOK(this->InsertWithLen('b', 15)); + EXPECT_OK(this->InsertWithLen('b', 16)); + EXPECT_NOK(this->InsertWithLen('c', 17)); + EXPECT_NOK(this->InsertWithLen('d', 1000)); + EXPECT_NOK(this->InsertWithLen('e', 11)); + EXPECT_NOK(this->InsertWithLen('f', 0)); // Some of this is motivated by code coverage std::string wrong_size_key(15, 'x'); - EXPECT_FALSE(Lookup(wrong_size_key, TestHashedKey('x'))); - EXPECT_FALSE(shard_->Ref(nullptr)); - EXPECT_FALSE(shard_->Release(nullptr)); - shard_->Erase(wrong_size_key, TestHashedKey('x')); // no-op + EXPECT_FALSE(this->Lookup(wrong_size_key, this->TestHashedKey('x'))); + 
EXPECT_FALSE(shard.Ref(nullptr)); + EXPECT_FALSE(shard.Release(nullptr)); + shard.Erase(wrong_size_key, this->TestHashedKey('x')); // no-op } -TEST_F(ClockCacheTest, Limits) { - constexpr size_t kCapacity = 3; - NewShard(kCapacity, false /*strict_capacity_limit*/); +TYPED_TEST(ClockCacheTest, Limits) { + constexpr size_t kCapacity = 64; + this->NewShard(kCapacity, false /*strict_capacity_limit*/); + auto& shard = *this->shard_; + using HandleImpl = typename ClockCacheTest::Shard::HandleImpl; + for (bool strict_capacity_limit : {false, true, false}) { SCOPED_TRACE("strict_capacity_limit = " + std::to_string(strict_capacity_limit)); // Also tests switching between strict limit and not - shard_->SetStrictCapacityLimit(strict_capacity_limit); + shard.SetStrictCapacityLimit(strict_capacity_limit); - UniqueId64x2 hkey = TestHashedKey('x'); + UniqueId64x2 hkey = this->TestHashedKey('x'); // Single entry charge beyond capacity { - Status s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, 5 /*charge*/, - nullptr /*handle*/, Cache::Priority::LOW); + Status s = shard.Insert(this->TestKey(hkey), hkey, nullptr /*value*/, + &kNoopCacheItemHelper, kCapacity + 2 /*charge*/, + nullptr /*handle*/, Cache::Priority::LOW); if (strict_capacity_limit) { EXPECT_TRUE(s.IsMemoryLimit()); } else { @@ -505,11 +515,11 @@ TEST_F(ClockCacheTest, Limits) { // Single entry fills capacity { HandleImpl* h; - ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, 3 /*charge*/, &h, - Cache::Priority::LOW)); + ASSERT_OK(shard.Insert(this->TestKey(hkey), hkey, nullptr /*value*/, + &kNoopCacheItemHelper, kCapacity /*charge*/, &h, + Cache::Priority::LOW)); // Try to insert more - Status s = Insert('a'); + Status s = this->Insert('a'); if (strict_capacity_limit) { EXPECT_TRUE(s.IsMemoryLimit()); } else { @@ -517,22 +527,22 @@ TEST_F(ClockCacheTest, Limits) { } // Release entry filling capacity. // Cover useful = false case. - shard_->Release(h, false /*useful*/, false /*erase_if_last_ref*/); + shard.Release(h, false /*useful*/, false /*erase_if_last_ref*/); } // Insert more than table size can handle to exceed occupancy limit. // (Cleverly using mostly zero-charge entries, but some non-zero to // verify usage tracking on detached entries.) { - size_t n = shard_->GetTableAddressCount() + 1; + size_t n = shard.GetTableAddressCount() + 1; std::unique_ptr ha { new HandleImpl* [n] {} }; Status s; for (size_t i = 0; i < n && s.ok(); ++i) { hkey[1] = i; - s = shard_->Insert(TestKey(hkey), hkey, nullptr /*value*/, - &kNoopCacheItemHelper, - (i + kCapacity < n) ? 0 : 1 /*charge*/, &ha[i], - Cache::Priority::LOW); + s = shard.Insert(this->TestKey(hkey), hkey, nullptr /*value*/, + &kNoopCacheItemHelper, + (i + kCapacity < n) ? 
0 : 1 /*charge*/, &ha[i], + Cache::Priority::LOW); if (i == 0) { EXPECT_OK(s); } @@ -543,7 +553,7 @@ TEST_F(ClockCacheTest, Limits) { EXPECT_OK(s); } // Same result if not keeping a reference - s = Insert('a'); + s = this->Insert('a'); if (strict_capacity_limit) { EXPECT_TRUE(s.IsMemoryLimit()); } else { @@ -551,122 +561,123 @@ TEST_F(ClockCacheTest, Limits) { } // Regardless, we didn't allow table to actually get full - EXPECT_LT(shard_->GetOccupancyCount(), shard_->GetTableAddressCount()); + EXPECT_LT(shard.GetOccupancyCount(), shard.GetTableAddressCount()); // Release handles for (size_t i = 0; i < n; ++i) { if (ha[i]) { - shard_->Release(ha[i]); + shard.Release(ha[i]); } } } } } -TEST_F(ClockCacheTest, ClockEvictionTest) { +TYPED_TEST(ClockCacheTest, ClockEvictionTest) { for (bool strict_capacity_limit : {false, true}) { SCOPED_TRACE("strict_capacity_limit = " + std::to_string(strict_capacity_limit)); - NewShard(6, strict_capacity_limit); - EXPECT_OK(Insert('a', Cache::Priority::BOTTOM)); - EXPECT_OK(Insert('b', Cache::Priority::LOW)); - EXPECT_OK(Insert('c', Cache::Priority::HIGH)); - EXPECT_OK(Insert('d', Cache::Priority::BOTTOM)); - EXPECT_OK(Insert('e', Cache::Priority::LOW)); - EXPECT_OK(Insert('f', Cache::Priority::HIGH)); - - EXPECT_TRUE(Lookup('a', /*use*/ false)); - EXPECT_TRUE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ false)); - EXPECT_TRUE(Lookup('d', /*use*/ false)); - EXPECT_TRUE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); + this->NewShard(6, strict_capacity_limit); + auto& shard = *this->shard_; + EXPECT_OK(this->Insert('a', Cache::Priority::BOTTOM)); + EXPECT_OK(this->Insert('b', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('c', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('d', Cache::Priority::BOTTOM)); + EXPECT_OK(this->Insert('e', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('f', Cache::Priority::HIGH)); + + EXPECT_TRUE(this->Lookup('a', /*use*/ false)); + EXPECT_TRUE(this->Lookup('b', /*use*/ false)); + EXPECT_TRUE(this->Lookup('c', /*use*/ false)); + EXPECT_TRUE(this->Lookup('d', /*use*/ false)); + EXPECT_TRUE(this->Lookup('e', /*use*/ false)); + EXPECT_TRUE(this->Lookup('f', /*use*/ false)); // Ensure bottom are evicted first, even if new entries are low - EXPECT_OK(Insert('g', Cache::Priority::LOW)); - EXPECT_OK(Insert('h', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('a', /*use*/ false)); - EXPECT_TRUE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ false)); - EXPECT_FALSE(Lookup('d', /*use*/ false)); - EXPECT_TRUE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); + EXPECT_OK(this->Insert('g', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('h', Cache::Priority::LOW)); + + EXPECT_FALSE(this->Lookup('a', /*use*/ false)); + EXPECT_TRUE(this->Lookup('b', /*use*/ false)); + EXPECT_TRUE(this->Lookup('c', /*use*/ false)); + EXPECT_FALSE(this->Lookup('d', /*use*/ false)); + EXPECT_TRUE(this->Lookup('e', /*use*/ false)); + EXPECT_TRUE(this->Lookup('f', /*use*/ false)); // Mark g & h useful - EXPECT_TRUE(Lookup('g', /*use*/ true)); - EXPECT_TRUE(Lookup('h', /*use*/ true)); + EXPECT_TRUE(this->Lookup('g', /*use*/ true)); + EXPECT_TRUE(this->Lookup('h', /*use*/ true)); // Then old LOW entries - EXPECT_OK(Insert('i', Cache::Priority::LOW)); - EXPECT_OK(Insert('j', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('i', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('j', Cache::Priority::LOW)); - EXPECT_FALSE(Lookup('b', /*use*/ false)); - EXPECT_TRUE(Lookup('c', /*use*/ 
false)); - EXPECT_FALSE(Lookup('e', /*use*/ false)); - EXPECT_TRUE(Lookup('f', /*use*/ false)); + EXPECT_FALSE(this->Lookup('b', /*use*/ false)); + EXPECT_TRUE(this->Lookup('c', /*use*/ false)); + EXPECT_FALSE(this->Lookup('e', /*use*/ false)); + EXPECT_TRUE(this->Lookup('f', /*use*/ false)); // Mark g & h useful once again - EXPECT_TRUE(Lookup('g', /*use*/ true)); - EXPECT_TRUE(Lookup('h', /*use*/ true)); - EXPECT_TRUE(Lookup('i', /*use*/ false)); - EXPECT_TRUE(Lookup('j', /*use*/ false)); + EXPECT_TRUE(this->Lookup('g', /*use*/ true)); + EXPECT_TRUE(this->Lookup('h', /*use*/ true)); + EXPECT_TRUE(this->Lookup('i', /*use*/ false)); + EXPECT_TRUE(this->Lookup('j', /*use*/ false)); // Then old HIGH entries - EXPECT_OK(Insert('k', Cache::Priority::LOW)); - EXPECT_OK(Insert('l', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('c', /*use*/ false)); - EXPECT_FALSE(Lookup('f', /*use*/ false)); - EXPECT_TRUE(Lookup('g', /*use*/ false)); - EXPECT_TRUE(Lookup('h', /*use*/ false)); - EXPECT_TRUE(Lookup('i', /*use*/ false)); - EXPECT_TRUE(Lookup('j', /*use*/ false)); - EXPECT_TRUE(Lookup('k', /*use*/ false)); - EXPECT_TRUE(Lookup('l', /*use*/ false)); + EXPECT_OK(this->Insert('k', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('l', Cache::Priority::LOW)); + + EXPECT_FALSE(this->Lookup('c', /*use*/ false)); + EXPECT_FALSE(this->Lookup('f', /*use*/ false)); + EXPECT_TRUE(this->Lookup('g', /*use*/ false)); + EXPECT_TRUE(this->Lookup('h', /*use*/ false)); + EXPECT_TRUE(this->Lookup('i', /*use*/ false)); + EXPECT_TRUE(this->Lookup('j', /*use*/ false)); + EXPECT_TRUE(this->Lookup('k', /*use*/ false)); + EXPECT_TRUE(this->Lookup('l', /*use*/ false)); // Then the (roughly) least recently useful - EXPECT_OK(Insert('m', Cache::Priority::HIGH)); - EXPECT_OK(Insert('n', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('m', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('n', Cache::Priority::HIGH)); - EXPECT_TRUE(Lookup('g', /*use*/ false)); - EXPECT_TRUE(Lookup('h', /*use*/ false)); - EXPECT_FALSE(Lookup('i', /*use*/ false)); - EXPECT_FALSE(Lookup('j', /*use*/ false)); - EXPECT_TRUE(Lookup('k', /*use*/ false)); - EXPECT_TRUE(Lookup('l', /*use*/ false)); + EXPECT_TRUE(this->Lookup('g', /*use*/ false)); + EXPECT_TRUE(this->Lookup('h', /*use*/ false)); + EXPECT_FALSE(this->Lookup('i', /*use*/ false)); + EXPECT_FALSE(this->Lookup('j', /*use*/ false)); + EXPECT_TRUE(this->Lookup('k', /*use*/ false)); + EXPECT_TRUE(this->Lookup('l', /*use*/ false)); // Now try changing capacity down - shard_->SetCapacity(4); + shard.SetCapacity(4); // Insert to ensure evictions happen - EXPECT_OK(Insert('o', Cache::Priority::LOW)); - EXPECT_OK(Insert('p', Cache::Priority::LOW)); - - EXPECT_FALSE(Lookup('g', /*use*/ false)); - EXPECT_FALSE(Lookup('h', /*use*/ false)); - EXPECT_FALSE(Lookup('k', /*use*/ false)); - EXPECT_FALSE(Lookup('l', /*use*/ false)); - EXPECT_TRUE(Lookup('m', /*use*/ false)); - EXPECT_TRUE(Lookup('n', /*use*/ false)); - EXPECT_TRUE(Lookup('o', /*use*/ false)); - EXPECT_TRUE(Lookup('p', /*use*/ false)); + EXPECT_OK(this->Insert('o', Cache::Priority::LOW)); + EXPECT_OK(this->Insert('p', Cache::Priority::LOW)); + + EXPECT_FALSE(this->Lookup('g', /*use*/ false)); + EXPECT_FALSE(this->Lookup('h', /*use*/ false)); + EXPECT_FALSE(this->Lookup('k', /*use*/ false)); + EXPECT_FALSE(this->Lookup('l', /*use*/ false)); + EXPECT_TRUE(this->Lookup('m', /*use*/ false)); + EXPECT_TRUE(this->Lookup('n', /*use*/ false)); + EXPECT_TRUE(this->Lookup('o', /*use*/ false)); + EXPECT_TRUE(this->Lookup('p', /*use*/ false)); // Now try 
changing capacity up - EXPECT_TRUE(Lookup('m', /*use*/ true)); - EXPECT_TRUE(Lookup('n', /*use*/ true)); - shard_->SetCapacity(6); - EXPECT_OK(Insert('q', Cache::Priority::HIGH)); - EXPECT_OK(Insert('r', Cache::Priority::HIGH)); - EXPECT_OK(Insert('s', Cache::Priority::HIGH)); - EXPECT_OK(Insert('t', Cache::Priority::HIGH)); - - EXPECT_FALSE(Lookup('o', /*use*/ false)); - EXPECT_FALSE(Lookup('p', /*use*/ false)); - EXPECT_TRUE(Lookup('m', /*use*/ false)); - EXPECT_TRUE(Lookup('n', /*use*/ false)); - EXPECT_TRUE(Lookup('q', /*use*/ false)); - EXPECT_TRUE(Lookup('r', /*use*/ false)); - EXPECT_TRUE(Lookup('s', /*use*/ false)); - EXPECT_TRUE(Lookup('t', /*use*/ false)); + EXPECT_TRUE(this->Lookup('m', /*use*/ true)); + EXPECT_TRUE(this->Lookup('n', /*use*/ true)); + shard.SetCapacity(6); + EXPECT_OK(this->Insert('q', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('r', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('s', Cache::Priority::HIGH)); + EXPECT_OK(this->Insert('t', Cache::Priority::HIGH)); + + EXPECT_FALSE(this->Lookup('o', /*use*/ false)); + EXPECT_FALSE(this->Lookup('p', /*use*/ false)); + EXPECT_TRUE(this->Lookup('m', /*use*/ false)); + EXPECT_TRUE(this->Lookup('n', /*use*/ false)); + EXPECT_TRUE(this->Lookup('q', /*use*/ false)); + EXPECT_TRUE(this->Lookup('r', /*use*/ false)); + EXPECT_TRUE(this->Lookup('s', /*use*/ false)); + EXPECT_TRUE(this->Lookup('t', /*use*/ false)); } } @@ -682,66 +693,72 @@ const Cache::CacheItemHelper kDeleteCounterHelper{ } // namespace // Testing calls to CorrectNearOverflow in Release -TEST_F(ClockCacheTest, ClockCounterOverflowTest) { - NewShard(6, /*strict_capacity_limit*/ false); +TYPED_TEST(ClockCacheTest, ClockCounterOverflowTest) { + this->NewShard(6, /*strict_capacity_limit*/ false); + auto& shard = *this->shard_; + using HandleImpl = typename ClockCacheTest::Shard::HandleImpl; + HandleImpl* h; DeleteCounter val; - UniqueId64x2 hkey = TestHashedKey('x'); - ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, &val, &kDeleteCounterHelper, 1, - &h, Cache::Priority::HIGH)); + UniqueId64x2 hkey = this->TestHashedKey('x'); + ASSERT_OK(shard.Insert(this->TestKey(hkey), hkey, &val, &kDeleteCounterHelper, + 1, &h, Cache::Priority::HIGH)); // Some large number outstanding - shard_->TEST_RefN(h, 123456789); + shard.TEST_RefN(h, 123456789); // Simulate many lookup/ref + release, plenty to overflow counters for (int i = 0; i < 10000; ++i) { - shard_->TEST_RefN(h, 1234567); - shard_->TEST_ReleaseN(h, 1234567); + shard.TEST_RefN(h, 1234567); + shard.TEST_ReleaseN(h, 1234567); } // Mark it invisible (to reach a different CorrectNearOverflow() in Release) - shard_->Erase(TestKey(hkey), hkey); + shard.Erase(this->TestKey(hkey), hkey); // Simulate many more lookup/ref + release (one-by-one would be too // expensive for unit test) for (int i = 0; i < 10000; ++i) { - shard_->TEST_RefN(h, 1234567); - shard_->TEST_ReleaseN(h, 1234567); + shard.TEST_RefN(h, 1234567); + shard.TEST_ReleaseN(h, 1234567); } // Free all but last 1 - shard_->TEST_ReleaseN(h, 123456789); + shard.TEST_ReleaseN(h, 123456789); // Still alive ASSERT_EQ(val.deleted, 0); // Free last ref, which will finalize erasure - shard_->Release(h); + shard.Release(h); // Deleted ASSERT_EQ(val.deleted, 1); } -TEST_F(ClockCacheTest, ClockTableFull) { +TYPED_TEST(ClockCacheTest, ClockTableFull) { // Force clock cache table to fill up (not usually allowed) in order // to test full probe sequence that is theoretically possible due to // parallel operations - NewShard(6, /*strict_capacity_limit*/ false); - size_t 
size = shard_->GetTableAddressCount(); + this->NewShard(6, /*strict_capacity_limit*/ false); + auto& shard = *this->shard_; + using HandleImpl = typename ClockCacheTest::Shard::HandleImpl; + + size_t size = shard.GetTableAddressCount(); ASSERT_LE(size + 3, 256); // for using char keys // Modify occupancy and capacity limits to attempt insert on full - shard_->TEST_MutableOccupancyLimit() = size + 100; - shard_->SetCapacity(size + 100); + shard.TEST_MutableOccupancyLimit() = size + 100; + shard.SetCapacity(size + 100); DeleteCounter val; std::vector handles; // NOTE: the three extra insertions should create standalone entries for (size_t i = 0; i < size + 3; ++i) { - UniqueId64x2 hkey = TestHashedKey(static_cast(i)); - ASSERT_OK(shard_->Insert(TestKey(hkey), hkey, &val, &kDeleteCounterHelper, - 1, &handles.emplace_back(), - Cache::Priority::HIGH)); + UniqueId64x2 hkey = this->TestHashedKey(static_cast(i)); + ASSERT_OK(shard.Insert(this->TestKey(hkey), hkey, &val, + &kDeleteCounterHelper, 1, &handles.emplace_back(), + Cache::Priority::HIGH)); } for (size_t i = 0; i < size + 3; ++i) { - UniqueId64x2 hkey = TestHashedKey(static_cast(i)); - HandleImpl* h = shard_->Lookup(TestKey(hkey), hkey); + UniqueId64x2 hkey = this->TestHashedKey(static_cast(i)); + HandleImpl* h = shard.Lookup(this->TestKey(hkey), hkey); if (i < size) { ASSERT_NE(h, nullptr); - shard_->Release(h); + shard.Release(h); } else { // Standalone entries not visible by lookup ASSERT_EQ(h, nullptr); @@ -750,7 +767,7 @@ TEST_F(ClockCacheTest, ClockTableFull) { for (size_t i = 0; i < size + 3; ++i) { ASSERT_NE(handles[i], nullptr); - shard_->Release(handles[i]); + shard.Release(handles[i]); if (i < size) { // Everything still in cache ASSERT_EQ(val.deleted, 0); @@ -761,8 +778,8 @@ TEST_F(ClockCacheTest, ClockTableFull) { } for (size_t i = size + 3; i > 0; --i) { - UniqueId64x2 hkey = TestHashedKey(static_cast(i - 1)); - shard_->Erase(TestKey(hkey), hkey); + UniqueId64x2 hkey = this->TestHashedKey(static_cast(i - 1)); + shard.Erase(this->TestKey(hkey), hkey); if (i - 1 > size) { ASSERT_EQ(val.deleted, 3); } else { @@ -773,78 +790,81 @@ TEST_F(ClockCacheTest, ClockTableFull) { // This test is mostly to exercise some corner case logic, by forcing two // keys to have the same hash, and more -TEST_F(ClockCacheTest, CollidingInsertEraseTest) { - NewShard(6, /*strict_capacity_limit*/ false); +TYPED_TEST(ClockCacheTest, CollidingInsertEraseTest) { + this->NewShard(6, /*strict_capacity_limit*/ false); + auto& shard = *this->shard_; + using HandleImpl = typename ClockCacheTest::Shard::HandleImpl; + DeleteCounter val; - UniqueId64x2 hkey1 = TestHashedKey('x'); - Slice key1 = TestKey(hkey1); - UniqueId64x2 hkey2 = TestHashedKey('y'); - Slice key2 = TestKey(hkey2); - UniqueId64x2 hkey3 = TestHashedKey('z'); - Slice key3 = TestKey(hkey3); + UniqueId64x2 hkey1 = this->TestHashedKey('x'); + Slice key1 = this->TestKey(hkey1); + UniqueId64x2 hkey2 = this->TestHashedKey('y'); + Slice key2 = this->TestKey(hkey2); + UniqueId64x2 hkey3 = this->TestHashedKey('z'); + Slice key3 = this->TestKey(hkey3); HandleImpl* h1; - ASSERT_OK(shard_->Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, &h1, - Cache::Priority::HIGH)); + ASSERT_OK(shard.Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, &h1, + Cache::Priority::HIGH)); HandleImpl* h2; - ASSERT_OK(shard_->Insert(key2, hkey2, &val, &kDeleteCounterHelper, 1, &h2, - Cache::Priority::HIGH)); + ASSERT_OK(shard.Insert(key2, hkey2, &val, &kDeleteCounterHelper, 1, &h2, + Cache::Priority::HIGH)); HandleImpl* h3; - 
ASSERT_OK(shard_->Insert(key3, hkey3, &val, &kDeleteCounterHelper, 1, &h3, - Cache::Priority::HIGH)); + ASSERT_OK(shard.Insert(key3, hkey3, &val, &kDeleteCounterHelper, 1, &h3, + Cache::Priority::HIGH)); // Can repeatedly lookup+release despite the hash collision HandleImpl* tmp_h; for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key1, hkey1); + tmp_h = shard.Lookup(key1, hkey1); ASSERT_EQ(h1, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); - tmp_h = shard_->Lookup(key2, hkey2); + tmp_h = shard.Lookup(key2, hkey2); ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); - tmp_h = shard_->Lookup(key3, hkey3); + tmp_h = shard.Lookup(key3, hkey3); ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); } // Make h1 invisible - shard_->Erase(key1, hkey1); + shard.Erase(key1, hkey1); // Redundant erase - shard_->Erase(key1, hkey1); + shard.Erase(key1, hkey1); // All still alive ASSERT_EQ(val.deleted, 0); // Invisible to Lookup - tmp_h = shard_->Lookup(key1, hkey1); + tmp_h = shard.Lookup(key1, hkey1); ASSERT_EQ(nullptr, tmp_h); // Can still find h2, h3 for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key2, hkey2); + tmp_h = shard.Lookup(key2, hkey2); ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); - tmp_h = shard_->Lookup(key3, hkey3); + tmp_h = shard.Lookup(key3, hkey3); ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); } // Also Insert with invisible entry there - ASSERT_OK(shard_->Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, nullptr, - Cache::Priority::HIGH)); - tmp_h = shard_->Lookup(key1, hkey1); + ASSERT_OK(shard.Insert(key1, hkey1, &val, &kDeleteCounterHelper, 1, nullptr, + Cache::Priority::HIGH)); + tmp_h = shard.Lookup(key1, hkey1); // Found but distinct handle ASSERT_NE(nullptr, tmp_h); ASSERT_NE(h1, tmp_h); - ASSERT_TRUE(shard_->Release(tmp_h, /*erase_if_last_ref*/ true)); + ASSERT_TRUE(shard.Release(tmp_h, /*erase_if_last_ref*/ true)); // tmp_h deleted ASSERT_EQ(val.deleted--, 1); // Release last ref on h1 (already invisible) - ASSERT_TRUE(shard_->Release(h1, /*erase_if_last_ref*/ false)); + ASSERT_TRUE(shard.Release(h1, /*erase_if_last_ref*/ false)); // h1 deleted ASSERT_EQ(val.deleted--, 1); @@ -852,57 +872,57 @@ TEST_F(ClockCacheTest, CollidingInsertEraseTest) { // Can still find h2, h3 for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key2, hkey2); + tmp_h = shard.Lookup(key2, hkey2); ASSERT_EQ(h2, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); - tmp_h = shard_->Lookup(key3, hkey3); + tmp_h = shard.Lookup(key3, hkey3); ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); } // Release last ref on h2 - ASSERT_FALSE(shard_->Release(h2, /*erase_if_last_ref*/ false)); + ASSERT_FALSE(shard.Release(h2, /*erase_if_last_ref*/ false)); // h2 still not deleted (unreferenced in cache) ASSERT_EQ(val.deleted, 0); // Can still find it - tmp_h = shard_->Lookup(key2, hkey2); + tmp_h = 
shard.Lookup(key2, hkey2); ASSERT_EQ(h2, tmp_h); // Release last ref on h2, with erase - ASSERT_TRUE(shard_->Release(h2, /*erase_if_last_ref*/ true)); + ASSERT_TRUE(shard.Release(h2, /*erase_if_last_ref*/ true)); // h2 deleted ASSERT_EQ(val.deleted--, 1); - tmp_h = shard_->Lookup(key2, hkey2); + tmp_h = shard.Lookup(key2, hkey2); ASSERT_EQ(nullptr, tmp_h); // Can still find h3 for (bool erase_if_last_ref : {true, false}) { // but not last ref - tmp_h = shard_->Lookup(key3, hkey3); + tmp_h = shard.Lookup(key3, hkey3); ASSERT_EQ(h3, tmp_h); - ASSERT_FALSE(shard_->Release(tmp_h, erase_if_last_ref)); + ASSERT_FALSE(shard.Release(tmp_h, erase_if_last_ref)); } // Release last ref on h3, without erase - ASSERT_FALSE(shard_->Release(h3, /*erase_if_last_ref*/ false)); + ASSERT_FALSE(shard.Release(h3, /*erase_if_last_ref*/ false)); // h3 still not deleted (unreferenced in cache) ASSERT_EQ(val.deleted, 0); // Explicit erase - shard_->Erase(key3, hkey3); + shard.Erase(key3, hkey3); // h3 deleted ASSERT_EQ(val.deleted--, 1); - tmp_h = shard_->Lookup(key3, hkey3); + tmp_h = shard.Lookup(key3, hkey3); ASSERT_EQ(nullptr, tmp_h); } // This uses the public API to effectively test CalcHashBits etc. -TEST_F(ClockCacheTest, TableSizesTest) { +TYPED_TEST(ClockCacheTest, TableSizesTest) { for (size_t est_val_size : {1U, 5U, 123U, 2345U, 345678U}) { SCOPED_TRACE("est_val_size = " + std::to_string(est_val_size)); for (double est_count : {1.1, 2.2, 511.9, 512.1, 2345.0}) { diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h index d78cfc246..5c42194d8 100644 --- a/cache/sharded_cache.h +++ b/cache/sharded_cache.h @@ -273,6 +273,14 @@ class ShardedCache : public ShardedCacheBase { } } + inline void ForEachShard( + const std::function& fn) const { + uint32_t num_shards = GetNumShards(); + for (uint32_t i = 0; i < num_shards; i++) { + fn(shards_ + i); + } + } + inline size_t SumOverShards( const std::function& fn) const { uint32_t num_shards = GetNumShards(); diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 83a027f5e..512a48149 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -741,10 +741,15 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) { int iterations_tested = 0; for (std::shared_ptr base_cache : {NewLRUCache(capacity, num_shard_bits), + // FixedHyperClockCache HyperClockCacheOptions( capacity, BlockBasedTableOptions().block_size /*estimated_value_size*/, num_shard_bits) + .MakeSharedCache(), + // AutoHyperClockCache + HyperClockCacheOptions(capacity, 0 /*estimated_value_size*/, + num_shard_bits) .MakeSharedCache()}) { if (!base_cache) { // Skip clock cache when not supported diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 2c3d89c80..459878ee0 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -129,12 +129,20 @@ std::shared_ptr StressTest::NewCache(size_t capacity, if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache" || - FLAGS_cache_type == "fixed_hyper_clock_cache") { - HyperClockCacheOptions opts(static_cast(capacity), - FLAGS_block_size /*estimated_entry_charge*/, + } else if (EndsWith(FLAGS_cache_type, "hyper_clock_cache")) { + size_t estimated_entry_charge; + if (FLAGS_cache_type == "fixed_hyper_clock_cache" || + FLAGS_cache_type == "hyper_clock_cache") { + estimated_entry_charge = FLAGS_block_size; + } else if (FLAGS_cache_type == 
"auto_hyper_clock_cache") { + estimated_entry_charge = 0; + } else { + fprintf(stderr, "Cache type not supported."); + exit(1); + } + HyperClockCacheOptions opts(FLAGS_cache_size, estimated_entry_charge, num_shard_bits); - opts.secondary_cache = std::move(secondary_cache); + opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); return opts.MakeSharedCache(); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts; diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 550b45859..304e4ebd7 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -416,6 +416,9 @@ struct HyperClockCacheOptions : public ShardedCacheOptions { // to estimate toward the lower side than the higher side. size_t estimated_entry_charge; + // FOR A FUTURE FEATURE (NOT YET USED) + size_t min_avg_entry_charge = 450; + HyperClockCacheOptions( size_t _capacity, size_t _estimated_entry_charge, int _num_shard_bits = -1, bool _strict_capacity_limit = false, diff --git a/test_util/secondary_cache_test_util.h b/test_util/secondary_cache_test_util.h index 2b0e68f68..5e2262a9c 100644 --- a/test_util/secondary_cache_test_util.h +++ b/test_util/secondary_cache_test_util.h @@ -43,13 +43,18 @@ class WithCacheType : public TestCreateContext { static constexpr auto kLRU = "lru"; static constexpr auto kFixedHyperClock = "fixed_hyper_clock"; + static constexpr auto kAutoHyperClock = "auto_hyper_clock"; // For options other than capacity size_t estimated_value_size_ = 1; virtual const std::string& Type() const = 0; - bool IsHyperClock() const { return Type() == kFixedHyperClock; } + static bool IsHyperClock(const std::string& type) { + return type == kFixedHyperClock || type == kAutoHyperClock; + } + + bool IsHyperClock() const { return IsHyperClock(Type()); } std::shared_ptr NewCache( size_t capacity, @@ -64,8 +69,11 @@ class WithCacheType : public TestCreateContext { } return lru_opts.MakeSharedCache(); } - if (type == kFixedHyperClock) { - HyperClockCacheOptions hc_opts{capacity, estimated_value_size_}; + if (IsHyperClock(type)) { + HyperClockCacheOptions hc_opts{ + capacity, type == kFixedHyperClock ? 
estimated_value_size_ : 0}; + hc_opts.min_avg_entry_charge = + std::max(size_t{1}, estimated_value_size_ / 2); hc_opts.hash_seed = 0; // deterministic tests if (modify_opts_fn) { modify_opts_fn(hc_opts); @@ -112,9 +120,11 @@ class WithCacheTypeParam : public WithCacheType, constexpr auto kLRU = WithCacheType::kLRU; constexpr auto kFixedHyperClock = WithCacheType::kFixedHyperClock; +constexpr auto kAutoHyperClock = WithCacheType::kAutoHyperClock; inline auto GetTestingCacheTypes() { - return testing::Values(std::string(kLRU), std::string(kFixedHyperClock)); + return testing::Values(std::string(kLRU), std::string(kFixedHyperClock), + std::string(kAutoHyperClock)); } } // namespace secondary_cache_test_util diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 6d2accd87..dd5abd589 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -3039,22 +3039,29 @@ class Benchmark { if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (FLAGS_cache_type == "hyper_clock_cache" || - FLAGS_cache_type == "fixed_hyper_clock_cache") { - HyperClockCacheOptions hcco{ - static_cast(capacity), - static_cast(FLAGS_block_size) /*estimated_entry_charge*/, - FLAGS_cache_numshardbits}; - hcco.hash_seed = GetCacheHashSeed(); + } else if (EndsWith(FLAGS_cache_type, "hyper_clock_cache")) { + size_t estimated_entry_charge; + if (FLAGS_cache_type == "fixed_hyper_clock_cache" || + FLAGS_cache_type == "hyper_clock_cache") { + estimated_entry_charge = FLAGS_block_size; + } else if (FLAGS_cache_type == "auto_hyper_clock_cache") { + estimated_entry_charge = 0; + } else { + fprintf(stderr, "Cache type not supported."); + exit(1); + } + HyperClockCacheOptions opts(FLAGS_cache_size, estimated_entry_charge, + FLAGS_cache_numshardbits); + opts.hash_seed = GetCacheHashSeed(); if (use_tiered_cache) { - TieredVolatileCacheOptions opts; - hcco.capacity += secondary_cache_opts.capacity; - opts.cache_type = PrimaryCacheType::kCacheTypeHCC; - opts.cache_opts = &hcco; - opts.comp_cache_opts = secondary_cache_opts; - return NewTieredVolatileCache(opts); + TieredVolatileCacheOptions tiered_opts; + opts.capacity += secondary_cache_opts.capacity; + tiered_opts.cache_type = PrimaryCacheType::kCacheTypeHCC; + tiered_opts.cache_opts = &opts; + tiered_opts.comp_cache_opts = secondary_cache_opts; + return NewTieredVolatileCache(tiered_opts); } else { - return hcco.MakeSharedCache(); + return opts.MakeSharedCache(); } } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts( diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8ec54c9e6..9f6e841ef 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -123,7 +123,10 @@ "use_direct_reads": lambda: random.randint(0, 1), "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, - "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]), + "cache_type": lambda: random.choice( + ["lru_cache", "fixed_hyper_clock_cache", "auto_hyper_clock_cache", + "auto_hyper_clock_cache"] + ), "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda From 793a786fa3c16a2be782024446bd3f8bb5162875 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 11 Aug 2023 19:59:56 -0700 Subject: [PATCH 039/386] Fix for unchecked status in CancelAllBackgroundWork (#11699) Summary: ## Summary PR 
https://github.com/facebook/rocksdb/issues/11497 introduced this. Status from `CancelPeriodicTaskScheduler()` is unchecked and causing test failure like https://app.circleci.com/pipelines/github/facebook/rocksdb/30743/workflows/24443a9b-6fc3-41e6-86c1-992d766eb1ec/jobs/642419 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11699 Test Plan: Existing tests Reviewed By: cbi42 Differential Revision: D48287188 Pulled By: jaykorean fbshipit-source-id: b6bcf6e3c3c47f126c34c24a3dfed2649635cc8c --- db/db_impl/db_impl.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 66d9be161..93681c70b 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -498,13 +498,14 @@ void DBImpl::WaitForBackgroundWork() { void DBImpl::CancelAllBackgroundWork(bool wait) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Shutdown: canceling all background work"); - CancelPeriodicTaskScheduler(); + Status s = CancelPeriodicTaskScheduler(); + s.PermitUncheckedError(); + InstrumentedMutexLock l(&mutex_); if (!shutting_down_.load(std::memory_order_acquire) && has_unpersisted_data_.load(std::memory_order_relaxed) && !mutable_db_options_.avoid_flush_during_shutdown) { - Status s = - DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown); + s = DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown); s.PermitUncheckedError(); //**TODO: What to do on error? } From a09c141dde51372d14bcfd3affdd242f1248c761 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 14 Aug 2023 14:57:28 -0700 Subject: [PATCH 040/386] In TestIterateAgainstExpected(), verify iterator moves in expected direction (#11698) Summary: It's a bit repetitive in order to give reasonably informative error messages. I also removed total_order_seek in cases where it's not needed, just to make sure a case that shouldn't matter really doesn't. 
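At its core, each added check enforces a simple direction invariant on the key counters decoded from the iterator. A simplified sketch of that invariant (not the code in the diff below, which additionally reports the found vs. expected key and records the error):

```
#include <cstdint>

// Sketch only: true iff the key counter just decoded from the iterator is on
// the correct side of the last verified key for the current scan direction.
inline bool MovedInExpectedDirection(bool forward, int64_t last_key,
                                     uint64_t found_key) {
  return forward ? static_cast<int64_t>(found_key) > last_key
                 : static_cast<int64_t>(found_key) < last_key;
}
```

Any violation is treated as a verification failure rather than tolerated, which is why the repetitive error-reporting blocks below are worth the verbosity.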
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11698 Test Plan: run it - ``` $ DEBUG_LEVEL=0 TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --max_key=100000 --duration=86400 --interval=10 --write_buffer_size=524288 --target_file_size_base=524288 --max_bytes_for_level_base=2097152 --compression_type=none --blob_compression_type=none --writepercent=50 -iterpercent=45 -readpercent=0 -prefixpercent=0 --prefix_size=0 --verify_iterator_with_expected_state_one_in=10 --test_batches_snapshots=0 -enable_compaction_filter=0 ``` Reviewed By: cbi42 Differential Revision: D48285036 Pulled By: ajkr fbshipit-source-id: 51b147bd7c8011740629ae2fd8114d3d48ce7137 --- db_stress_tool/no_batched_ops_stress.cc | 85 ++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index c0159df4f..f00db51de 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1599,7 +1599,9 @@ class NonBatchedOpsStressTest : public StressTest { } ReadOptions ro(read_opts); - ro.total_order_seek = true; + if (FLAGS_prefix_size > 0) { + ro.total_order_seek = true; + } std::string read_ts_str; Slice read_ts; @@ -1667,6 +1669,7 @@ class NonBatchedOpsStressTest : public StressTest { }; auto check_no_key_in_range = [&](int64_t start, int64_t end) { + assert(start <= end); for (auto j = std::max(start, lb); j < std::min(end, ub); ++j) { std::size_t index = static_cast(j - lb); assert(index < pre_read_expected_values.size() && @@ -1709,6 +1712,7 @@ class NonBatchedOpsStressTest : public StressTest { uint64_t curr = 0; while (true) { + assert(last_key < ub); if (!iter->Valid()) { if (!iter->status().ok()) { thread->shared->SetVerificationFailure(); @@ -1731,6 +1735,18 @@ class NonBatchedOpsStressTest : public StressTest { // iter is valid, the range (last_key, current key) was skipped GetIntVal(iter->key().ToString(), &curr); + if (static_cast(curr) <= last_key) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, + "TestIterateAgainstExpected found unexpectedly small key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at least: %s\n", + Slice(Key(curr)).ToString(true).c_str(), + Slice(Key(last_key + 1)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } if (!check_no_key_in_range(last_key + 1, static_cast(curr))) { return Status::OK(); } @@ -1753,6 +1769,7 @@ class NonBatchedOpsStressTest : public StressTest { last_key = ub; while (true) { + assert(lb < last_key); if (!iter->Valid()) { if (!iter->status().ok()) { thread->shared->SetVerificationFailure(); @@ -1775,6 +1792,18 @@ class NonBatchedOpsStressTest : public StressTest { // the range (current key, last key) was skipped GetIntVal(iter->key().ToString(), &curr); + if (last_key <= static_cast(curr)) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, + "TestIterateAgainstExpected found unexpectedly large key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at most: %s\n", + Slice(Key(curr)).ToString(true).c_str(), + Slice(Key(last_key - 1)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } if (!check_no_key_in_range(static_cast(curr + 1), last_key)) { return Status::OK(); } @@ -1823,6 +1852,20 @@ class 
NonBatchedOpsStressTest : public StressTest { if (!check_no_key_in_range(mid, ub)) { return Status::OK(); } + } else if (iter->Valid()) { + GetIntVal(iter->key().ToString(), &curr); + if (static_cast(curr) < mid) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, + "TestIterateAgainstExpected found unexpectedly small key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at least: %s\n", + Slice(Key(curr)).ToString(true).c_str(), + Slice(Key(mid)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } } } else { iter->SeekForPrev(key); @@ -1832,6 +1875,20 @@ class NonBatchedOpsStressTest : public StressTest { if (!check_no_key_in_range(lb, mid + 1)) { return Status::OK(); } + } else if (iter->Valid()) { + GetIntVal(iter->key().ToString(), &curr); + if (mid < static_cast(curr)) { + thread->shared->SetVerificationFailure(); + fprintf(stderr, + "TestIterateAgainstExpected found unexpectedly large key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at most: %s\n", + Slice(Key(curr)).ToString(true).c_str(), + Slice(Key(mid)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } } } @@ -1879,6 +1936,19 @@ class NonBatchedOpsStressTest : public StressTest { } uint64_t next = 0; GetIntVal(iter->key().ToString(), &next); + if (next <= curr) { + thread->shared->SetVerificationFailure(); + fprintf( + stderr, + "TestIterateAgainstExpected found unexpectedly small key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at least: %s\n", + Slice(Key(next)).ToString(true).c_str(), + Slice(Key(curr + 1)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } if (!check_no_key_in_range(static_cast(curr + 1), static_cast(next))) { return Status::OK(); @@ -1891,6 +1961,19 @@ class NonBatchedOpsStressTest : public StressTest { } uint64_t prev = 0; GetIntVal(iter->key().ToString(), &prev); + if (curr <= prev) { + thread->shared->SetVerificationFailure(); + fprintf( + stderr, + "TestIterateAgainstExpected found unexpectedly large key\n"); + fprintf(stderr, "Column family: %s, op_logs: %s\n", + cfh->GetName().c_str(), op_logs.c_str()); + fprintf(stderr, "Last op found key: %s, expected at most: %s\n", + Slice(Key(prev)).ToString(true).c_str(), + Slice(Key(curr - 1)).ToString(true).c_str()); + thread->stats.AddErrors(1); + return Status::OK(); + } if (!check_no_key_in_range(static_cast(prev + 1), static_cast(curr))) { return Status::OK(); From 6a3da5635e1013f03930453481f49724f2319252 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 14 Aug 2023 22:04:18 -0700 Subject: [PATCH 041/386] Add documentation to some formatting util functions (#11674) Summary: As titled, mostly adding documentation. While updating one usage of these util functions in the external file ingestion job based on code inspection. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11674 Test Plan: ``` make check ``` Note that no unit test was added or updated to check the change in the external file ingestion flow works. This is because user-defined timestamp doesn't support bulk loading yet. There could be other missing pieces that are needed to make this flow functional and testable. 
That work is separately tracked and unit tests will be added then. Reviewed By: cbi42 Differential Revision: D48271338 Pulled By: jowlyzhang fbshipit-source-id: c05c3440f1c08632dd0de51b563a30b44b4eb8b5 --- db/dbformat.cc | 7 ++++ db/dbformat.h | 59 +++++++++++++++++++++++++++ db/external_sst_file_ingestion_job.cc | 2 +- include/rocksdb/db.h | 5 ++- 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/db/dbformat.cc b/db/dbformat.cc index 2d24c8953..63bb354de 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -88,6 +88,13 @@ void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, result->append(kTsMax.data(), ts_sz); } +void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key, + size_t ts_sz) { + assert(ts_sz > 0); + result->append(key.data(), key.size() - ts_sz); + result->append(ts_sz, static_cast(0)); +} + void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, size_t ts_sz) { assert(ts_sz > 0); diff --git a/db/dbformat.h b/db/dbformat.h index d0b3bbaaa..6ce1bafb8 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -168,10 +168,18 @@ inline void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, EntryType GetEntryType(ValueType value_type); // Append the serialization of "key" to *result. +// +// input [internal key]: +// output before: empty +// output: void AppendInternalKey(std::string* result, const ParsedInternalKey& key); // Append the serialization of "key" to *result, replacing the original // timestamp with argument ts. +// +// input [internal key]: +// output before: empty +// output after: void AppendInternalKeyWithDifferentTimestamp(std::string* result, const ParsedInternalKey& key, const Slice& ts); @@ -179,37 +187,73 @@ void AppendInternalKeyWithDifferentTimestamp(std::string* result, // Serialized internal key consists of user key followed by footer. // This function appends the footer to *result, assuming that *result already // contains the user key at the end. +// +// output before: +// output after: void AppendInternalKeyFooter(std::string* result, SequenceNumber s, ValueType t); // Append the key and a minimal timestamp to *result +// +// input [user key without ts]: +// output before: empty +// output after: void AppendKeyWithMinTimestamp(std::string* result, const Slice& key, size_t ts_sz); // Append the key and a maximal timestamp to *result +// +// input [user key without ts]: +// output before: empty +// output after: void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key, size_t ts_sz); +// `key` is a user key with timestamp. Append the user key without timestamp +// and the minimum timestamp to *result. +// +// input [user key]: +// output before: empty +// output after: +void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key, + size_t ts_sz); + // `key` is a user key with timestamp. Append the user key without timestamp // and the maximal timestamp to *result. +// +// input [user key]: +// output before: empty +// output after: void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key, size_t ts_sz); // `key` is an internal key containing a user key without timestamp. Create a // new key in *result by padding a min timestamp of size `ts_sz` to the user key // and copying the remaining internal key bytes. +// +// input [internal key]: +// output before: empty +// output after: void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key, size_t ts_sz); // `key` is an internal key containing a user key with timestamp of size // `ts_sz`. 
Create a new internal key in *result by stripping the timestamp from // the user key and copying the remaining internal key bytes. +// +// input [internal key]: +// output before: empty +// output after: void StripTimestampFromInternalKey(std::string* result, const Slice& key, size_t ts_sz); // `key` is an internal key containing a user key with timestamp of size // `ts_sz`. Create a new internal key in *result while replace the original // timestamp with min timestamp. +// +// input [internal key]: +// output before: empty +// output after: void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key, size_t ts_sz); @@ -221,11 +265,16 @@ Status ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result, bool log_err_key); // Returns the user key portion of an internal key. +// +// input [internal key]: +// output: inline Slice ExtractUserKey(const Slice& internal_key) { assert(internal_key.size() >= kNumInternalBytes); return Slice(internal_key.data(), internal_key.size() - kNumInternalBytes); } +// input [internal key]: +// output : inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, size_t ts_sz) { Slice ret = internal_key; @@ -233,17 +282,23 @@ inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key, return ret; } +// input [user key]: +// output: inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { Slice ret = user_key; ret.remove_suffix(ts_sz); return ret; } +// input [user key]: +// output: inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) { assert(user_key.size() >= ts_sz); return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz); } +// input [internal key]: +// output: inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) { const size_t key_size = internal_key.size(); assert(key_size >= kNumInternalBytes + ts_sz); @@ -251,12 +306,16 @@ inline Slice ExtractTimestampFromKey(const Slice& internal_key, size_t ts_sz) { ts_sz); } +// input [internal key]: +// output: inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) { assert(internal_key.size() >= kNumInternalBytes); const size_t n = internal_key.size(); return DecodeFixed64(internal_key.data() + n - kNumInternalBytes); } +// input [internal key]: +// output: inline ValueType ExtractValueType(const Slice& internal_key) { uint64_t num = ExtractInternalKeyFooter(internal_key); unsigned char c = num & 0xff; diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index d25c42118..9756f47aa 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -349,7 +349,7 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed, std::string end_str; AppendUserKeyWithMaxTimestamp( &begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz); - AppendKeyWithMinTimestamp( + AppendUserKeyWithMinTimestamp( &end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz); keys.emplace_back(std::move(begin_str)); keys.emplace_back(std::move(end_str)); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 6c15213fa..2c8644409 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1361,8 +1361,9 @@ class DB { // the files. In this case, client could set options.change_level to true, to // move the files back to the minimum level capable of holding the data set // or a given level (specified by non-negative options.target_level). 
- // In case of user_defined timestamp, if enabled, `start` and `end` should - // point to key without timestamp part. + // + // In case of user-defined timestamp, if enabled, `begin` and `end` should + // not contain timestamp. virtual Status CompactRange(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end) = 0; From 407efb021c82d60b8ead34e725f7dfa901bd2ee5 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 15 Aug 2023 13:44:13 -0700 Subject: [PATCH 042/386] Expose the root comparator for built-in With64Ts comparators (#11704) Summary: As titled. User-defined timestamp feature users sometimes directly call the user comparator to do validation on their side too. Having access to the root comparator can help make their code consistent for when UDT is enabled and disabled. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11704 Reviewed By: ltamasi Differential Revision: D48355090 Pulled By: jowlyzhang fbshipit-source-id: 26bc73543bfb379ef548d1361803d6f8c308cef6 --- util/comparator.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/util/comparator.cc b/util/comparator.cc index d0ff1f7aa..e573f5e85 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -250,6 +250,12 @@ class ComparatorWithU64TsImpl : public Comparator { const char* Name() const override { return kClassName(); } + // The comparator that compares the user key without timestamp part is treated + // as the root comparator. + const Comparator* GetRootComparator() const override { + return &cmp_without_ts_; + } + void FindShortSuccessor(std::string*) const override {} void FindShortestSeparator(std::string*, const Slice&) const override {} int Compare(const Slice& a, const Slice& b) const override { From b63018fb59b3736b7a2e597dc934da0693c16788 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Tue, 15 Aug 2023 16:13:13 -0700 Subject: [PATCH 043/386] Wide Column Ingestion in CrashTest (#11697) Summary: `PutEntity` is now supported in SST file writer (https://github.com/facebook/rocksdb/issues/11688). This PR enables ingestion of wide column data in the stress/crash tests. 
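
For readers unfamiliar with the SST file writer side of this, here is a minimal illustrative sketch (not part of this patch) of writing a wide-column entity into an external SST file that could later be ingested with `DB::IngestExternalFile()`. The helper name, file path, keys, and column names below are made up for illustration, and error handling is reduced to early returns:
```
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/wide_columns.h"

// Hypothetical helper: build one external SST file containing a single
// wide-column entity.
rocksdb::Status WriteWideColumnSst(const std::string& file_path) {
  rocksdb::Options options;
  rocksdb::SstFileWriter writer(rocksdb::EnvOptions(), options);
  rocksdb::Status s = writer.Open(file_path);
  if (!s.ok()) {
    return s;
  }
  // Keys must be added in ascending order, just like with Put().
  rocksdb::WideColumns columns{{"attr1", "val1"}, {"attr2", "val2"}};
  s = writer.PutEntity("key1", columns);
  if (!s.ok()) {
    return s;
  }
  return writer.Finish();
}
```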
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11697 Test Plan: ``` python3 tools/db_crashtest.py blackbox --simple --duration=300 --ingest_external_file_one_in=2 --use_put_entity_one_in=2 --max_key=1048576 -write_buffer_size=1048576 -target_file_size_base=1048576 -max_bytes_for_level_base=4194304 --interval=10 -value_size_mult=33 -column_families=1 -reopen=0 --key_len_percent_dist="1,30,69" ``` Reviewed By: ltamasi Differential Revision: D48370719 Pulled By: jaykorean fbshipit-source-id: 5855d3112b37b2fb300d05e6df110d899855d77d --- db_stress_tool/db_stress_tool.cc | 7 +++---- db_stress_tool/no_batched_ops_stress.cc | 16 ++++++++++++++-- tools/db_crashtest.py | 3 +-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index f2b124b4c..787efe47d 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -308,11 +308,10 @@ int db_stress_tool(int argc, char** argv) { } if (FLAGS_use_put_entity_one_in > 0 && - (FLAGS_ingest_external_file_one_in > 0 || FLAGS_use_merge || - FLAGS_use_full_merge_v1 || FLAGS_use_txn || FLAGS_test_multi_ops_txns || - FLAGS_user_timestamp_size > 0)) { + (FLAGS_use_merge || FLAGS_use_full_merge_v1 || FLAGS_use_txn || + FLAGS_test_multi_ops_txns || FLAGS_user_timestamp_size > 0)) { fprintf(stderr, - "PutEntity is currently incompatible with SstFileWriter, Merge," + "PutEntity is currently incompatible with Merge," " transactions, and user-defined timestamps\n"); exit(1); } diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index f00db51de..f822a7663 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1538,9 +1538,21 @@ class NonBatchedOpsStressTest : public StressTest { pending_expected_values.push_back(pending_expected_value); char value[100]; - size_t value_len = GenerateValue(value_base, value, sizeof(value)); auto key_str = Key(key); - s = sst_file_writer.Put(Slice(key_str), Slice(value, value_len)); + const size_t value_len = GenerateValue(value_base, value, sizeof(value)); + const Slice k(key_str); + const Slice v(value, value_len); + + const bool use_put_entity = + !FLAGS_use_merge && FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0; + + if (use_put_entity) { + WideColumns columns = GenerateWideColumns(value_base, v); + s = sst_file_writer.PutEntity(k, columns); + } else { + s = sst_file_writer.Put(k, v); + } } if (s.ok() && keys.empty()) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 9f6e841ef..e7e772910 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -657,9 +657,8 @@ def finalize_and_sanitize(src_params): if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0: dest_params["sync_fault_injection"] = 0 dest_params["manual_wal_flush_one_in"] = 0 - # PutEntity is currently not supported by SstFileWriter or in conjunction with Merge + # PutEntity is currently incompatible with Merge if dest_params["use_put_entity_one_in"] != 0: - dest_params["ingest_external_file_one_in"] = 0 dest_params["use_merge"] = 0 dest_params["use_full_merge_v1"] = 0 if dest_params["file_checksum_impl"] == "none": From 0b6ee88d51b4742c9a623df58587773ae2ca0de5 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 16 Aug 2023 09:43:20 -0700 Subject: [PATCH 044/386] clarify TODO for whitebox disable_wal=1 in db_crashtest.py (#11665) Summary: See 
https://github.com/facebook/rocksdb/issues/11613 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11665 Reviewed By: hx235 Differential Revision: D48010507 Pulled By: ajkr fbshipit-source-id: 65c6d87d2c6ffc9d25f1d17106eae467ec528082 --- tools/db_crashtest.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index e7e772910..d7baab793 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -323,10 +323,14 @@ def is_direct_io_supported(dbname): } whitebox_default_params = { - # TODO: enable this once we figure out how to adjust kill odds for WAL- - # disabled runs, and either (1) separate full `db_stress` runs out of - # whitebox crash or (2) support verification at end of `db_stress` runs - # that ran with WAL disabled. + # TODO: enable this at random once we figure out two things. First, we need + # to ensure the kill odds in WAL-disabled runs result in regular crashing + # before the fifteen minute timeout. When WAL is disabled there are very few + # calls to write functions since writes to SST files are buffered and other + # writes (e.g., MANIFEST) are infrequent. Crashing in reasonable time might + # currently assume killpoints in write functions are reached frequently. + # + # Second, we need to make sure disabling WAL works with `-reopen > 0`. "disable_wal": 0, "duration": 10000, "log2_keys_per_lock": 10, From d1ff401472e4a4f27d2cb9217d8950a4a42ab13d Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 16 Aug 2023 17:45:44 -0700 Subject: [PATCH 045/386] Delay bottommost level single file compactions (#11701) Summary: For leveled compaction, RocksDB has a special kind of compaction with reason "kBottommmostFiles" that compacts bottommost level files to clear data held by snapshots (more detail in https://github.com/facebook/rocksdb/issues/3009). Such compactions can happen soon after a relevant snapshot is released. For some use cases, a bottommost file may contain only a small amount of keys that can be cleared, so compacting such a file has a high write amp. In addition, these bottommost files may be compacted in compactions with reason other than "kBottommmostFiles" if we wait for some time (so that enough data is ingested to trigger such a compaction). This PR introduces an option `bottommost_file_compaction_delay` to specify the delay of these bottommost level single file compactions. * The main change is in `VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction()` where we only add a file to `bottommost_files_marked_for_compaction_` if it oldest_snapshot is larger than its non-zero largest_seqno **and** the file is old enough. Note that if a file is not old enough but its largest_seqno is less than oldest_snapshot, we exclude it from the calculation of `bottommost_files_mark_threshold_`. This makes the change simpler, but such a file's eligibility for compaction will only be checked the next time `ComputeBottommostFilesMarkedForCompaction()` is called. This happens when a new Version is created (compaction, flush, SetOptions()...), a new enough snapshot is released (`VersionStorageInfo::UpdateOldestSnapshot()`) or when a compaction is picked and compaction score has to be re-calculated. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11701 Test Plan: * Add two unit tests to test when bottommost_file_compaction_delay > 0. * Ran crash test with the new option. 
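
For illustration, a minimal sketch (not part of this patch) of how the new option might be configured; the helper name, DB path, and the one-hour/two-hour values are arbitrary:
```
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Hypothetical helper: open a DB with a delay on "BottommostFiles"
// compactions and later adjust it through SetOptions().
rocksdb::Status OpenWithBottommostDelay(const std::string& db_path,
                                        rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Delay kBottommostFiles compactions until a bottommost file is at least
  // one hour old, even after the protecting snapshot has been released.
  options.bottommost_file_compaction_delay = 3600;  // seconds; arbitrary value
  rocksdb::Status s = rocksdb::DB::Open(options, db_path, db);
  if (!s.ok()) {
    return s;
  }
  // The option is mutable, so it can also be changed at runtime:
  return (*db)->SetOptions((*db)->DefaultColumnFamily(),
                           {{"bottommost_file_compaction_delay", "7200"}});
}
```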
Reviewed By: jaykorean, ajkr Differential Revision: D48331564 Pulled By: cbi42 fbshipit-source-id: c584f3dc5f6354fce3ed65f4c6366dc450b15ba8 --- db/compaction/compaction_picker_test.cc | 6 +- db/db_compaction_test.cc | 82 ++++++++++- db/db_impl/db_impl.h | 4 +- db/import_column_family_job.cc | 3 +- db/repair.cc | 3 +- db/version_builder_test.cc | 139 ++++++++++-------- db/version_set.cc | 51 ++++++- db/version_set.h | 9 +- db/version_set_test.cc | 4 +- db_stress_tool/db_stress_common.h | 2 + db_stress_tool/db_stress_gflags.cc | 4 + db_stress_tool/db_stress_test_base.cc | 3 + include/rocksdb/advanced_options.h | 14 ++ options/cf_options.cc | 6 + options/cf_options.h | 5 +- options/options_helper.cc | 2 + options/options_settable_test.cc | 3 +- tools/db_crashtest.py | 2 + ...delay_bottommost_single_file_compaction.md | 1 + 19 files changed, 265 insertions(+), 78 deletions(-) create mode 100644 unreleased_history/new_features/delay_bottommost_single_file_compaction.md diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 6aec03840..2684f62e6 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -84,7 +84,8 @@ class CompactionPickerTestBase : public testing::Test { options_.num_levels = num_levels; vstorage_.reset(new VersionStorageInfo( &icmp_, ucmp_, options_.num_levels, style, nullptr, false, - EpochNumberRequirement::kMustPresent)); + EpochNumberRequirement::kMustPresent, ioptions_.clock, + options_.bottommost_file_compaction_delay)); vstorage_->PrepareForVersionAppend(ioptions_, mutable_cf_options_); } @@ -93,7 +94,8 @@ class CompactionPickerTestBase : public testing::Test { void AddVersionStorage() { temp_vstorage_.reset(new VersionStorageInfo( &icmp_, ucmp_, options_.num_levels, ioptions_.compaction_style, - vstorage_.get(), false, EpochNumberRequirement::kMustPresent)); + vstorage_.get(), false, EpochNumberRequirement::kMustPresent, + ioptions_.clock, options_.bottommost_file_compaction_delay)); } void DeleteVersionStorage() { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index d6f9f25fb..0d23a76e5 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -4126,11 +4126,6 @@ TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { // files does not need to be preserved in case of a future snapshot. ASSERT_OK(Put(Key(0), "val")); ASSERT_NE(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); - // release snapshot and wait for compactions to finish. Single-file - // compactions should be triggered, which reduce the size of each bottom-level - // file without changing file count. - db_->ReleaseSnapshot(snapshot); - ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { Compaction* compaction = reinterpret_cast(arg); @@ -4138,6 +4133,11 @@ TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { CompactionReason::kBottommostFiles); }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + // release snapshot and wait for compactions to finish. Single-file + // compactions should be triggered, which reduce the size of each bottom-level + // file without changing file count. 
+ db_->ReleaseSnapshot(snapshot); + ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); ASSERT_OK(dbfull()->TEST_WaitForCompact()); db_->GetLiveFilesMetaData(&post_release_metadata); ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); @@ -4154,6 +4154,78 @@ TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBCompactionTest, DelayCompactBottomLevelFilesWithDeletions) { + // bottom-level files may contain deletions due to snapshots protecting the + // deleted keys. Once the snapshot is released and the files are old enough, + // we should see them undergo single-file compactions. + Options options = CurrentOptions(); + env_->SetMockSleep(); + options.bottommost_file_compaction_delay = 3600; + DestroyAndReopen(options); + CreateColumnFamilies({"one"}, options); + const int kNumKey = 100; + const int kValLen = 100; + + Random rnd(301); + for (int i = 0; i < kNumKey; ++i) { + ASSERT_OK(Put(Key(i), rnd.RandomString(kValLen))); + } + const Snapshot* snapshot = db_->GetSnapshot(); + for (int i = 0; i < kNumKey; i += 2) { + ASSERT_OK(Delete(Key(i))); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + ASSERT_EQ(1, NumTableFilesAtLevel(1)); + + std::vector pre_release_metadata; + db_->GetLiveFilesMetaData(&pre_release_metadata); + ASSERT_EQ(1, pre_release_metadata.size()); + std::atomic_int compaction_count = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { + Compaction* compaction = reinterpret_cast(arg); + ASSERT_TRUE(compaction->compaction_reason() == + CompactionReason::kBottommostFiles); + compaction_count++; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST + // files does not need to be preserved in case of a future snapshot. + ASSERT_OK(Put(Key(0), "val")); + ASSERT_NE(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); + // release snapshot will not trigger compaction. + db_->ReleaseSnapshot(snapshot); + ASSERT_EQ(kMaxSequenceNumber, dbfull()->bottommost_files_mark_threshold_); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(0, compaction_count); + // Now the file is old enough for compaction. + env_->MockSleepForSeconds(3600); + // Another flush will trigger re-computation of the compaction score + // to find out that the file is qualified for compaction. + ASSERT_OK(Flush()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(1, compaction_count); + + std::vector post_release_metadata; + db_->GetLiveFilesMetaData(&post_release_metadata); + ASSERT_EQ(2, post_release_metadata.size()); + + const auto& pre_file = pre_release_metadata[0]; + // Get the L1 (bottommost level) file. + const auto& post_file = post_release_metadata[0].level == 0 + ? post_release_metadata[1] + : post_release_metadata[0]; + + ASSERT_EQ(1, pre_file.level); + ASSERT_EQ(1, post_file.level); + // the file is smaller than it was before as it was rewritten without + // deletion markers/deleted keys. + ASSERT_LT(post_file.size, pre_file.size); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBCompactionTest, NoCompactBottomLevelFilesWithDeletions) { // bottom-level files may contain deletions due to snapshots protecting the // deleted keys. 
Once the snapshot is released, we should see files with many diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index b3a884783..50aec5804 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1580,12 +1580,14 @@ class DBImpl : public DB { friend class ForwardIterator; friend struct SuperVersion; friend class CompactedDBImpl; +#ifndef NDEBUG friend class DBTest_ConcurrentFlushWAL_Test; friend class DBTest_MixedSlowdownOptionsStop_Test; friend class DBCompactionTest_CompactBottomLevelFilesWithDeletions_Test; friend class DBCompactionTest_CompactionDuringShutdown_Test; + friend class DBCompactionTest_DelayCompactBottomLevelFilesWithDeletions_Test; + friend class DBCompactionTest_DisableCompactBottomLevelFiles_Test; friend class StatsHistoryTest_PersistentStatsCreateColumnFamilies_Test; -#ifndef NDEBUG friend class DBTest2_ReadCallbackTest_Test; friend class WriteCallbackPTest_WriteWithCallbackTest_Test; friend class XFTransactionWriteHandler; diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 11c5fd41b..32bc4eead 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -185,7 +185,8 @@ Status ImportColumnFamilyJob::Run() { &cfd_->internal_comparator(), cfd_->user_comparator(), cfd_->NumberLevels(), cfd_->ioptions()->compaction_style, nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks, - EpochNumberRequirement::kMightMissing); + EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock, + cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay); Status s; for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) { diff --git a/db/repair.cc b/db/repair.cc index 4b28ec2cd..1af738fca 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -692,7 +692,8 @@ class Repairer { &cfd->internal_comparator(), cfd->user_comparator(), cfd->NumberLevels(), cfd->ioptions()->compaction_style, nullptr /* src_vstorage */, cfd->ioptions()->force_consistency_checks, - EpochNumberRequirement::kMightMissing); + EpochNumberRequirement::kMightMissing, cfd->ioptions()->clock, + /*bottommost_file_compaction_delay=*/0); Status s; VersionEdit dummy_edit; for (const auto* table : cf_id_and_tables.second) { diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index ac80be7ca..34db9aba8 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -37,7 +37,8 @@ class VersionBuilderTest : public testing::Test { ioptions_(options_), mutable_cf_options_(options_), vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr, false), + nullptr, false, EpochNumberRequirement::kMustPresent, + ioptions_.clock, options_.bottommost_file_compaction_delay), file_num_(1) { mutable_cf_options_.RefreshDerivedOptions(ioptions_); size_being_compacted_.resize(options_.num_levels); @@ -199,8 +200,9 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -249,8 +251,9 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - 
VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -303,8 +306,9 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -359,8 +363,9 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -381,8 +386,9 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, false); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, + EpochNumberRequirement::kMightMissing, nullptr, 0); VersionEdit version_edit; version_edit.AddFile( @@ -548,9 +554,10 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { ASSERT_OK(builder.Apply(&addition)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -692,9 +699,10 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { ASSERT_OK(builder.Apply(&deletion)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -736,9 +744,10 @@ TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ 
-875,9 +884,10 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -948,9 +958,10 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) { ASSERT_OK(builder.Apply(&garbage)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1128,9 +1139,10 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = false; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1176,9 +1188,10 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { ASSERT_OK(second_builder.Apply(&second_edit)); - VersionStorageInfo newer_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &new_vstorage, - force_consistency_checks); + VersionStorageInfo newer_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &new_vstorage, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(second_builder.SaveTo(&newer_vstorage)); @@ -1262,9 +1275,10 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { ASSERT_OK(builder.Apply(&edit)); constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1365,9 +1379,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1404,9 +1419,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) { // Save to a new version in order to trigger consistency checks. 
constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1445,9 +1461,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) { // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1494,9 +1511,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) { // Save to a new version in order to trigger consistency checks. constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1657,9 +1675,10 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { ASSERT_OK(builder.Apply(&edit2)); constexpr bool force_consistency_checks = true; - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, &vstorage_, - force_consistency_checks); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, + force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, + 0); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1708,9 +1727,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { VersionBuilder version_builder(env_options, &ioptions_, table_cache, &vstorage_, version_set); - VersionStorageInfo new_vstorage(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, - true /* force_consistency_checks */); + VersionStorageInfo new_vstorage( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, + true /* force_consistency_checks */, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -1718,9 +1738,10 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { VersionBuilder version_builder2(env_options, &ioptions_, table_cache, &new_vstorage, version_set); - VersionStorageInfo new_vstorage2(&icmp_, ucmp_, options_.num_levels, - kCompactionStyleLevel, nullptr, - true /* force_consistency_checks */); + VersionStorageInfo new_vstorage2( + &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, + true /* force_consistency_checks */, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_NOK(version_builder2.Apply(&version_edit)); UnrefFilesInVersion(&new_vstorage); @@ -1758,7 +1779,8 @@ TEST_F(VersionBuilderTest, 
CheckConsistencyForL0FilesSortedByEpochNumber) { nullptr /* file_metadata_cache_res_mgr */); VersionStorageInfo new_vstorage_1( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr /* src_vstorage */, true /* force_consistency_checks */); + nullptr /* src_vstorage */, true /* force_consistency_checks */, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder_1.Apply(&version_edit_1)); s = version_builder_1.SaveTo(&new_vstorage_1); @@ -1795,7 +1817,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { nullptr /* file_metadata_cache_res_mgr */); VersionStorageInfo new_vstorage_2( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, - nullptr /* src_vstorage */, true /* force_consistency_checks */); + nullptr /* src_vstorage */, true /* force_consistency_checks */, + EpochNumberRequirement::kMightMissing, nullptr, 0); ASSERT_OK(version_builder_2.Apply(&version_edit_2)); s = version_builder_2.SaveTo(&new_vstorage_2); diff --git a/db/version_set.cc b/db/version_set.cc index 68646b122..c5057028d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2114,7 +2114,8 @@ VersionStorageInfo::VersionStorageInfo( const Comparator* user_comparator, int levels, CompactionStyle compaction_style, VersionStorageInfo* ref_vstorage, bool _force_consistency_checks, - EpochNumberRequirement epoch_number_requirement) + EpochNumberRequirement epoch_number_requirement, SystemClock* clock, + uint32_t bottommost_file_compaction_delay) : internal_comparator_(internal_comparator), user_comparator_(user_comparator), // cfd is nullptr if Version is dummy @@ -2142,6 +2143,8 @@ VersionStorageInfo::VersionStorageInfo( current_num_deletions_(0), current_num_samples_(0), estimated_compaction_needed_bytes_(0), + clock_(clock), + bottommost_file_compaction_delay_(bottommost_file_compaction_delay), finalized_(false), force_consistency_checks_(_force_consistency_checks), epoch_number_requirement_(epoch_number_requirement) { @@ -2186,7 +2189,11 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, ? nullptr : cfd_->current()->storage_info(), cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks, - epoch_number_requirement), + epoch_number_requirement, + cfd_ == nullptr ? nullptr : cfd_->ioptions()->clock, + cfd_ == nullptr + ? 0 + : mutable_cf_options.bottommost_file_compaction_delay), vset_(vset), next_(this), prev_(this), @@ -4178,14 +4185,48 @@ void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) { void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction() { bottommost_files_marked_for_compaction_.clear(); bottommost_files_mark_threshold_ = kMaxSequenceNumber; + // If a file's creation time is larger than creation_time_ub, + // it is too new to be marked for compaction. + int64_t creation_time_ub = 0; + bool needs_delay = bottommost_file_compaction_delay_ > 0; + if (needs_delay) { + int64_t current_time = 0; + clock_->GetCurrentTime(¤t_time).PermitUncheckedError(); + // Note that if GetCurrentTime() fails, current_time will be 0. + // We will treat it as is and treat all files as too new. + // The subtraction will not underflow since + // bottommost_file_compaction_delay_ is of type uint32_t. 
+ creation_time_ub = + current_time - static_cast(bottommost_file_compaction_delay_); + } + for (auto& level_and_file : bottommost_files_) { if (!level_and_file.second->being_compacted && level_and_file.second->fd.largest_seqno != 0) { // largest_seqno might be nonzero due to containing the final key in an - // earlier compaction, whose seqnum we didn't zero out. Multiple deletions - // ensures the file really contains deleted or overwritten keys. + // earlier compaction, whose seqnum we didn't zero out. if (level_and_file.second->fd.largest_seqno < oldest_snapshot_seqnum_) { - bottommost_files_marked_for_compaction_.push_back(level_and_file); + if (!needs_delay) { + bottommost_files_marked_for_compaction_.push_back(level_and_file); + } else if (creation_time_ub > 0) { + int64_t creation_time = static_cast( + level_and_file.second->TryGetFileCreationTime()); + if (creation_time == kUnknownFileCreationTime || + creation_time <= creation_time_ub) { + bottommost_files_marked_for_compaction_.push_back(level_and_file); + } else { + // Just ignore this file for both + // bottommost_files_marked_for_compaction_ and + // bottommost_files_mark_threshold_. The next time + // this method is called, it will try this file again. The method + // is called after a new Version creation (compaction, flush, etc.), + // after a compaction is picked, and after a snapshot newer than + // bottommost_files_mark_threshold_ is released. + } + } else { + // creation_time_ub <= 0, all files are too new to be marked for + // compaction. + } } else { bottommost_files_mark_threshold_ = std::min(bottommost_files_mark_threshold_, diff --git a/db/version_set.h b/db/version_set.h index e32d0ff11..f08f758c4 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -132,8 +132,9 @@ class VersionStorageInfo { CompactionStyle compaction_style, VersionStorageInfo* src_vstorage, bool _force_consistency_checks, - EpochNumberRequirement epoch_number_requirement = - EpochNumberRequirement::kMustPresent); + EpochNumberRequirement epoch_number_requirement, + SystemClock* clock, + uint32_t bottommost_file_compaction_delay); // No copying allowed VersionStorageInfo(const VersionStorageInfo&) = delete; void operator=(const VersionStorageInfo&) = delete; @@ -748,6 +749,10 @@ class VersionStorageInfo { // target sizes. uint64_t estimated_compaction_needed_bytes_; + // Used for computing bottommost files marked for compaction. 
+ SystemClock* clock_; + uint32_t bottommost_file_compaction_delay_; + bool finalized_; // If set to true, we will run consistency checks even if RocksDB diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 86f9798c8..59b946151 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -130,7 +130,9 @@ class VersionStorageInfoTestBase : public testing::Test { mutable_cf_options_(options_), vstorage_(&icmp_, ucmp_, 6, kCompactionStyleLevel, /*src_vstorage=*/nullptr, - /*_force_consistency_checks=*/false) {} + /*_force_consistency_checks=*/false, + EpochNumberRequirement::kMustPresent, ioptions_.clock, + mutable_cf_options_.bottommost_file_compaction_delay) {} ~VersionStorageInfoTestBase() override { for (int i = 0; i < vstorage_.num_levels(); ++i) { diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index a954d747b..a90abe9a2 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -330,6 +330,8 @@ DECLARE_bool(enable_thread_tracking); DECLARE_uint32(memtable_max_range_deletions); +DECLARE_uint32(bottommost_file_compaction_delay); + // Tiered storage DECLARE_bool(enable_tiered_storage); // set last_level_temperature DECLARE_int64(preclude_last_level_data_seconds); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 7c499faf7..cd525cf9c 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1113,4 +1113,8 @@ DEFINE_uint32(memtable_max_range_deletions, 0, "If nonzero, RocksDB will try to flush the current memtable" "after the number of range deletions is >= this limit"); +DEFINE_uint32(bottommost_file_compaction_delay, 0, + "Delay kBottommostFiles compaction by this amount of seconds." + "See more in option comment."); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 459878ee0..bd71cb2a6 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -3331,6 +3331,9 @@ void InitializeOptionsFromFlags( options.enable_thread_tracking = FLAGS_enable_thread_tracking; options.memtable_max_range_deletions = FLAGS_memtable_max_range_deletions; + + options.bottommost_file_compaction_delay = + FLAGS_bottommost_file_compaction_delay; } void InitializeOptionsGeneral( diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index ab394977d..ffe992681 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -1136,6 +1136,7 @@ struct AdvancedColumnFamilyOptions { // // Default: 0 (no protection) // Supported values: 0, 1, 2, 4, 8. + // Dynamically changeable through the SetOptions() API. uint32_t memtable_protection_bytes_per_key = 0; // UNDER CONSTRUCTION -- DO NOT USE @@ -1199,8 +1200,21 @@ struct AdvancedColumnFamilyOptions { // // Default: 0 (no protection) // Supported values: 0, 1, 2, 4, 8. + // Dynamically changeable through the SetOptions() API. uint8_t block_protection_bytes_per_key = 0; + // For leveled compaction, RocksDB may compact a file at the bottommost level + // if it can compact away data that were protected by some snapshot. + // The compaction reason in LOG for this kind of compactions is + // "BottommostFiles". Usually such compaction can happen as soon as a + // relevant snapshot is released. This option allows user to delay + // such compactions. 
A file is qualified for "BottommostFiles" compaction + // if it is at least "bottommost_file_compaction_delay" seconds old. + // + // Default: 0 (no delay) + // Dynamically changeable through the SetOptions() API. + uint32_t bottommost_file_compaction_delay = 0; + // Create ColumnFamilyOptions with default values for all fields AdvancedColumnFamilyOptions(); // Create ColumnFamilyOptions from Options diff --git a/options/cf_options.cc b/options/cf_options.cc index a425c18bd..d30171fbc 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -507,6 +507,10 @@ static std::unordered_map {offsetof(struct MutableCFOptions, memtable_protection_bytes_per_key), OptionType::kUInt32T, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"bottommost_file_compaction_delay", + {offsetof(struct MutableCFOptions, bottommost_file_compaction_delay), + OptionType::kUInt32T, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, {"block_protection_bytes_per_key", {offsetof(struct MutableCFOptions, block_protection_bytes_per_key), OptionType::kUInt8T, OptionVerificationType::kNormal, @@ -1117,6 +1121,8 @@ void MutableCFOptions::Dump(Logger* log) const { ROCKS_LOG_INFO(log, " experimental_mempurge_threshold: %f", experimental_mempurge_threshold); + ROCKS_LOG_INFO(log, " bottommost_file_compaction_delay: %" PRIu32, + bottommost_file_compaction_delay); // Universal Compaction Options ROCKS_LOG_INFO(log, "compaction_options_universal.size_ratio : %d", diff --git a/options/cf_options.h b/options/cf_options.h index 86de78d9d..075d1568e 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -176,7 +176,9 @@ struct MutableCFOptions { sample_for_compression( options.sample_for_compression), // TODO: is 0 fine here? compression_per_level(options.compression_per_level), - memtable_max_range_deletions(options.memtable_max_range_deletions) { + memtable_max_range_deletions(options.memtable_max_range_deletions), + bottommost_file_compaction_delay( + options.bottommost_file_compaction_delay) { RefreshDerivedOptions(options.num_levels, options.compaction_style); } @@ -321,6 +323,7 @@ struct MutableCFOptions { uint64_t sample_for_compression; std::vector compression_per_level; uint32_t memtable_max_range_deletions; + uint32_t bottommost_file_compaction_delay; // Derived options // Per-level target file size. 
diff --git a/options/options_helper.cc b/options/options_helper.cc index 83b4e970d..7eaad6ba6 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -210,6 +210,8 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions, moptions.memtable_protection_bytes_per_key; cf_opts->block_protection_bytes_per_key = moptions.block_protection_bytes_per_key; + cf_opts->bottommost_file_compaction_delay = + moptions.bottommost_file_compaction_delay; // Compaction related options cf_opts->disable_auto_compactions = moptions.disable_auto_compactions; diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 60a25dc42..fa001d2c0 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -560,7 +560,8 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "memtable_protection_bytes_per_key=2;" "persist_user_defined_timestamps=true;" "block_protection_bytes_per_key=1;" - "memtable_max_range_deletions=999999;", + "memtable_max_range_deletions=999999;" + "bottommost_file_compaction_delay=7200;", new_options)); ASSERT_NE(new_options->blob_cache.get(), nullptr); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index d7baab793..49fe96008 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -213,6 +213,8 @@ "min_write_buffer_number_to_merge": lambda: random.choice([1, 2]), "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]), "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]), + # 0 (disable) is the default and more commonly used value. + "bottommost_file_compaction_delay": lambda: random.choice([0, 0, 0, 600, 3600, 86400]), } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" diff --git a/unreleased_history/new_features/delay_bottommost_single_file_compaction.md b/unreleased_history/new_features/delay_bottommost_single_file_compaction.md new file mode 100644 index 000000000..ca703674b --- /dev/null +++ b/unreleased_history/new_features/delay_bottommost_single_file_compaction.md @@ -0,0 +1 @@ +Add a CF option `bottommost_file_compaction_delay` to allow specifying the delay of bottommost level single-file compactions. \ No newline at end of file From 966be1cc4e468c78e3d1acc3305cc6c8f018484e Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 17 Aug 2023 11:52:38 -0700 Subject: [PATCH 046/386] Clean up some FastRange calls (#11707) Summary: * JemallocNodumpAllocator was passing a size_t to FastRange32, which could cause compilation errors or warnings (seen with clang) * Fixed the order of arguments to match what would be used with modulo operator (%), for clarity. Fixes https://github.com/facebook/rocksdb/issues/11006 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11707 Test Plan: no functional change, existing tests Reviewed By: ajkr Differential Revision: D48435149 Pulled By: pdillinger fbshipit-source-id: e6e8b107ded4eceda37db20df59985c846a2546b --- memory/jemalloc_nodump_allocator.cc | 3 ++- util/bloom_impl.h | 6 +++--- util/dynamic_bloom.h | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/memory/jemalloc_nodump_allocator.cc b/memory/jemalloc_nodump_allocator.cc index d05248224..02e1fad16 100644 --- a/memory/jemalloc_nodump_allocator.cc +++ b/memory/jemalloc_nodump_allocator.cc @@ -124,7 +124,8 @@ uint32_t JemallocNodumpAllocator::GetArenaIndex() const { // to make Random thread-safe and prevent cacheline bouncing. Whether this is // worthwhile is still an open question. 
thread_local Random tl_random(next_seed.fetch_add(1)); - return arena_indexes_[FastRange32(tl_random.Next(), arena_indexes_.size())]; + return arena_indexes_[FastRange32( + tl_random.Next(), static_cast(arena_indexes_.size()))]; } Status JemallocNodumpAllocator::InitializeArenas() { diff --git a/util/bloom_impl.h b/util/bloom_impl.h index 53b70aa68..c9bbb125b 100644 --- a/util/bloom_impl.h +++ b/util/bloom_impl.h @@ -199,7 +199,7 @@ class FastLocalBloomImpl { static inline void AddHash(uint32_t h1, uint32_t h2, uint32_t len_bytes, int num_probes, char *data) { - uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6; AddHashPrepared(h2, num_probes, data + bytes_to_cache_line); } @@ -216,7 +216,7 @@ class FastLocalBloomImpl { static inline void PrepareHash(uint32_t h1, uint32_t len_bytes, const char *data, uint32_t /*out*/ *byte_offset) { - uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6; PREFETCH(data + bytes_to_cache_line, 0 /* rw */, 1 /* locality */); PREFETCH(data + bytes_to_cache_line + 63, 0 /* rw */, 1 /* locality */); *byte_offset = bytes_to_cache_line; @@ -224,7 +224,7 @@ class FastLocalBloomImpl { static inline bool HashMayMatch(uint32_t h1, uint32_t h2, uint32_t len_bytes, int num_probes, const char *data) { - uint32_t bytes_to_cache_line = FastRange32(len_bytes >> 6, h1) << 6; + uint32_t bytes_to_cache_line = FastRange32(h1, len_bytes >> 6) << 6; return HashMayMatchPrepared(h2, num_probes, data + bytes_to_cache_line); } diff --git a/util/dynamic_bloom.h b/util/dynamic_bloom.h index 40cd29404..0ff1053ca 100644 --- a/util/dynamic_bloom.h +++ b/util/dynamic_bloom.h @@ -126,7 +126,7 @@ inline void DynamicBloom::MayContain(int num_keys, Slice* keys, std::array byte_offsets; for (int i = 0; i < num_keys; ++i) { hashes[i] = BloomHash(keys[i]); - size_t a = FastRange32(kLen, hashes[i]); + size_t a = FastRange32(hashes[i], kLen); PREFETCH(data_ + a, 0, 3); byte_offsets[i] = a; } @@ -142,7 +142,7 @@ inline void DynamicBloom::MayContain(int num_keys, Slice* keys, #pragma warning(disable : 4189) #endif inline void DynamicBloom::Prefetch(uint32_t h32) { - size_t a = FastRange32(kLen, h32); + size_t a = FastRange32(h32, kLen); PREFETCH(data_ + a, 0, 3); } #if defined(_MSC_VER) @@ -171,7 +171,7 @@ inline void DynamicBloom::Prefetch(uint32_t h32) { // because of false positives.) inline bool DynamicBloom::MayContainHash(uint32_t h32) const { - size_t a = FastRange32(kLen, h32); + size_t a = FastRange32(h32, kLen); PREFETCH(data_ + a, 0, 3); return DoubleProbe(h32, a); } @@ -195,7 +195,7 @@ inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const { template inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) { - size_t a = FastRange32(kLen, h32); + size_t a = FastRange32(h32, kLen); PREFETCH(data_ + a, 0, 3); // Expand/remix with 64-bit golden ratio uint64_t h = 0x9e3779b97f4a7c13ULL * h32; From 1e77e35d269f843e4c5e2ed8edc87f07a907636b Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 17 Aug 2023 17:06:57 -0700 Subject: [PATCH 047/386] Add a per column family default temperature option for accounting (#11708) Summary: Add a column family option `default_temperature` that will be used for file reading accounting purpose, such as io statistics, for files that don't have an explicitly set temperature. This options is not a mutable one, changing its value would require a DB restart. 
This is to avoid the confusion that had the option being a mutable one, the users may expect it to take effect on all files immediately, while in reality, it would only become effective for SST files opened in the future. This `default_temperature` also just affect accounting during one DB session. It won't be recorded in manifest as the file's temperature and can be different across different DB sessions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11708 Test Plan: ``` make all check ``` Reviewed By: pdillinger Differential Revision: D48375763 Pulled By: jowlyzhang fbshipit-source-id: eb756696c14a694c6e2a93d2bb6f040563194981 --- db/version_builder.cc | 7 ++++++- include/rocksdb/advanced_options.h | 8 ++++++++ options/cf_options.cc | 5 +++++ options/cf_options.h | 2 ++ options/options.cc | 12 ++++++++++++ options/options_helper.cc | 1 + options/options_settable_test.cc | 1 + options/options_test.cc | 4 ++++ 8 files changed, 39 insertions(+), 1 deletion(-) diff --git a/db/version_builder.cc b/db/version_builder.cc index 210b0de86..b78a91643 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -1323,6 +1323,11 @@ class VersionBuilder::Rep { auto* file_meta = files_meta[file_idx].first; int level = files_meta[file_idx].second; + Temperature file_temperature = file_meta->temperature; + if (ioptions_->default_temperature != Temperature::kUnknown && + file_temperature == Temperature::kUnknown) { + file_temperature = ioptions_->default_temperature; + } TableCache::TypedHandle* handle = nullptr; statuses[file_idx] = table_cache_->FindTable( read_options, file_options_, @@ -1330,7 +1335,7 @@ class VersionBuilder::Rep { block_protection_bytes_per_key, prefix_extractor, false /*no_io */, internal_stats->GetFileReadHist(level), false, level, prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin, - file_meta->temperature); + file_temperature); if (handle != nullptr) { file_meta->table_reader_handle = handle; // Load table_reader diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index ffe992681..79003f501 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -960,6 +960,14 @@ struct AdvancedColumnFamilyOptions { Temperature bottommost_temperature = Temperature::kUnknown; Temperature last_level_temperature = Temperature::kUnknown; + // EXPERIMENTAL + // When this field is set, all SST files without an explicitly set temperature + // will be treated as if they have this temperature for file reading + // accounting purpose, such as io statistics, io perf context. + // + // Not dynamically changeable, change it requires db restart. + Temperature default_temperature = Temperature::kUnknown; + // EXPERIMENTAL // The feature is still in development and is incomplete. 
// If this option is set, when data insert time is within this time range, it diff --git a/options/cf_options.cc b/options/cf_options.cc index d30171fbc..fc2865f90 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -604,6 +604,10 @@ static std::unordered_map {offsetof(struct ImmutableCFOptions, force_consistency_checks), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"default_temperature", + {offsetof(struct ImmutableCFOptions, default_temperature), + OptionType::kTemperature, OptionVerificationType::kNormal, + OptionTypeFlags::kCompareNever}}, {"preclude_last_level_data_seconds", {offsetof(struct ImmutableCFOptions, preclude_last_level_data_seconds), OptionType::kUInt64T, OptionVerificationType::kNormal, @@ -950,6 +954,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options) num_levels(cf_options.num_levels), optimize_filters_for_hits(cf_options.optimize_filters_for_hits), force_consistency_checks(cf_options.force_consistency_checks), + default_temperature(cf_options.default_temperature), preclude_last_level_data_seconds( cf_options.preclude_last_level_data_seconds), preserve_internal_time_seconds(cf_options.preserve_internal_time_seconds), diff --git a/options/cf_options.h b/options/cf_options.h index 075d1568e..f42d6b562 100644 --- a/options/cf_options.h +++ b/options/cf_options.h @@ -72,6 +72,8 @@ struct ImmutableCFOptions { bool force_consistency_checks; + Temperature default_temperature; + uint64_t preclude_last_level_data_seconds; uint64_t preserve_internal_time_seconds; diff --git a/options/options.cc b/options/options.cc index 4e3ac4115..03289e5b6 100644 --- a/options/options.cc +++ b/options/options.cc @@ -94,6 +94,7 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options) ttl(options.ttl), periodic_compaction_seconds(options.periodic_compaction_seconds), sample_for_compression(options.sample_for_compression), + default_temperature(options.default_temperature), preclude_last_level_data_seconds( options.preclude_last_level_data_seconds), preserve_internal_time_seconds(options.preserve_internal_time_seconds), @@ -412,6 +413,17 @@ void ColumnFamilyOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.periodic_compaction_seconds: %" PRIu64, periodic_compaction_seconds); + const auto& it_temp = temperature_to_string.find(default_temperature); + std::string str_default_temperature; + if (it_temp == temperature_to_string.end()) { + assert(false); + str_default_temperature = "unknown_temperature"; + } else { + str_default_temperature = it_temp->second; + } + ROCKS_LOG_HEADER(log, + " Options.default_temperature: %s", + str_default_temperature.c_str()); ROCKS_LOG_HEADER(log, " Options.preclude_last_level_data_seconds: %" PRIu64, preclude_last_level_data_seconds); ROCKS_LOG_HEADER(log, " Options.preserve_internal_time_seconds: %" PRIu64, diff --git a/options/options_helper.cc b/options/options_helper.cc index 7eaad6ba6..65467f765 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -317,6 +317,7 @@ void UpdateColumnFamilyOptions(const ImmutableCFOptions& ioptions, ioptions.preserve_internal_time_seconds; cf_opts->persist_user_defined_timestamps = ioptions.persist_user_defined_timestamps; + cf_opts->default_temperature = ioptions.default_temperature; // TODO(yhchiang): find some way to handle the following derived options // * max_file_size diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index fa001d2c0..6541bf0a4 100644 
--- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -551,6 +551,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "prepopulate_blob_cache=kDisable;" "bottommost_temperature=kWarm;" "last_level_temperature=kWarm;" + "default_temperature=kHot;" "preclude_last_level_data_seconds=86400;" "preserve_internal_time_seconds=86400;" "compaction_options_fifo={max_table_files_size=3;allow_" diff --git a/options/options_test.cc b/options/options_test.cc index 067b00b77..855243c95 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -130,6 +130,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"blob_file_starting_level", "1"}, {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, + {"default_temperature", "kHot"}, {"persist_user_defined_timestamps", "true"}, {"memtable_max_range_deletions", "0"}, }; @@ -284,6 +285,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); + ASSERT_EQ(new_cf_opt.default_temperature, Temperature::kHot); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0); @@ -2339,6 +2341,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { {"blob_file_starting_level", "1"}, {"prepopulate_blob_cache", "kDisable"}, {"last_level_temperature", "kWarm"}, + {"default_temperature", "kHot"}, {"persist_user_defined_timestamps", "true"}, {"memtable_max_range_deletions", "0"}, }; @@ -2491,6 +2494,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) { ASSERT_EQ(new_cf_opt.prepopulate_blob_cache, PrepopulateBlobCache::kDisable); ASSERT_EQ(new_cf_opt.last_level_temperature, Temperature::kWarm); ASSERT_EQ(new_cf_opt.bottommost_temperature, Temperature::kWarm); + ASSERT_EQ(new_cf_opt.default_temperature, Temperature::kHot); ASSERT_EQ(new_cf_opt.persist_user_defined_timestamps, true); ASSERT_EQ(new_cf_opt.memtable_max_range_deletions, 0); From a67ef998dc231f60d12eb43eedb61bf9728b51b7 Mon Sep 17 00:00:00 2001 From: Han Zhu Date: Fri, 18 Aug 2023 10:19:33 -0700 Subject: [PATCH 048/386] Explicitly instantiate MaybeReadBlockAndLoadToCache as well (#11714) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11714 Fixes T161017540. The staging build starts failing with an undefined symbol error: ``` ld.lld: error: undefined symbol: std::enable_if::type rocksdb::BlockBasedTable::MaybeReadBlockAndLoadToCache(rocksdb::FilePrefetchBuffer*, rocksdb::ReadOptions const&, rocksdb::BlockHandle const&, rocksdb::UncompressionDict const&, bool, rocksdb::CachableEntry*, rocksdb::GetContext*, rocksdb::BlockCacheLookupContext*, rocksdb::BlockContents*, bool) const ``` This is the `MaybeReadBlockAndLoadToCache` function where `TBlocklike = ParsedFullFilterBlock`. The trigger was an FDO profile update D48261413. `MaybeReadBlockAndLoadToCache` is used in the same translation unit `block_based_table_reader.cc`, and also in another file `partitioned_filter_block.cc`. The later was the file that couldn't find the symbol. It seems after the FDO profile update, `MaybeReadBlockAndLoadToCache` may've got inlined into its caller in `block_based_table_reader.cc`. And with no knowledge of other usages, the symbol got stripped. Explicitly instantiate the template similar to how `RetrieveBlock` was handled. 
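For readers unfamiliar with the technique, a generic sketch (not RocksDB code) of why an explicit instantiation in the defining .cc file keeps the out-of-line symbol available to other translation units:

```
// widget.h: only the declaration is visible to other translation units.
struct Foo {
  int value;
};
struct Widget {
  template <typename T>
  int Load(const T& src) const;
};

// widget.cc: the template definition stays here. If the compiler inlines
// Load<Foo> into its local caller (e.g. after an FDO profile update) and no
// explicit instantiation exists, no out-of-line symbol is emitted and a
// caller in another .cc fails to link with "undefined symbol".
template <typename T>
int Widget::Load(const T& src) const {
  return src.value;
}

// Explicit instantiation forces the symbol to be emitted in this object file.
template int Widget::Load<Foo>(const Foo&) const;
```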
Reviewed By: pdillinger, akankshamahajan15 Differential Revision: D48400574 fbshipit-source-id: d4a80999bfb6ce4afa80678444139fcd8ae84aa4 --- table/block_based/block_based_table_reader.cc | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index bebeece75..df6c0a912 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -88,21 +88,27 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) { // Explicitly instantiate templates for each "blocklike" type we use (and // before implicit specialization). // This makes it possible to keep the template definitions in the .cc file. -#define INSTANTIATE_RETRIEVE_BLOCK(T) \ +#define INSTANTIATE_BLOCKLIKE_TEMPLATES(T) \ template Status BlockBasedTable::RetrieveBlock( \ FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ CachableEntry* out_parsed_block, GetContext* get_context, \ BlockCacheLookupContext* lookup_context, bool for_compaction, \ - bool use_cache, bool async_read) const; - -INSTANTIATE_RETRIEVE_BLOCK(ParsedFullFilterBlock); -INSTANTIATE_RETRIEVE_BLOCK(UncompressionDict); -INSTANTIATE_RETRIEVE_BLOCK(Block_kData); -INSTANTIATE_RETRIEVE_BLOCK(Block_kIndex); -INSTANTIATE_RETRIEVE_BLOCK(Block_kFilterPartitionIndex); -INSTANTIATE_RETRIEVE_BLOCK(Block_kRangeDeletion); -INSTANTIATE_RETRIEVE_BLOCK(Block_kMetaIndex); + bool use_cache, bool async_read) const; \ + template Status BlockBasedTable::MaybeReadBlockAndLoadToCache( \ + FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ + const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ + bool for_compaction, CachableEntry* block_entry, \ + GetContext* get_context, BlockCacheLookupContext* lookup_context, \ + BlockContents* contents, bool async_read) const; + +INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock); +INSTANTIATE_BLOCKLIKE_TEMPLATES(UncompressionDict); +INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kData); +INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kIndex); +INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kFilterPartitionIndex); +INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kRangeDeletion); +INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kMetaIndex); } // namespace ROCKSDB_NAMESPACE From a1743e85bea7835229dcb4abbbaa4cfc5f6b6187 Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 18 Aug 2023 11:19:48 -0700 Subject: [PATCH 049/386] Implement a allow cache hits admission policy for the compressed secondary cache (#11713) Summary: This PR implements a new admission policy for the compressed secondary cache, which includes the functionality of the existing policy, and also admits items evicted from the primary block cache with the hit bit set. Effectively, the new policy works as follows - 1. When an item is demoted from the primary cache without a hit, a placeholder is inserted in the compressed cache. A second demotion will insert the full entry. 2. When an item is promoted from the compressed cache to the primary cache for the first time, a placeholder is inserted in the primary. The second promotion inserts the full entry, while erasing it form the compressed cache. 3. If an item is demoted from the primary cache with the hit bit set, it is immediately inserted in the compressed secondary cache. 
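Schematically, the demotion side of the new policy can be sketched as below (the helper name is illustrative; the real logic lives in CacheWithSecondaryAdapter::EvictionHandler and CompressedSecondaryCache::Insert further down in this patch):

```
#include "rocksdb/advanced_cache.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"

// Sketch of what happens when an entry is evicted from the primary cache
// under kAdmPolicyAllowCacheHits.
void OnEvictFromPrimary(const rocksdb::Slice& key,
                        rocksdb::Cache::ObjectPtr obj,
                        const rocksdb::Cache::CacheItemHelper* helper,
                        bool was_hit,
                        rocksdb::SecondaryCache* compressed_sec_cache) {
  // Entries that were hit while in the primary cache skip the placeholder
  // step and are admitted immediately (force_insert == true). Otherwise the
  // two-step scheme applies: the first demotion inserts a dummy entry, the
  // second demotion inserts the full entry.
  const bool force_insert = was_hit;
  compressed_sec_cache->Insert(key, obj, helper, force_insert)
      .PermitUncheckedError();
}
```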
The ```TieredVolatileCacheOptions``` has been updated with a new option, ```adm_policy```, which allows the policy to be selected. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11713 Reviewed By: pdillinger Differential Revision: D48444512 Pulled By: anand1976 fbshipit-source-id: b4cbf8c169a88097dff08e36e8bc4b3088de1492 --- cache/clock_cache.cc | 24 ++- cache/clock_cache.h | 9 +- cache/compressed_secondary_cache.cc | 21 +-- cache/compressed_secondary_cache.h | 3 +- cache/compressed_secondary_cache_test.cc | 142 +++++++++++++++--- cache/lru_cache.cc | 6 +- cache/lru_cache_test.cc | 3 +- cache/secondary_cache.cc | 3 +- cache/secondary_cache_adapter.cc | 27 +++- cache/secondary_cache_adapter.h | 4 +- include/rocksdb/advanced_cache.h | 3 +- include/rocksdb/cache.h | 14 ++ include/rocksdb/secondary_cache.h | 3 +- options/customizable_test.cc | 3 +- ...lement_allow_cache_hit_admission_policy.md | 1 + utilities/fault_injection_secondary_cache.cc | 4 +- utilities/fault_injection_secondary_cache.h | 3 +- 17 files changed, 217 insertions(+), 56 deletions(-) create mode 100644 unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index e648da9fa..cdb97e296 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -118,6 +118,7 @@ inline bool ClockUpdate(ClockHandle& h) { // not aggressively uint64_t new_meta = (uint64_t{ClockHandle::kStateVisible} << ClockHandle::kStateShift) | + (meta & ClockHandle::kHitBitMask) | (new_count << ClockHandle::kReleaseCounterShift) | (new_count << ClockHandle::kAcquireCounterShift); h.meta.compare_exchange_strong(meta, new_meta, std::memory_order_relaxed); @@ -125,10 +126,11 @@ inline bool ClockUpdate(ClockHandle& h) { } // Otherwise, remove entry (either unreferenced invisible or // unreferenced and expired visible). - if (h.meta.compare_exchange_strong( - meta, - uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, - std::memory_order_acquire)) { + if (h.meta.compare_exchange_strong(meta, + (uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift) | + (meta & ClockHandle::kHitBitMask), + std::memory_order_acquire)) { // Took ownership. return true; } else { @@ -528,10 +530,11 @@ void BaseClockTable::TrackAndReleaseEvictedEntry( if (eviction_callback_) { // For key reconstructed from hash UniqueId64x2 unhashed; - took_value_ownership = - eviction_callback_(ClockCacheShard::ReverseHash( - h->GetHash(), &unhashed, hash_seed_), - reinterpret_cast(h)); + took_value_ownership = eviction_callback_( + ClockCacheShard::ReverseHash( + h->GetHash(), &unhashed, hash_seed_), + reinterpret_cast(h), + h->meta.load(std::memory_order_relaxed) & ClockHandle::kHitBitMask); } if (!took_value_ownership) { h->FreeData(allocator_); @@ -825,6 +828,11 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( // Acquired a read reference if (h->hashed_key == hashed_key) { // Match + // Update the hit bit + if (eviction_callback_) { + h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift, + std::memory_order_relaxed); + } return true; } else { // Mismatch. Pretend we never took the reference diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 851dab759..65f8662a8 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -305,7 +305,7 @@ struct ClockHandle : public ClockHandleBasicData { // state of the handle. 
The meta word looks like this: // low bits high bits // ----------------------------------------------------------------------- - // | acquire counter | release counter | state marker | + // | acquire counter | release counter | hit bit | state marker | // ----------------------------------------------------------------------- // For reading or updating counters in meta word. @@ -319,8 +319,13 @@ struct ClockHandle : public ClockHandleBasicData { static constexpr uint64_t kReleaseIncrement = uint64_t{1} << kReleaseCounterShift; + // For setting the hit bit + static constexpr uint8_t kHitBitShift = 2U * kCounterNumBits; + static constexpr uint64_t kHitBitMask = uint64_t{1} << kHitBitShift; + ; + // For reading or updating the state marker in meta word - static constexpr uint8_t kStateShift = 2U * kCounterNumBits; + static constexpr uint8_t kStateShift = kHitBitShift + 1; // Bits contribution to state marker. // Occupied means any state other than empty diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index f5b7af308..c14e58d4b 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -109,20 +109,23 @@ std::unique_ptr CompressedSecondaryCache::Lookup( Status CompressedSecondaryCache::Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) { + const Cache::CacheItemHelper* helper, + bool force_insert) { if (value == nullptr) { return Status::InvalidArgument(); } - Cache::Handle* lru_handle = cache_->Lookup(key); auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge); - if (lru_handle == nullptr) { - PERF_COUNTER_ADD(compressed_sec_cache_insert_dummy_count, 1); - // Insert a dummy handle if the handle is evicted for the first time. - return cache_->Insert(key, /*obj=*/nullptr, internal_helper, - /*charge=*/0); - } else { - cache_->Release(lru_handle, /*erase_if_last_ref=*/false); + if (!force_insert) { + Cache::Handle* lru_handle = cache_->Lookup(key); + if (lru_handle == nullptr) { + PERF_COUNTER_ADD(compressed_sec_cache_insert_dummy_count, 1); + // Insert a dummy handle if the handle is evicted for the first time. 
+ return cache_->Insert(key, /*obj=*/nullptr, internal_helper, + /*charge=*/0); + } else { + cache_->Release(lru_handle, /*erase_if_last_ref=*/false); + } } size_t size = (*helper->size_cb)(value); diff --git a/cache/compressed_secondary_cache.h b/cache/compressed_secondary_cache.h index 7bee05955..777782fc3 100644 --- a/cache/compressed_secondary_cache.h +++ b/cache/compressed_secondary_cache.h @@ -77,7 +77,8 @@ class CompressedSecondaryCache : public SecondaryCache { const char* Name() const override { return "CompressedSecondaryCache"; } Status Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) override; + const Cache::CacheItemHelper* helper, + bool force_insert) override; std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index d350234f3..54727a2fc 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -12,6 +12,7 @@ #include "cache/secondary_cache_adapter.h" #include "memory/jemalloc_nodump_allocator.h" +#include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "test_util/secondary_cache_test_util.h" #include "test_util/testharness.h" @@ -51,7 +52,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::string str1(rnd.RandomString(1000)); TestItem item1(str1.data(), str1.length()); // A dummy handle is inserted if the item is inserted for the first time. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); + ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper(), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1); ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, 0); ASSERT_EQ(get_perf_context()->compressed_sec_cache_compressed_bytes, 0); @@ -62,7 +63,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, ASSERT_EQ(handle1_1, nullptr); // Insert and Lookup the item k1 for the second time and advise erasing it. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); + ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper(), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1); std::unique_ptr handle1_2 = @@ -94,14 +95,14 @@ class CompressedSecondaryCacheTestBase : public testing::Test, // Insert and Lookup the item k2. std::string str2(rnd.RandomString(1000)); TestItem item2(str2.data(), str2.length()); - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); + ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2); std::unique_ptr handle2_1 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, kept_in_sec_cache); ASSERT_EQ(handle2_1, nullptr); - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); + ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 2); if (sec_cache_is_compressed) { ASSERT_EQ(get_perf_context()->compressed_sec_cache_uncompressed_bytes, @@ -183,15 +184,15 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::string str1(rnd.RandomString(1000)); TestItem item1(str1.data(), str1.length()); // Insert a dummy handle. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); + ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper(), false)); // Insert k1. 
- ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); + ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper(), false)); // Insert and Lookup the second item. std::string str2(rnd.RandomString(200)); TestItem item2(str2.data(), str2.length()); // Insert a dummy handle, k1 is not evicted. - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); + ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); bool kept_in_sec_cache{false}; std::unique_ptr handle1 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, @@ -199,7 +200,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, ASSERT_EQ(handle1, nullptr); // Insert k2 and k1 is evicted. - ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper())); + ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); std::unique_ptr handle2 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, kept_in_sec_cache); @@ -210,7 +211,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0); // Insert k1 again and a dummy handle is inserted. - ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper())); + ASSERT_OK(sec_cache->Insert(key1, &item1, GetHelper(), false)); std::unique_ptr handle1_1 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, @@ -228,8 +229,8 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::string str3 = rnd.RandomString(10); TestItem item3(str3.data(), str3.length()); // The Status is OK because a dummy handle is inserted. - ASSERT_OK(sec_cache->Insert(key3, &item3, GetHelperFail())); - ASSERT_NOK(sec_cache->Insert(key3, &item3, GetHelperFail())); + ASSERT_OK(sec_cache->Insert(key3, &item3, GetHelperFail(), false)); + ASSERT_NOK(sec_cache->Insert(key3, &item3, GetHelperFail(), false)); sec_cache.reset(); } @@ -904,10 +905,10 @@ TEST_P(CompressedSecondaryCacheTestWithCompressionParam, EntryRoles) { Slice ith_key = Slice(junk.data(), 16); get_perf_context()->Reset(); - ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role))); + ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 1U); - ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role))); + ASSERT_OK(sec_cache->Insert(ith_key, &item, GetHelper(role), false)); ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1U); bool kept_in_sec_cache{true}; @@ -976,24 +977,54 @@ TEST_P(CompressedSecondaryCacheTest, SplictValueAndMergeChunksTest) { SplictValueAndMergeChunksTest(); } -class CompressedSecCacheTestWithTiered : public ::testing::Test { +using secondary_cache_test_util::WithCacheType; + +class CompressedSecCacheTestWithTiered + : public testing::Test, + public WithCacheType, + public testing::WithParamInterface< + std::tuple> { public: + using secondary_cache_test_util::WithCacheType::TestItem; CompressedSecCacheTestWithTiered() { LRUCacheOptions lru_opts; + HyperClockCacheOptions hcc_opts( + /*_capacity=*/70 << 20, + /*_estimated_entry_charge=*/256 << 10, + /*_num_shard_bits=*/0); TieredVolatileCacheOptions opts; lru_opts.capacity = 70 << 20; - opts.cache_opts = &lru_opts; - opts.cache_type = PrimaryCacheType::kCacheTypeLRU; + lru_opts.num_shard_bits = 0; + lru_opts.high_pri_pool_ratio = 0; + opts.cache_type = std::get<0>(GetParam()); + if (opts.cache_type == PrimaryCacheType::kCacheTypeLRU) { + opts.cache_opts = &lru_opts; + } else { + opts.cache_opts = &hcc_opts; + } + opts.adm_policy = 
std::get<1>(GetParam()); + ; opts.comp_cache_opts.capacity = 30 << 20; + opts.comp_cache_opts.num_shard_bits = 0; cache_ = NewTieredVolatileCache(opts); cache_res_mgr_ = std::make_shared>( cache_); } + const std::string& Type() const override { + if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { + return lru_str; + } else { + return hcc_str; + } + } + protected: CacheReservationManager* cache_res_mgr() { return cache_res_mgr_.get(); } + Cache* GetTieredCache() { return cache_.get(); } + Cache* GetCache() { return static_cast_with_check( cache_.get()) @@ -1013,13 +1044,19 @@ class CompressedSecCacheTestWithTiered : public ::testing::Test { private: std::shared_ptr cache_; std::shared_ptr cache_res_mgr_; + static std::string lru_str; + static std::string hcc_str; }; +std::string CompressedSecCacheTestWithTiered::lru_str(WithCacheType::kLRU); +std::string CompressedSecCacheTestWithTiered::hcc_str( + WithCacheType::kFixedHyperClock); + bool CacheUsageWithinBounds(size_t val1, size_t val2, size_t error) { return ((val1 < (val2 + error)) && (val1 > (val2 - error))); } -TEST_F(CompressedSecCacheTestWithTiered, CacheReservationManager) { +TEST_P(CompressedSecCacheTestWithTiered, CacheReservationManager) { CompressedSecondaryCache* sec_cache = reinterpret_cast(GetSecondaryCache()); @@ -1041,7 +1078,7 @@ TEST_F(CompressedSecCacheTestWithTiered, CacheReservationManager) { EXPECT_EQ(sec_cache->TEST_GetUsage(), 0); } -TEST_F(CompressedSecCacheTestWithTiered, +TEST_P(CompressedSecCacheTestWithTiered, CacheReservationManagerMultipleUpdate) { CompressedSecondaryCache* sec_cache = reinterpret_cast(GetSecondaryCache()); @@ -1067,6 +1104,75 @@ TEST_F(CompressedSecCacheTestWithTiered, EXPECT_EQ(sec_cache->TEST_GetUsage(), 0); } +TEST_P(CompressedSecCacheTestWithTiered, AdmissionPolicy) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_BYPASS("This test requires LZ4 support\n"); + return; + } + + Cache* tiered_cache = GetTieredCache(); + Cache* cache = GetCache(); + std::vector keys; + std::vector vals; + // Make the item size slightly less than 10MB to ensure we can fit the + // expected number of items in the cache + int item_size = (10 << 20) - (1 << 18); + int i; + Random rnd(301); + for (i = 0; i < 14; ++i) { + keys.emplace_back(CacheKey::CreateUniqueForCacheLifetime(cache)); + vals.emplace_back(rnd.RandomString(item_size)); + } + + for (i = 0; i < 7; ++i) { + TestItem* item = new TestItem(vals[i].data(), vals[i].length()); + ASSERT_OK(tiered_cache->Insert(keys[i].AsSlice(), item, GetHelper(), + vals[i].length())); + } + + Cache::Handle* handle1; + handle1 = tiered_cache->Lookup(keys[0].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_NE(handle1, nullptr); + Cache::Handle* handle2; + handle2 = tiered_cache->Lookup(keys[1].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_NE(handle2, nullptr); + tiered_cache->Release(handle1); + tiered_cache->Release(handle2); + + // Flush all previous entries out of the primary cache + for (i = 7; i < 14; ++i) { + TestItem* item = new TestItem(vals[i].data(), vals[i].length()); + ASSERT_OK(tiered_cache->Insert(keys[i].AsSlice(), item, GetHelper(), + vals[i].length())); + } + // keys 0 and 1 should be found as they had the hit bit set + handle1 = tiered_cache->Lookup(keys[0].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_NE(handle1, nullptr); + handle2 = tiered_cache->Lookup(keys[1].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_NE(handle2, nullptr); + 
tiered_cache->Release(handle1); + tiered_cache->Release(handle2); + + handle1 = tiered_cache->Lookup(keys[2].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_EQ(handle1, nullptr); + handle1 = tiered_cache->Lookup(keys[3].AsSlice(), GetHelper(), + /*context*/ this, Cache::Priority::LOW); + ASSERT_EQ(handle1, nullptr); +} + +INSTANTIATE_TEST_CASE_P( + CompressedSecCacheTests, CompressedSecCacheTestWithTiered, + ::testing::Values( + std::make_tuple(PrimaryCacheType::kCacheTypeLRU, + TieredAdmissionPolicy::kAdmPolicyAllowCacheHits), + std::make_tuple(PrimaryCacheType::kCacheTypeHCC, + TieredAdmissionPolicy::kAdmPolicyAllowCacheHits))); + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index b72c82403..9d1695224 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -340,7 +340,8 @@ void LRUCacheShard::NotifyEvicted( for (LRUHandle* entry : evicted_handles) { if (eviction_callback_ && eviction_callback_(entry->key(), - reinterpret_cast(entry))) { + reinterpret_cast(entry), + entry->HasHit())) { // Callback took ownership of obj; just free handle free(entry); } else { @@ -505,7 +506,8 @@ bool LRUCacheShard::Release(LRUHandle* e, bool /*useful*/, // Only call eviction callback if we're sure no one requested erasure // FIXME: disabled because of test churn if (false && was_in_cache && !erase_if_last_ref && eviction_callback_ && - eviction_callback_(e->key(), reinterpret_cast(e))) { + eviction_callback_(e->key(), reinterpret_cast(e), + e->HasHit())) { // Callback took ownership of obj; just free handle free(e); } else { diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 7519a7b91..d00e63055 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -996,7 +996,8 @@ class TestSecondaryCache : public SecondaryCache { void ResetInjectFailure() { inject_failure_ = false; } Status Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) override { + const Cache::CacheItemHelper* helper, + bool /*force_insert*/) override { if (inject_failure_) { return Status::Corruption("Insertion Data Corrupted"); } diff --git a/cache/secondary_cache.cc b/cache/secondary_cache.cc index 8eba8b098..5fecc0a6e 100644 --- a/cache/secondary_cache.cc +++ b/cache/secondary_cache.cc @@ -38,7 +38,8 @@ Status SecondaryCache::InsertSaved(const Slice& key, const Slice& saved) { CacheEntryRole::kMisc, &NoopDelete, &SliceSize, &SliceSaveTo, &FailCreate, &helper_no_secondary}; // NOTE: depends on Insert() being synchronous, not keeping pointer `&saved` - return Insert(key, const_cast(&saved), &helper); + return Insert(key, const_cast(&saved), &helper, + /*force_insert=*/true); } } // namespace ROCKSDB_NAMESPACE diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index 06441a17a..7f5968bb7 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -73,13 +73,16 @@ Cache::ObjectPtr const kDummyObj = const_cast(&kDummy); // CacheWithSecondaryAdapter::CacheWithSecondaryAdapter( std::shared_ptr target, - std::shared_ptr secondary_cache, bool distribute_cache_res) + std::shared_ptr secondary_cache, + TieredAdmissionPolicy adm_policy, bool distribute_cache_res) : CacheWrapper(std::move(target)), secondary_cache_(std::move(secondary_cache)), + adm_policy_(adm_policy), distribute_cache_res_(distribute_cache_res) { - target_->SetEvictionCallback([this](const Slice& key, Handle* handle) { - return EvictionHandler(key, handle); 
- }); + target_->SetEvictionCallback( + [this](const Slice& key, Handle* handle, bool was_hit) { + return EvictionHandler(key, handle, was_hit); + }); if (distribute_cache_res_) { size_t sec_capacity = 0; pri_cache_res_ = std::make_shared( @@ -114,14 +117,18 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { } bool CacheWithSecondaryAdapter::EvictionHandler(const Slice& key, - Handle* handle) { + Handle* handle, bool was_hit) { auto helper = GetCacheItemHelper(handle); if (helper->IsSecondaryCacheCompatible()) { auto obj = target_->Value(handle); // Ignore dummy entry if (obj != kDummyObj) { + bool hit = false; + if (adm_policy_ == TieredAdmissionPolicy::kAdmPolicyAllowCacheHits) { + hit = was_hit; + } // Spill into secondary cache. - secondary_cache_->Insert(key, obj, helper).PermitUncheckedError(); + secondary_cache_->Insert(key, obj, helper, hit).PermitUncheckedError(); } } // Never takes ownership of obj @@ -410,6 +417,10 @@ std::shared_ptr NewTieredVolatileCache( return nullptr; } + if (opts.adm_policy >= TieredAdmissionPolicy::kAdmPolicyMax) { + return nullptr; + } + std::shared_ptr cache; if (opts.cache_type == PrimaryCacheType::kCacheTypeLRU) { LRUCacheOptions cache_opts = @@ -421,6 +432,7 @@ std::shared_ptr NewTieredVolatileCache( HyperClockCacheOptions cache_opts = *(static_cast_with_check( opts.cache_opts)); + cache_opts.capacity += opts.comp_cache_opts.capacity; cache = cache_opts.MakeSharedCache(); } else { return nullptr; @@ -428,6 +440,7 @@ std::shared_ptr NewTieredVolatileCache( std::shared_ptr sec_cache; sec_cache = NewCompressedSecondaryCache(opts.comp_cache_opts); - return std::make_shared(cache, sec_cache, true); + return std::make_shared( + cache, sec_cache, opts.adm_policy, /*distribute_cache_res=*/true); } } // namespace ROCKSDB_NAMESPACE diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 4ac93898e..4ef048de5 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -15,6 +15,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { explicit CacheWithSecondaryAdapter( std::shared_ptr target, std::shared_ptr secondary_cache, + TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto, bool distribute_cache_res = false); ~CacheWithSecondaryAdapter() override; @@ -47,7 +48,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { SecondaryCache* TEST_GetSecondaryCache() { return secondary_cache_.get(); } private: - bool EvictionHandler(const Slice& key, Handle* handle); + bool EvictionHandler(const Slice& key, Handle* handle, bool was_hit); void StartAsyncLookupOnMySecondary(AsyncLookupHandle& async_handle); @@ -61,6 +62,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { void CleanupCacheObject(ObjectPtr obj, const CacheItemHelper* helper); std::shared_ptr secondary_cache_; + TieredAdmissionPolicy adm_policy_; // Whether to proportionally distribute cache memory reservations, i.e // placeholder entries with null value and a non-zero charge, across // the primary and secondary caches. diff --git a/include/rocksdb/advanced_cache.h b/include/rocksdb/advanced_cache.h index 997a41499..ec4a5b296 100644 --- a/include/rocksdb/advanced_cache.h +++ b/include/rocksdb/advanced_cache.h @@ -514,7 +514,8 @@ class Cache { // returns `true` if it has taken ownership of the Value (object), or // `false` if the cache should destroy it as usual. Regardless, Ref() and // Release() cannot be called on this Handle that is poised for eviction. 
- using EvictionCallback = std::function; + using EvictionCallback = + std::function; // Sets an eviction callback for this Cache. Not thread safe and only // supports being set once, so should only be used during initialization // or destruction, guaranteed before or after any thread-shared operations. diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 304e4ebd7..2891c9d47 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -450,6 +450,19 @@ enum PrimaryCacheType { kCacheTypeMax, }; +enum TieredAdmissionPolicy { + // Automatically select the admission policy + kAdmPolicyAuto, + // During promotion/demotion, first time insert a placeholder entry, second + // time insert the full entry if the placeholder is found, i.e insert on + // second hit + kAdmPolicyPlaceholder, + // Same as kAdmPolicyPlaceholder, but also if an entry in the primary cache + // was a hit, then force insert it into the compressed secondary cache + kAdmPolicyAllowCacheHits, + kAdmPolicyMax, +}; + // A 2-tier cache with a primary block cache, and a compressed secondary // cache. The returned cache instance will internally allocate a primary // uncompressed cache of the specified type, and a compressed secondary @@ -459,6 +472,7 @@ enum PrimaryCacheType { struct TieredVolatileCacheOptions { ShardedCacheOptions* cache_opts; PrimaryCacheType cache_type; + TieredAdmissionPolicy adm_policy; CompressedSecondaryCacheOptions comp_cache_opts; }; diff --git a/include/rocksdb/secondary_cache.h b/include/rocksdb/secondary_cache.h index 7c8828168..292c0ffe0 100644 --- a/include/rocksdb/secondary_cache.h +++ b/include/rocksdb/secondary_cache.h @@ -79,7 +79,8 @@ class SecondaryCache : public Customizable { // and writes it to this cache tier. OK may be returned even if the insertion // is not made. virtual Status Insert(const Slice& key, Cache::ObjectPtr obj, - const Cache::CacheItemHelper* helper) = 0; + const Cache::CacheItemHelper* helper, + bool force_insert) = 0; // Insert a value from its saved/persistable data (typically uncompressed // block), as if generated by SaveToCallback/SizeCallback. This can be used diff --git a/options/customizable_test.cc b/options/customizable_test.cc index d88777793..125a5aabe 100644 --- a/options/customizable_test.cc +++ b/options/customizable_test.cc @@ -1230,7 +1230,8 @@ class TestSecondaryCache : public SecondaryCache { static const char* kClassName() { return "Test"; } const char* Name() const override { return kClassName(); } Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/, - const Cache::CacheItemHelper* /*helper*/) override { + const Cache::CacheItemHelper* /*helper*/, + bool /*force_insert*/) override { return Status::NotSupported(); } std::unique_ptr Lookup( diff --git a/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md b/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md new file mode 100644 index 000000000..97ed644fa --- /dev/null +++ b/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md @@ -0,0 +1 @@ +Implement a new admission policy for the compressed secondary cache that admits blocks evicted from the primary cache with the hit bit set. This policy can be specified in TieredVolatileCacheOptions by setting the newly added adm_policy option. 
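A minimal configuration sketch, following the options exercised in the new test above (the capacities and the LRU primary are arbitrary choices for the example; kCacheTypeHCC with HyperClockCacheOptions works the same way):

```
#include <memory>

#include "rocksdb/cache.h"

std::shared_ptr<rocksdb::Cache> MakeTieredCacheWithHitAdmission() {
  rocksdb::LRUCacheOptions primary_opts;
  primary_opts.capacity = 64 << 20;

  rocksdb::TieredVolatileCacheOptions opts;
  opts.cache_type = rocksdb::PrimaryCacheType::kCacheTypeLRU;
  opts.cache_opts = &primary_opts;
  // Admit blocks that are evicted from the primary cache with the hit bit
  // set directly into the compressed secondary cache.
  opts.adm_policy = rocksdb::TieredAdmissionPolicy::kAdmPolicyAllowCacheHits;
  opts.comp_cache_opts.capacity = 32 << 20;
  return rocksdb::NewTieredVolatileCache(opts);
}
```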
diff --git a/utilities/fault_injection_secondary_cache.cc b/utilities/fault_injection_secondary_cache.cc index d7a2a1bd7..c2ea12535 100644 --- a/utilities/fault_injection_secondary_cache.cc +++ b/utilities/fault_injection_secondary_cache.cc @@ -78,13 +78,13 @@ FaultInjectionSecondaryCache::GetErrorContext() { Status FaultInjectionSecondaryCache::Insert( const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) { + const Cache::CacheItemHelper* helper, bool force_insert) { ErrorContext* ctx = GetErrorContext(); if (ctx->rand.OneIn(prob_)) { return Status::IOError(); } - return base_->Insert(key, value, helper); + return base_->Insert(key, value, helper, force_insert); } std::unique_ptr diff --git a/utilities/fault_injection_secondary_cache.h b/utilities/fault_injection_secondary_cache.h index ed89f655a..60488dcfb 100644 --- a/utilities/fault_injection_secondary_cache.h +++ b/utilities/fault_injection_secondary_cache.h @@ -32,7 +32,8 @@ class FaultInjectionSecondaryCache : public SecondaryCache { const char* Name() const override { return "FaultInjectionSecondaryCache"; } Status Insert(const Slice& key, Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper) override; + const Cache::CacheItemHelper* helper, + bool force_insert) override; std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, From 0fa0c97d3e9ac5dfc2e7ae94834b0850cdef5df7 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 18 Aug 2023 11:21:45 -0700 Subject: [PATCH 050/386] Timeout in microsecond option in WaitForCompactOptions (#11711) Summary: While it's rare, we may run into a scenario where `WaitForCompact()` waits for background jobs indefinitely. For example, not enough space error will add the job back to the queue while WaitForCompact() waits for _all jobs_ including the jobs that are in the queue to be completed. 
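From the caller's side, the new knob looks roughly like this (the one-second deadline and the function name are arbitrary choices for the example):

```
#include <chrono>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status WaitForBackgroundWorkBounded(rocksdb::DB* db) {
  rocksdb::WaitForCompactOptions wait_opts;
  wait_opts.flush = true;  // optionally flush all column families first
  // Give up after one second instead of waiting indefinitely; the default of
  // zero keeps the old wait-as-long-as-there-is-work behavior.
  wait_opts.timeout = std::chrono::seconds(1);
  rocksdb::Status s = db->WaitForCompact(wait_opts);
  if (s.IsTimedOut()) {
    // Background flush/compaction work was still pending at the deadline.
  }
  return s;
}
```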
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11711 Test Plan: `DBCompactionWaitForCompactTest::WaitForCompactToTimeout` added `timeout` option added to the variables for all of the existing DBCompactionWaitForCompactTests Reviewed By: pdillinger, jowlyzhang Differential Revision: D48416390 Pulled By: jaykorean fbshipit-source-id: 7b6a12f705ab6c6dfaf8ad736a484ca654a86106 --- db/db_compaction_test.cc | 62 +++++++++++++++++-- db/db_impl/db_impl_compaction_flush.cc | 10 ++- include/rocksdb/db.h | 3 +- include/rocksdb/options.h | 9 ++- .../timeout_for_wait_for_compact_api.md | 1 + 5 files changed, 77 insertions(+), 8 deletions(-) create mode 100644 unreleased_history/new_features/timeout_for_wait_for_compact_api.md diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 0d23a76e5..24445ecdb 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -153,19 +153,23 @@ class DBCompactionDirectIOTest : public DBCompactionTest, DBCompactionDirectIOTest() : DBCompactionTest() {} }; +// Params: See WaitForCompactOptions for details class DBCompactionWaitForCompactTest : public DBTestBase, - public testing::WithParamInterface> { + public testing::WithParamInterface< + std::tuple> { public: DBCompactionWaitForCompactTest() : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { abort_on_pause_ = std::get<0>(GetParam()); flush_ = std::get<1>(GetParam()); close_db_ = std::get<2>(GetParam()); + timeout_ = std::get<3>(GetParam()); } bool abort_on_pause_; bool flush_; bool close_db_; + std::chrono::microseconds timeout_; Options options_; WaitForCompactOptions wait_for_compact_options_; @@ -182,6 +186,7 @@ class DBCompactionWaitForCompactTest wait_for_compact_options_.abort_on_pause = abort_on_pause_; wait_for_compact_options_.flush = flush_; wait_for_compact_options_.close_db = close_db_; + wait_for_compact_options_.timeout = timeout_; DestroyAndReopen(options_); @@ -3334,10 +3339,19 @@ TEST_F(DBCompactionTest, SuggestCompactRangeNoTwoLevel0Compactions) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } -INSTANTIATE_TEST_CASE_P(DBCompactionWaitForCompactTest, - DBCompactionWaitForCompactTest, - ::testing::Combine(testing::Bool(), testing::Bool(), - testing::Bool())); +INSTANTIATE_TEST_CASE_P( + DBCompactionWaitForCompactTest, DBCompactionWaitForCompactTest, + ::testing::Combine( + testing::Bool() /* abort_on_pause */, testing::Bool() /* flush */, + testing::Bool() /* close_db */, + testing::Values( + std::chrono::microseconds::zero(), + std::chrono::microseconds{ + 60 * 60 * + 1000000ULL} /* timeout */))); // 1 hour (long enough to + // make sure that tests + // don't fail unexpectedly + // when running slow) TEST_P(DBCompactionWaitForCompactTest, WaitForCompactWaitsOnCompactionToFinish) { @@ -3583,6 +3597,44 @@ TEST_P(DBCompactionWaitForCompactTest, ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } +TEST_P(DBCompactionWaitForCompactTest, WaitForCompactToTimeout) { + // When timeout is set, this test makes CompactionJob hangs forever + // using sync point. This test also sets the timeout to be 1 ms for + // WaitForCompact to time out early. WaitForCompact() is expected to return + // Status::TimedOut. + // When timeout is not set, we expect WaitForCompact() to wait indefinitely. + // We don't want the test to hang forever. 
When timeout = 0, this test is not + // much different from WaitForCompactWaitsOnCompactionToFinish + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBCompactionTest::WaitForCompactToTimeout", + "CompactionJob::Run():Start"}}); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Now trigger L0 compaction by adding a file + Random rnd(123); + GenerateNewRandomFile(&rnd, /* nowait */ true); + ASSERT_OK(Flush()); + + if (wait_for_compact_options_.timeout.count()) { + // Make timeout shorter to finish test early + wait_for_compact_options_.timeout = std::chrono::microseconds{1000}; + } else { + // if timeout is not set, WaitForCompact() will wait forever. We don't + // want test to hang forever. Just let compaction go through + TEST_SYNC_POINT("DBCompactionTest::WaitForCompactToTimeout"); + } + Status s = dbfull()->WaitForCompact(wait_for_compact_options_); + if (wait_for_compact_options_.timeout.count()) { + ASSERT_NOK(s); + ASSERT_TRUE(s.IsTimedOut()); + } else { + ASSERT_OK(s); + } + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); +} + static std::string ShortKey(int i) { assert(i < 10000); char buf[100]; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 157db28a1..732b0667d 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -4114,6 +4114,8 @@ Status DBImpl::WaitForCompact( } } TEST_SYNC_POINT("DBImpl::WaitForCompact:StartWaiting"); + const auto deadline = immutable_db_options_.clock->NowMicros() + + wait_for_compact_options.timeout.count(); for (;;) { if (shutting_down_.load(std::memory_order_acquire)) { return Status::ShutdownInProgress(); @@ -4125,7 +4127,13 @@ Status DBImpl::WaitForCompact( bg_flush_scheduled_ || unscheduled_compactions_ || unscheduled_flushes_ || error_handler_.IsRecoveryInProgress()) && (error_handler_.GetBGError().ok())) { - bg_cv_.Wait(); + if (wait_for_compact_options.timeout.count()) { + if (bg_cv_.TimedWait(deadline)) { + return Status::TimedOut(); + } + } else { + bg_cv_.Wait(); + } } else if (wait_for_compact_options.close_db) { reject_new_background_jobs_ = true; mutex_.Unlock(); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 2c8644409..4e764cb6a 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1477,7 +1477,8 @@ class DB { // NOTE: This may also never return if there's sufficient ongoing writes that // keeps flush and compaction going without stopping. The user would have to // cease all the writes to DB to make this eventually return in a stable - // state. + // state. The user may also use timeout option in WaitForCompactOptions to + // make this stop waiting and return when timeout expires. virtual Status WaitForCompact( const WaitForCompactOptions& /* wait_for_compact_options */) = 0; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index be42806f1..ee6a1096e 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -2120,7 +2120,8 @@ struct WaitForCompactOptions { // called) If true, Status::Aborted will be returned immediately. If false, // ContinueBackgroundWork() must be called to resume the background jobs. // Otherwise, jobs that were queued, but not scheduled yet may never finish - // and WaitForCompact() may wait indefinitely. + // and WaitForCompact() may wait indefinitely (if timeout is set, it will + // expire and return Status::TimedOut). 
bool abort_on_pause = false; // A boolean to flush all column families before starting to wait. @@ -2132,6 +2133,12 @@ struct WaitForCompactOptions { // returned Aborted status due to unreleased snapshots in the system. See // comments in DB::Close() for details. bool close_db = false; + + // Timeout in microseconds for waiting for compaction to complete. + // Status::TimedOut will be returned if timeout expires. + // when timeout == 0, WaitForCompact() will wait as long as there's background + // work to finish. + std::chrono::microseconds timeout = std::chrono::microseconds::zero(); }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/new_features/timeout_for_wait_for_compact_api.md b/unreleased_history/new_features/timeout_for_wait_for_compact_api.md new file mode 100644 index 000000000..fccc34f56 --- /dev/null +++ b/unreleased_history/new_features/timeout_for_wait_for_compact_api.md @@ -0,0 +1 @@ +Add `timeout` in microsecond option to `WaitForCompactOptions` to allow timely termination of prolonged waiting in scenarios like recurring recoverable errors, such as out-of-space situations and continuous write streams that sustain ongoing flush and compactions From c2aad555c34726ee8b91a2fb10a6e0ef6ac6786e Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 18 Aug 2023 15:01:59 -0700 Subject: [PATCH 051/386] Add `CompressionOptions::checksum` for enabling ZSTD checksum (#11666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Optionally enable zstd checksum flag (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) to detect corruption during decompression. Main changes are in compression.h: * User can set CompressionOptions::checksum to true to enable this feature. * We enable this feature in ZSTD by setting the checksum flag in ZSTD compression context: `ZSTD_CCtx`. * Uses `ZSTD_compress2()` to do compression since it supports frame parameter like the checksum flag. Compression level is also set in compression context as a flag. * Error handling during decompression to propagate error message from ZSTD. * Updated microbench to test read performance impact. About compatibility, the current compression decoders should continue to work with the data created by the new compression API `ZSTD_compress2()`: https://github.com/facebook/zstd/issues/3711. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11666 Test Plan: * Existing unit tests for zstd compression * Add unit test `DBTest2.ZSTDChecksum` to test the corruption case * Manually tested that compression levels, parallel compression, dictionary compression, index compression all work with the new ZSTD_compress2() API. * Manually tested with `sst_dump --command=recompress` that different compression levels and dictionary compression settings all work. * Manually tested compiling with older versions of ZSTD: v1.3.8, v1.1.0, v0.6.2. * Perf impact: from public benchmark data: http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html for checksum and https://github.com/facebook/zstd#benchmarks, if decompression is 1700MB/s and checksum computation is 70000MB/s, checksum computation is an additional ~2.4% time for decompression. Compression is slower and checksumming should be less noticeable. 
* Microbench: ``` TEST_TMPDIR=/dev/shm ./branch_db_basic_bench --benchmark_filter=DBGet/comp_style:0/max_data:1048576/per_key_size:256/enable_statistics:0/negative_query:0/enable_filter:0/mmap:0/compression_type:7/compression_checksum:1/no_blockcache:1/iterations:10000/threads:1 --benchmark_repetitions=100 Min out of 100 runs: Main: 10390 10436 10456 10484 10499 10535 10544 10545 10565 10568 After this PR, checksum=false 10285 10397 10503 10508 10515 10557 10562 10635 10640 10660 After this PR, checksum=true 10827 10876 10925 10949 10971 11052 11061 11063 11100 11109 ``` * db_bench: ``` Write perf TEST_TMPDIR=/dev/shm/ ./db_bench_ichecksum --benchmarks=fillseq[-X10] --compression_type=zstd --num=10000000 --compression_checksum=.. [FillSeq checksum=0] fillseq [AVG 10 runs] : 281635 (± 31711) ops/sec; 31.2 (± 3.5) MB/sec fillseq [MEDIAN 10 runs] : 294027 ops/sec; 32.5 MB/sec [FillSeq checksum=1] fillseq [AVG 10 runs] : 286961 (± 34700) ops/sec; 31.7 (± 3.8) MB/sec fillseq [MEDIAN 10 runs] : 283278 ops/sec; 31.3 MB/sec Read perf TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=readrandom[-X20] --num=100000000 --reads=1000000 --use_existing_db=true --readonly=1 [Readrandom checksum=1] readrandom [AVG 20 runs] : 360928 (± 3579) ops/sec; 4.0 (± 0.0) MB/sec readrandom [MEDIAN 20 runs] : 362468 ops/sec; 4.0 MB/sec [Readrandom checksum=0] readrandom [AVG 20 runs] : 380365 (± 2384) ops/sec; 4.2 (± 0.0) MB/sec readrandom [MEDIAN 20 runs] : 379800 ops/sec; 4.2 MB/sec Compression TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=compress[-X20] --compression_type=zstd --num=100000000 --compression_checksum=1 checksum=1 compress [AVG 20 runs] : 54074 (± 634) ops/sec; 211.2 (± 2.5) MB/sec compress [MEDIAN 20 runs] : 54396 ops/sec; 212.5 MB/sec checksum=0 compress [AVG 20 runs] : 54598 (± 393) ops/sec; 213.3 (± 1.5) MB/sec compress [MEDIAN 20 runs] : 54592 ops/sec; 213.3 MB/sec Decompression: TEST_TMPDIR=/dev/shm ./db_bench_ichecksum --benchmarks=uncompress[-X20] --compression_type=zstd --compression_checksum=1 checksum = 0 uncompress [AVG 20 runs] : 167499 (± 962) ops/sec; 654.3 (± 3.8) MB/sec uncompress [MEDIAN 20 runs] : 167210 ops/sec; 653.2 MB/sec checksum = 1 uncompress [AVG 20 runs] : 167980 (± 924) ops/sec; 656.2 (± 3.6) MB/sec uncompress [MEDIAN 20 runs] : 168465 ops/sec; 658.1 MB/sec ``` Reviewed By: ajkr Differential Revision: D48019378 Pulled By: cbi42 fbshipit-source-id: 674120c6e1853c2ced1436ac8138559d0204feba --- cache/compressed_secondary_cache.cc | 3 +- db/blob/blob_file_builder.cc | 2 +- db/blob/blob_file_builder_test.cc | 2 +- db/blob/blob_file_reader_test.cc | 2 +- db/blob/blob_source_test.cc | 2 +- db/db_test2.cc | 35 +++++ db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 3 + db_stress_tool/db_stress_test_base.cc | 3 + include/rocksdb/advanced_options.h | 8 + microbench/db_basic_bench.cc | 55 ++++++- options/cf_options.cc | 3 + options/options_settable_test.cc | 5 +- .../block_based/block_based_table_builder.cc | 11 +- table/format.cc | 20 ++- tools/db_bench_tool.cc | 9 +- tools/db_crashtest.py | 1 + .../new_features/zstd-checksum.md | 1 + util/compression.cc | 8 +- util/compression.h | 142 ++++++++++++++---- utilities/blob_db/blob_db_impl.cc | 2 +- 21 files changed, 253 insertions(+), 65 deletions(-) create mode 100644 unreleased_history/new_features/zstd-checksum.md diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index c14e58d4b..af4db81e3 100644 --- a/cache/compressed_secondary_cache.cc +++ 
b/cache/compressed_secondary_cache.cc @@ -143,7 +143,8 @@ Status CompressedSecondaryCache::Insert(const Slice& key, !cache_options_.do_not_compress_roles.Contains(helper->role)) { PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, size); CompressionOptions compression_opts; - CompressionContext compression_context(cache_options_.compression_type); + CompressionContext compression_context(cache_options_.compression_type, + compression_opts); uint64_t sample_for_compression{0}; CompressionInfo compression_info( compression_opts, compression_context, CompressionDict::GetEmptyDict(), diff --git a/db/blob/blob_file_builder.cc b/db/blob/blob_file_builder.cc index 21c1e5d41..35269fdb5 100644 --- a/db/blob/blob_file_builder.cc +++ b/db/blob/blob_file_builder.cc @@ -261,7 +261,7 @@ Status BlobFileBuilder::CompressBlobIfNeeded( // TODO: allow user CompressionOptions, including max_compressed_bytes_per_kb CompressionOptions opts; - CompressionContext context(blob_compression_type_); + CompressionContext context(blob_compression_type_, opts); constexpr uint64_t sample_for_compression = 0; CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), diff --git a/db/blob/blob_file_builder_test.cc b/db/blob/blob_file_builder_test.cc index 3a0feee45..5882e219f 100644 --- a/db/blob/blob_file_builder_test.cc +++ b/db/blob/blob_file_builder_test.cc @@ -406,7 +406,7 @@ TEST_F(BlobFileBuilderTest, Compression) { ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1); CompressionOptions opts; - CompressionContext context(kSnappyCompression); + CompressionContext context(kSnappyCompression, opts); constexpr uint64_t sample_for_compression = 0; CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), diff --git a/db/blob/blob_file_reader_test.cc b/db/blob/blob_file_reader_test.cc index c8e4e5954..b6049d1ef 100644 --- a/db/blob/blob_file_reader_test.cc +++ b/db/blob/blob_file_reader_test.cc @@ -74,7 +74,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, } } else { CompressionOptions opts; - CompressionContext context(compression); + CompressionContext context(compression, opts); constexpr uint64_t sample_for_compression = 0; CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), compression, sample_for_compression); diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc index a9771565a..c0e1aba6e 100644 --- a/db/blob/blob_source_test.cc +++ b/db/blob/blob_source_test.cc @@ -76,7 +76,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, } } else { CompressionOptions opts; - CompressionContext context(compression); + CompressionContext context(compression, opts); constexpr uint64_t sample_for_compression = 0; CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), compression, sample_for_compression); diff --git a/db/db_test2.cc b/db/db_test2.cc index f03444647..d3bc34fee 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -7670,6 +7670,41 @@ TEST_F(DBTest2, GetLatestSeqAndTsForKey) { ASSERT_EQ(0, options.statistics->getTickerCount(GET_HIT_L0)); } +#if defined(ZSTD_ADVANCED) +TEST_F(DBTest2, ZSTDChecksum) { + // Verify that corruption during decompression is caught. 
+ Options options = CurrentOptions(); + options.create_if_missing = true; + options.compression = kZSTD; + options.compression_opts.max_compressed_bytes_per_kb = 1024; + options.compression_opts.checksum = true; + DestroyAndReopen(options); + Random rnd(33); + ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10))); + SyncPoint::GetInstance()->SetCallBack( + "BlockBasedTableBuilder::WriteBlock:TamperWithCompressedData", + [&](void* arg) { + std::string* output = static_cast(arg); + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames + // Checksum is the last 4 bytes, corrupting that part in unit test is + // more controllable. + output->data()[output->size() - 1]++; + }); + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Flush()); + PinnableSlice val; + Status s = Get(Key(0), &val); + ASSERT_TRUE(s.IsCorruption()); + + // Corruption caught during flush. + options.paranoid_file_checks = true; + DestroyAndReopen(options); + ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10))); + s = Flush(); + ASSERT_TRUE(s.IsCorruption()); +} +#endif + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index a90abe9a2..88a113d92 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -229,6 +229,7 @@ DECLARE_int32(compression_zstd_max_train_bytes); DECLARE_int32(compression_parallel_threads); DECLARE_uint64(compression_max_dict_buffer_bytes); DECLARE_bool(compression_use_zstd_dict_trainer); +DECLARE_bool(compression_checksum); DECLARE_string(checksum_type); DECLARE_string(env_uri); DECLARE_string(fs_uri); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index cd525cf9c..ddca21176 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -845,6 +845,9 @@ DEFINE_bool( "ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, " "this flag will have the default value true."); +DEFINE_bool(compression_checksum, false, + "Turn on zstd's checksum feature for detecting corruption."); + DEFINE_string(bottommost_compression_type, "disable", "Algorithm to use to compress bottommost level of the database. " "\"disable\" means disabling the feature"); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index bd71cb2a6..3e8b4ea70 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -3206,6 +3206,9 @@ void InitializeOptionsFromFlags( "cannot be used because ZSTD 1.4.5+ is not linked with the binary." " zstd dictionary trainer will be used.\n"); } + if (FLAGS_compression_checksum) { + options.compression_opts.checksum = true; + } options.max_manifest_file_size = FLAGS_max_manifest_file_size; options.inplace_update_support = FLAGS_in_place_update; options.max_subcompactions = static_cast(FLAGS_subcompactions); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 79003f501..f22ade185 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -181,6 +181,14 @@ struct CompressionOptions { // compressed by less than 12.5% (minimum ratio of 1.143:1). int max_compressed_bytes_per_kb = 1024 * 7 / 8; + // ZSTD only. + // Enable compression algorithm's checksum feature. 
+ // (https://github.com/facebook/zstd/blob/d857369028d997c92ff1f1861a4d7f679a125464/lib/zstd.h#L428) + // Each compressed frame will have a 32-bit checksum attached. The checksum + // computed from the uncompressed data and can be verified during + // decompression. + bool checksum = false; + // A convenience function for setting max_compressed_bytes_per_kb based on a // minimum acceptable compression ratio (uncompressed size over compressed // size). diff --git a/microbench/db_basic_bench.cc b/microbench/db_basic_bench.cc index 3851ddd5a..c2e547f60 100644 --- a/microbench/db_basic_bench.cc +++ b/microbench/db_basic_bench.cc @@ -538,6 +538,23 @@ static void ManualFlushArguments(benchmark::internal::Benchmark* b) { BENCHMARK(ManualFlush)->Iterations(1)->Apply(ManualFlushArguments); +// Copied from test_util.cc to not depend on rocksdb_test_lib +// when building microbench binaries. +static Slice CompressibleString(Random* rnd, double compressed_fraction, + int len, std::string* dst) { + int raw = static_cast(len * compressed_fraction); + if (raw < 1) raw = 1; + std::string raw_data = rnd->RandomBinaryString(raw); + + // Duplicate the random data until we have filled "len" bytes + dst->clear(); + while (dst->size() < (unsigned int)len) { + dst->append(raw_data); + } + dst->resize(len); + return Slice(*dst); +} + static void DBGet(benchmark::State& state) { auto compaction_style = static_cast(state.range(0)); uint64_t max_data = state.range(1); @@ -546,6 +563,9 @@ static void DBGet(benchmark::State& state) { bool negative_query = state.range(4); bool enable_filter = state.range(5); bool mmap = state.range(6); + auto compression_type = static_cast(state.range(7)); + bool compression_checksum = static_cast(state.range(8)); + bool no_blockcache = state.range(9); uint64_t key_num = max_data / per_key_size; // setup DB @@ -568,6 +588,13 @@ static void DBGet(benchmark::State& state) { table_options.no_block_cache = true; table_options.block_restart_interval = 1; } + options.compression = compression_type; + options.compression_opts.checksum = compression_checksum; + if (no_blockcache) { + table_options.no_block_cache = true; + } else { + table_options.block_cache = NewLRUCache(100 << 20); + } options.table_factory.reset(NewBlockBasedTableFactory(table_options)); auto rnd = Random(301 + state.thread_index()); @@ -581,9 +608,10 @@ static void DBGet(benchmark::State& state) { // number. 
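  // Editorial note (not part of the patch): the CompressibleString() helper
  // added above fills the value with len/2 random bytes duplicated out to
  // `len`, i.e. roughly 2:1 compressible data. The write loop below uses it
  // instead of rnd.RandomString() so that the new kZSTD /
  // compression_checksum benchmark configurations exercise real compression
  // and checksum work rather than storing incompressible random values.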
auto wo = WriteOptions(); wo.disableWAL = true; + std::string val; for (uint64_t i = 0; i < key_num; i++) { - Status s = db->Put(wo, kg_seq.Next(), - rnd.RandomString(static_cast(per_key_size))); + CompressibleString(&rnd, 0.5, static_cast(per_key_size), &val); + Status s = db->Put(wo, kg_seq.Next(), val); if (!s.ok()) { state.SkipWithError(s.ToString().c_str()); } @@ -641,14 +669,23 @@ static void DBGet(benchmark::State& state) { static void DBGetArguments(benchmark::internal::Benchmark* b) { for (int comp_style : {kCompactionStyleLevel, kCompactionStyleUniversal, kCompactionStyleFIFO}) { - for (int64_t max_data : {128l << 20, 512l << 20}) { + for (int64_t max_data : {1l << 20, 128l << 20, 512l << 20}) { for (int64_t per_key_size : {256, 1024}) { for (bool enable_statistics : {false, true}) { for (bool negative_query : {false, true}) { for (bool enable_filter : {false, true}) { for (bool mmap : {false, true}) { - b->Args({comp_style, max_data, per_key_size, enable_statistics, - negative_query, enable_filter, mmap}); + for (int compression_type : + {kNoCompression /* 0x0 */, kZSTD /* 0x7 */}) { + for (bool compression_checksum : {false, true}) { + for (bool no_blockcache : {false, true}) { + b->Args({comp_style, max_data, per_key_size, + enable_statistics, negative_query, enable_filter, + mmap, compression_type, compression_checksum, + no_blockcache}); + } + } + } } } } @@ -657,11 +694,13 @@ static void DBGetArguments(benchmark::internal::Benchmark* b) { } } b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics", - "negative_query", "enable_filter", "mmap"}); + "negative_query", "enable_filter", "mmap", "compression_type", + "compression_checksum", "no_blockcache"}); } -BENCHMARK(DBGet)->Threads(1)->Apply(DBGetArguments); -BENCHMARK(DBGet)->Threads(8)->Apply(DBGetArguments); +static const uint64_t DBGetNum = 10000l; +BENCHMARK(DBGet)->Threads(1)->Iterations(DBGetNum)->Apply(DBGetArguments); +BENCHMARK(DBGet)->Threads(8)->Iterations(DBGetNum / 8)->Apply(DBGetArguments); static void SimpleGetWithPerfContext(benchmark::State& state) { // setup DB diff --git a/options/cf_options.cc b/options/cf_options.cc index fc2865f90..26d124783 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -173,6 +173,9 @@ static std::unordered_map {offsetof(struct CompressionOptions, use_zstd_dict_trainer), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"checksum", + {offsetof(struct CompressionOptions, checksum), OptionType::kBoolean, + OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, }; static std::unordered_map diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 6541bf0a4..39d7d6b20 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -504,11 +504,12 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) { "compression=kNoCompression;" "compression_opts={max_dict_buffer_bytes=5;use_zstd_dict_trainer=true;" "enabled=false;parallel_threads=6;zstd_max_train_bytes=7;strategy=8;max_" - "dict_bytes=9;level=10;window_bits=11;max_compressed_bytes_per_kb=987;};" + "dict_bytes=9;level=10;window_bits=11;max_compressed_bytes_per_kb=987;" + "checksum=true};" "bottommost_compression_opts={max_dict_buffer_bytes=4;use_zstd_dict_" "trainer=true;enabled=true;parallel_threads=5;zstd_max_train_bytes=6;" "strategy=7;max_dict_bytes=8;level=9;window_bits=10;max_compressed_bytes_" - "per_kb=876;};" + "per_kb=876;checksum=true};" 
"bottommost_compression=kDisableCompressionOption;" "level0_stop_writes_trigger=33;" "num_levels=99;" diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 26e071abb..051f9d87b 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -138,8 +138,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info, if (sampled_output_fast && (LZ4_Supported() || Snappy_Supported())) { CompressionType c = LZ4_Supported() ? kLZ4Compression : kSnappyCompression; - CompressionContext context(c); CompressionOptions options; + CompressionContext context(c, options); CompressionInfo info_tmp(options, context, CompressionDict::GetEmptyDict(), c, info.SampleForCompression()); @@ -152,8 +152,8 @@ Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info, // Sampling with a slow but high-compression algorithm if (sampled_output_slow && (ZSTD_Supported() || Zlib_Supported())) { CompressionType c = ZSTD_Supported() ? kZSTD : kZlibCompression; - CompressionContext context(c); CompressionOptions options; + CompressionContext context(c, options); CompressionInfo info_tmp(options, context, CompressionDict::GetEmptyDict(), c, info.SampleForCompression()); @@ -525,8 +525,10 @@ struct BlockBasedTableBuilder::Rep { compression_dict_buffer_cache_res_mgr = nullptr; } + assert(compression_ctxs.size() >= compression_opts.parallel_threads); for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) { - compression_ctxs[i].reset(new CompressionContext(compression_type)); + compression_ctxs[i].reset( + new CompressionContext(compression_type, compression_opts)); } if (table_options.index_type == BlockBasedTableOptions::kTwoLevelIndexSearch) { @@ -1145,6 +1147,9 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data, return; } + TEST_SYNC_POINT_CALLBACK( + "BlockBasedTableBuilder::WriteBlock:TamperWithCompressedData", + &r->compressed_output); WriteMaybeCompressedBlock(block_contents, type, handle, block_type, &uncompressed_block_data); r->compressed_output.clear(); diff --git a/table/format.cc b/table/format.cc index 30847554b..27ecce547 100644 --- a/table/format.cc +++ b/table/format.cc @@ -645,19 +645,25 @@ Status UncompressBlockData(const UncompressionInfo& uncompression_info, StopWatchNano timer(ioptions.clock, ShouldReportDetailedTime(ioptions.env, ioptions.stats)); size_t uncompressed_size = 0; - CacheAllocationPtr ubuf = - UncompressData(uncompression_info, data, size, &uncompressed_size, - GetCompressFormatForVersion(format_version), allocator); + const char* error_msg = nullptr; + CacheAllocationPtr ubuf = UncompressData( + uncompression_info, data, size, &uncompressed_size, + GetCompressFormatForVersion(format_version), allocator, &error_msg); if (!ubuf) { if (!CompressionTypeSupported(uncompression_info.type())) { - return Status::NotSupported( + ret = Status::NotSupported( "Unsupported compression method for this build", CompressionTypeToString(uncompression_info.type())); } else { - return Status::Corruption( - "Corrupted compressed block contents", - CompressionTypeToString(uncompression_info.type())); + std::ostringstream oss; + oss << "Corrupted compressed block contents"; + if (error_msg) { + oss << ": " << error_msg; + } + ret = Status::Corruption( + oss.str(), CompressionTypeToString(uncompression_info.type())); } + return ret; } *out_contents = BlockContents(std::move(ubuf), uncompressed_size); diff --git 
a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index dd5abd589..db66d0721 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -2834,7 +2834,7 @@ class Benchmark { std::string input_str(len, 'y'); std::string compressed; CompressionOptions opts; - CompressionContext context(FLAGS_compression_type_e); + CompressionContext context(FLAGS_compression_type_e, opts); CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), FLAGS_compression_type_e, FLAGS_sample_for_compression); @@ -4002,7 +4002,8 @@ class Benchmark { bool ok = true; std::string compressed; CompressionOptions opts; - CompressionContext context(FLAGS_compression_type_e); + opts.level = FLAGS_compression_level; + CompressionContext context(FLAGS_compression_type_e, opts); CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), FLAGS_compression_type_e, FLAGS_sample_for_compression); @@ -4031,8 +4032,10 @@ class Benchmark { Slice input = gen.Generate(FLAGS_block_size); std::string compressed; - CompressionContext compression_ctx(FLAGS_compression_type_e); CompressionOptions compression_opts; + compression_opts.level = FLAGS_compression_level; + CompressionContext compression_ctx(FLAGS_compression_type_e, + compression_opts); CompressionInfo compression_info( compression_opts, compression_ctx, CompressionDict::GetEmptyDict(), FLAGS_compression_type_e, FLAGS_sample_for_compression); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 49fe96008..11ac75d78 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -65,6 +65,7 @@ "compression_parallel_threads": 1, "compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1, "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1), + "compression_checksum": lambda: random.randint(0, 1), "clear_column_family_one_in": 0, "compact_files_one_in": 1000000, "compact_range_one_in": 1000000, diff --git a/unreleased_history/new_features/zstd-checksum.md b/unreleased_history/new_features/zstd-checksum.md new file mode 100644 index 000000000..033576c9c --- /dev/null +++ b/unreleased_history/new_features/zstd-checksum.md @@ -0,0 +1 @@ +* Add a new compression option `CompressionOptions::checksum` for enabling ZSTD's checksum feature to detect corruption during decompression. 
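For context, a minimal user-side sketch of the new option (an editorial addition, not part of the patch; it assumes a ZSTD-enabled build and elides error handling):

  #include "rocksdb/db.h"

  void OpenWithZstdChecksum(const std::string& path) {
    rocksdb::Options options;
    options.create_if_missing = true;
    options.compression = rocksdb::kZSTD;
    // New in this change: attach a 32-bit checksum of the uncompressed data to
    // every compressed frame; a mismatch surfaces as Status::Corruption when
    // the block is decompressed.
    options.compression_opts.checksum = true;
    rocksdb::DB* db = nullptr;
    rocksdb::Status s = rocksdb::DB::Open(options, path, &db);
    if (s.ok()) {
      delete db;
    }
  }

The same knob is settable through the options string as `compression_opts={checksum=true}`. In ZSTD_ADVANCED builds (zstd v1.4.0+) it maps onto ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1) ahead of ZSTD_compress2(), as wired up in util/compression.h further below.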
\ No newline at end of file diff --git a/util/compression.cc b/util/compression.cc index 712d333ee..2a0bc38d4 100644 --- a/util/compression.cc +++ b/util/compression.cc @@ -48,7 +48,7 @@ int ZSTDStreamingCompress::Compress(const char* input, size_t input_size, if (input_size == 0) { return 0; } -#ifndef ZSTD_STREAMING +#ifndef ZSTD_ADVANCED (void)input; (void)input_size; (void)output; @@ -77,7 +77,7 @@ int ZSTDStreamingCompress::Compress(const char* input, size_t input_size, } void ZSTDStreamingCompress::Reset() { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_CCtx_reset(cctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; #endif @@ -91,7 +91,7 @@ int ZSTDStreamingUncompress::Uncompress(const char* input, size_t input_size, if (input_size == 0) { return 0; } -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED if (input) { // New input input_buffer_ = {input, input_size, /*pos=*/0}; @@ -113,7 +113,7 @@ int ZSTDStreamingUncompress::Uncompress(const char* input, size_t input_size, } void ZSTDStreamingUncompress::Reset() { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_DCtx_reset(dctx_, ZSTD_ResetDirective::ZSTD_reset_session_only); input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; #endif diff --git a/util/compression.h b/util/compression.h index eb5d1c7bb..3e21a669b 100644 --- a/util/compression.h +++ b/util/compression.h @@ -53,8 +53,11 @@ #include #endif // ZSTD_VERSION_NUMBER >= 10103 // v1.4.0+ +// ZSTD_Compress2(), ZSTD_compressStream2() and frame parameters all belong to +// advanced APIs and require v1.4.0+. +// https://github.com/facebook/zstd/blob/eb9f881eb810f2242f1ef36b3f3e7014eecb8fa6/lib/zstd.h#L297C40-L297C45 #if ZSTD_VERSION_NUMBER >= 10400 -#define ZSTD_STREAMING +#define ZSTD_ADVANCED #endif // ZSTD_VERSION_NUMBER >= 10400 namespace ROCKSDB_NAMESPACE { // Need this for the context allocation override @@ -180,6 +183,9 @@ struct CompressionDict { if (level == CompressionOptions::kDefaultCompressionLevel) { // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see // https://github.com/facebook/zstd/issues/1148 + // TODO(cbi): ZSTD_CLEVEL_DEFAULT is exposed after + // https://github.com/facebook/zstd/pull/1174. Use ZSTD_CLEVEL_DEFAULT + // instead of hardcoding 3. 
level = 3; } // Should be safe (but slower) if below call fails as we'll use the @@ -363,14 +369,43 @@ class CompressionContext { private: #if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500) ZSTD_CCtx* zstd_ctx_ = nullptr; - void CreateNativeContext(CompressionType type) { - if (type == kZSTD || type == kZSTDNotFinalCompression) { + + ZSTD_CCtx* CreateZSTDContext() { #ifdef ROCKSDB_ZSTD_CUSTOM_MEM - zstd_ctx_ = - ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides()); + return ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides()); #else // ROCKSDB_ZSTD_CUSTOM_MEM - zstd_ctx_ = ZSTD_createCCtx(); + return ZSTD_createCCtx(); #endif // ROCKSDB_ZSTD_CUSTOM_MEM + } + + void CreateNativeContext(CompressionType type, int level, bool checksum) { + if (type == kZSTD || type == kZSTDNotFinalCompression) { + zstd_ctx_ = CreateZSTDContext(); +#ifdef ZSTD_ADVANCED + if (level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } + size_t err = + ZSTD_CCtx_setParameter(zstd_ctx_, ZSTD_c_compressionLevel, level); + if (ZSTD_isError(err)) { + assert(false); + ZSTD_freeCCtx(zstd_ctx_); + zstd_ctx_ = CreateZSTDContext(); + } + if (checksum) { + err = ZSTD_CCtx_setParameter(zstd_ctx_, ZSTD_c_checksumFlag, 1); + if (ZSTD_isError(err)) { + assert(false); + ZSTD_freeCCtx(zstd_ctx_); + zstd_ctx_ = CreateZSTDContext(); + } + } +#else + (void)level; + (void)checksum; +#endif } } void DestroyNativeContext() { @@ -388,12 +423,14 @@ class CompressionContext { #else // ZSTD && (ZSTD_VERSION_NUMBER >= 500) private: - void CreateNativeContext(CompressionType /* type */) {} + void CreateNativeContext(CompressionType /* type */, int /* level */, + bool /* checksum */) {} void DestroyNativeContext() {} #endif // ZSTD && (ZSTD_VERSION_NUMBER >= 500) public: - explicit CompressionContext(CompressionType type) { - CreateNativeContext(type); + explicit CompressionContext(CompressionType type, + const CompressionOptions& options) { + CreateNativeContext(type, options.level, options.checksum); } ~CompressionContext() { DestroyNativeContext(); } CompressionContext(const CompressionContext&) = delete; @@ -525,7 +562,7 @@ inline bool ZSTDNotFinal_Supported() { } inline bool ZSTD_Streaming_Supported() { -#if defined(ZSTD) && defined(ZSTD_STREAMING) +#if defined(ZSTD_ADVANCED) return true; #else return false; @@ -1343,30 +1380,44 @@ inline bool ZSTD_Compress(const CompressionInfo& info, const char* input, size_t compressBound = ZSTD_compressBound(length); output->resize(static_cast(output_header_len + compressBound)); size_t outlen = 0; - int level; - if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { - // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see - // https://github.com/facebook/zstd/issues/1148 - level = 3; - } else { - level = info.options().level; - } #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ ZSTD_CCtx* context = info.context().ZSTDPreallocCtx(); assert(context != nullptr); +#ifdef ZSTD_ADVANCED + if (info.dict().GetDigestedZstdCDict() != nullptr) { + ZSTD_CCtx_refCDict(context, info.dict().GetDigestedZstdCDict()); + } else { + ZSTD_CCtx_loadDictionary(context, info.dict().GetRawDict().data(), + info.dict().GetRawDict().size()); + } + + // Compression level is set in `contex` during CreateNativeContext() + outlen = ZSTD_compress2(context, &(*output)[output_header_len], compressBound, + input, length); +#else // ZSTD_ADVANCED #if 
ZSTD_VERSION_NUMBER >= 700 // v0.7.0+ if (info.dict().GetDigestedZstdCDict() != nullptr) { outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len], compressBound, input, length, info.dict().GetDigestedZstdCDict()); } -#endif // ZSTD_VERSION_NUMBER >= 700 +#endif // ZSTD_VERSION_NUMBER >= 700 + // TODO (cbi): error handling for compression. if (outlen == 0) { + int level; + if (info.options().level == CompressionOptions::kDefaultCompressionLevel) { + // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see + // https://github.com/facebook/zstd/issues/1148 + level = 3; + } else { + level = info.options().level; + } outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len], compressBound, input, length, info.dict().GetRawDict().data(), info.dict().GetRawDict().size(), level); } +#endif // ZSTD_ADVANCED #else // up to v0.4.x outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input, length, level); @@ -1387,17 +1438,28 @@ inline bool ZSTD_Compress(const CompressionInfo& info, const char* input, // @param compression_dict Data for presetting the compression library's // dictionary. +// @param error_message If not null, will be set if decompression fails. +// +// Returns nullptr if decompression fails. inline CacheAllocationPtr ZSTD_Uncompress( const UncompressionInfo& info, const char* input_data, size_t input_length, - size_t* uncompressed_size, MemoryAllocator* allocator = nullptr) { + size_t* uncompressed_size, MemoryAllocator* allocator = nullptr, + const char** error_message = nullptr) { #ifdef ZSTD + static const char* const kErrorDecodeOutputSize = + "Cannot decode output size."; + static const char* const kErrorOutputLenMismatch = + "Decompressed size does not match header."; uint32_t output_len = 0; if (!compression::GetDecompressedSizeInfo(&input_data, &input_length, &output_len)) { + if (error_message) { + *error_message = kErrorDecodeOutputSize; + } return nullptr; } - auto output = AllocateBlock(output_len, allocator); + CacheAllocationPtr output = AllocateBlock(output_len, allocator); size_t actual_output_length = 0; #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+ ZSTD_DCtx* context = info.context().GetZSTDContext(); @@ -1407,19 +1469,31 @@ inline CacheAllocationPtr ZSTD_Uncompress( actual_output_length = ZSTD_decompress_usingDDict( context, output.get(), output_len, input_data, input_length, info.dict().GetDigestedZstdDDict()); - } + } else { #endif // ROCKSDB_ZSTD_DDICT - if (actual_output_length == 0) { actual_output_length = ZSTD_decompress_usingDict( context, output.get(), output_len, input_data, input_length, info.dict().GetRawDict().data(), info.dict().GetRawDict().size()); +#ifdef ROCKSDB_ZSTD_DDICT } +#endif // ROCKSDB_ZSTD_DDICT #else // up to v0.4.x (void)info; actual_output_length = ZSTD_decompress(output.get(), output_len, input_data, input_length); #endif // ZSTD_VERSION_NUMBER >= 500 - assert(actual_output_length == output_len); + if (ZSTD_isError(actual_output_length)) { + if (error_message) { + *error_message = ZSTD_getErrorName(actual_output_length); + } + return nullptr; + } else if (actual_output_length != output_len) { + if (error_message) { + *error_message = kErrorOutputLenMismatch; + } + return nullptr; + } + *uncompressed_size = actual_output_length; return output; #else // ZSTD @@ -1428,6 +1502,7 @@ inline CacheAllocationPtr ZSTD_Uncompress( (void)input_length; (void)uncompressed_size; (void)allocator; + (void)error_message; return nullptr; #endif } @@ -1530,6 +1605,7 @@ inline std::string 
ZSTD_FinalizeDictionary( return dict_data; } #else // up to v1.4.4 + assert(false); (void)samples; (void)sample_lens; (void)max_dict_bytes; @@ -1589,7 +1665,8 @@ inline bool CompressData(const Slice& raw, inline CacheAllocationPtr UncompressData( const UncompressionInfo& uncompression_info, const char* data, size_t n, size_t* uncompressed_size, uint32_t compress_format_version, - MemoryAllocator* allocator = nullptr) { + MemoryAllocator* allocator = nullptr, + const char** error_message = nullptr) { switch (uncompression_info.type()) { case kSnappyCompression: return Snappy_Uncompress(data, n, uncompressed_size, allocator); @@ -1609,8 +1686,9 @@ inline CacheAllocationPtr UncompressData( return CacheAllocationPtr(XPRESS_Uncompress(data, n, uncompressed_size)); case kZSTD: case kZSTDNotFinalCompression: + // TODO(cbi): error message handling for other compression algorithms. return ZSTD_Uncompress(uncompression_info, data, n, uncompressed_size, - allocator); + allocator, error_message); default: return CacheAllocationPtr(); } @@ -1743,7 +1821,7 @@ class ZSTDStreamingCompress final : public StreamingCompress { size_t max_output_len) : StreamingCompress(kZSTD, opts, compress_format_version, max_output_len) { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED cctx_ = ZSTD_createCCtx(); // Each compressed frame will have a checksum ZSTD_CCtx_setParameter(cctx_, ZSTD_c_checksumFlag, 1); @@ -1752,14 +1830,14 @@ class ZSTDStreamingCompress final : public StreamingCompress { #endif } ~ZSTDStreamingCompress() override { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_freeCCtx(cctx_); #endif } int Compress(const char* input, size_t input_size, char* output, size_t* output_pos) override; void Reset() override; -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_CCtx* cctx_; ZSTD_inBuffer input_buffer_; #endif @@ -1770,14 +1848,14 @@ class ZSTDStreamingUncompress final : public StreamingUncompress { explicit ZSTDStreamingUncompress(uint32_t compress_format_version, size_t max_output_len) : StreamingUncompress(kZSTD, compress_format_version, max_output_len) { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED dctx_ = ZSTD_createDCtx(); assert(dctx_ != nullptr); input_buffer_ = {/*src=*/nullptr, /*size=*/0, /*pos=*/0}; #endif } ~ZSTDStreamingUncompress() override { -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_freeDCtx(dctx_); #endif } @@ -1786,7 +1864,7 @@ class ZSTDStreamingUncompress final : public StreamingUncompress { void Reset() override; private: -#ifdef ZSTD_STREAMING +#ifdef ZSTD_ADVANCED ZSTD_DCtx* dctx_; ZSTD_inBuffer input_buffer_; #endif diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index f610b9ec4..034701136 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -1148,7 +1148,7 @@ Slice BlobDBImpl::GetCompressedSlice(const Slice& raw, StopWatch compression_sw(clock_, statistics_, BLOB_DB_COMPRESSION_MICROS); CompressionType type = bdb_options_.compression; CompressionOptions opts; - CompressionContext context(type); + CompressionContext context(type, opts); CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(), type, 0 /* sample_for_compression */); CompressBlock(raw, info, &type, kBlockBasedTableVersionFormat, false, From f65a0379f0710be894fa72ebfcb4527d6d2ee3f0 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Fri, 18 Aug 2023 15:52:04 -0700 Subject: [PATCH 052/386] Implement trimming of readhead size when upper bound is specified (#11684) Summary: Implement trimming of readahead_size under a new option 
ReadOptions.auto_readahead_size. It'll trim the readahead_size during prefetching upto iterate_upper_bound offset only when ReadOptions.iterate_upper_bound is set, therefore reducing the prefetching of data beyond upper_bound. It's enabled for both implicit auto readahead size and when ReadOptions.readahead_size is specified and for sync and async_io. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11684 Test Plan: Added new unit test Reviewed By: anand1976 Differential Revision: D48479723 Pulled By: akankshamahajan15 fbshipit-source-id: 2b1703579caf779105e836b580866ffd7db076fc --- file/file_prefetch_buffer.cc | 11 +- file/file_prefetch_buffer.h | 23 +- file/prefetch_test.cc | 222 +++++++++++++++--- include/rocksdb/options.h | 11 +- include/rocksdb/statistics.h | 4 + java/rocksjni/portal.h | 2 + .../src/main/java/org/rocksdb/TickerType.java | 2 + monitoring/statistics.cc | 1 + .../block_based/block_based_table_iterator.cc | 51 ++++ .../block_based/block_based_table_iterator.h | 2 + table/block_based/block_based_table_reader.cc | 2 +- table/block_based/block_based_table_reader.h | 23 +- table/block_based/block_prefetcher.cc | 14 +- table/block_based/block_prefetcher.h | 6 + table/block_based/partitioned_filter_block.cc | 3 +- table/block_based/partitioned_index_reader.cc | 3 +- .../new_features/auto_readahead.md | 1 + 17 files changed, 321 insertions(+), 60 deletions(-) create mode 100644 unreleased_history/new_features/auto_readahead.md diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 618d49e03..d34a65c82 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -159,6 +159,7 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts, size_t read_len = static_cast(roundup_len - chunk_len); Status s = Read(opts, reader, read_len, chunk_len, rounddown_offset, curr_); + if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) { RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len); } @@ -650,6 +651,7 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return false; } } + UpdateReadAheadSizeForUpperBound(offset, n); s = Prefetch(opts, reader, offset, n + readahead_size_); } if (!s.ok()) { @@ -743,6 +745,9 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( return false; } } + + UpdateReadAheadSizeForUpperBound(offset, n); + // Prefetch n + readahead_size_/2 synchronously as remaining // readahead_size_/2 will be prefetched asynchronously. s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2, @@ -823,7 +828,11 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, if (readahead_size_ > 0 && (!implicit_auto_readahead_ || num_file_reads_ >= num_file_reads_for_auto_readahead_)) { - is_eligible_for_prefetching = true; + UpdateReadAheadSizeForUpperBound(offset, n); + // After trim, readahead size can be 0. + if (readahead_size_ > 0) { + is_eligible_for_prefetching = true; + } } // 1. 
Cancel any pending async read to make code simpler as buffers can be out diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 89d96d438..334e32b6e 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -87,7 +87,8 @@ class FilePrefetchBuffer { size_t readahead_size = 0, size_t max_readahead_size = 0, bool enable = true, bool track_min_offset = false, bool implicit_auto_readahead = false, uint64_t num_file_reads = 0, - uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr, + uint64_t num_file_reads_for_auto_readahead = 0, + uint64_t upper_bound_offset = 0, FileSystem* fs = nullptr, SystemClock* clock = nullptr, Statistics* stats = nullptr, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) : curr_(0), @@ -106,7 +107,8 @@ class FilePrefetchBuffer { fs_(fs), clock_(clock), stats_(stats), - usage_(usage) { + usage_(usage), + upper_bound_offset_(upper_bound_offset) { assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) || (num_file_reads_ == 0)); // If ReadOptions.async_io is enabled, data is asynchronously filled in @@ -319,6 +321,7 @@ class FilePrefetchBuffer { void ResetValues() { num_file_reads_ = 1; readahead_size_ = initial_auto_readahead_size_; + upper_bound_offset_ = 0; } // Called in case of implicit auto prefetching. @@ -416,6 +419,17 @@ class FilePrefetchBuffer { uint64_t offset, size_t n, Slice* result, Status* status); + void UpdateReadAheadSizeForUpperBound(uint64_t offset, size_t n) { + // Adjust readhahead_size till upper_bound if upper_bound_offset_ is + // set. + if (upper_bound_offset_ > 0 && upper_bound_offset_ > offset) { + if (upper_bound_offset_ < offset + n + readahead_size_) { + readahead_size_ = (upper_bound_offset_ - offset) - n; + RecordTick(stats_, READAHEAD_TRIMMED); + } + } + } + std::vector bufs_; // curr_ represents the index for bufs_ indicating which buffer is being // consumed currently. @@ -457,5 +471,10 @@ class FilePrefetchBuffer { Statistics* stats_; FilePrefetchBufferUsage usage_; + + // upper_bound_offset_ is set when ReadOptions.iterate_upper_bound and + // ReadOptions.auto_readahead_size are set to trim readahead_size upto + // upper_bound_offset_ during prefetching. + uint64_t upper_bound_offset_ = 0; }; } // namespace ROCKSDB_NAMESPACE diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 7cb2d3876..f7c1a6fed 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -418,7 +418,8 @@ TEST_P(PrefetchTailTest, UpgradeToTailSizeInManifest) { } if (UseDirectIO()) { ROCKSDB_GTEST_BYPASS( - "To simplify testing logics with setting file's buffer alignment to be " + "To simplify testing logics with setting file's buffer alignment to " + "be " "1, direct IO is required to be disabled."); } @@ -455,8 +456,8 @@ TEST_P(PrefetchTailTest, UpgradeToTailSizeInManifest) { // inferred to be a small number for files with no tail size recorded in // manifest. // "1" is chosen to be such number so that with `small_buffer_alignment == - // true` and `use_small_cache == true`, it would have caused one file read per - // index partition during db open if the upgrade is done wrong. + // true` and `use_small_cache == true`, it would have caused one file read + // per index partition during db open if the upgrade is done wrong. 
SyncPoint::GetInstance()->SetCallBack( "BlockBasedTable::Open::TailPrefetchLen", [&](void* arg) { std::pair* prefetch_off_len_pair = @@ -481,8 +482,8 @@ TEST_P(PrefetchTailTest, UpgradeToTailSizeInManifest) { int64_t num_index_partition = GetNumIndexPartition(); // If the upgrade is done right, db open will prefetch all the index // partitions at once, instead of doing one read per partition. - // That is, together with `metadata_block_size == 1`, there will be more index - // partitions than number of non index partitions reads. + // That is, together with `metadata_block_size == 1`, there will be more + // index partitions than number of non index partitions reads. ASSERT_LT(db_open_file_read.count, num_index_partition); Close(); @@ -695,8 +696,8 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) { "{initial_auto_readahead_size=0;}"}})); break; case 1: - // intial_auto_readahead_size and max_auto_readahead_size are set same - // so readahead_size remains same. + // intial_auto_readahead_size and max_auto_readahead_size are set + // same so readahead_size remains same. ASSERT_OK(db_->SetOptions({{"block_based_table_factory", "{initial_auto_readahead_size=4096;max_" "auto_readahead_size=4096;}"}})); @@ -803,8 +804,9 @@ TEST_P(PrefetchTest, ConfigureNumFilesReadsForReadaheadSize) { /* * Reseek keys from sequential Data Blocks within same partitioned * index. It will prefetch the data block at the first seek since - * num_file_reads_for_auto_readahead = 0. Data Block size is nearly 4076 so - * readahead will fetch 8 * 1024 data more initially (2 more data blocks). + * num_file_reads_for_auto_readahead = 0. Data Block size is nearly 4076 + * so readahead will fetch 8 * 1024 data more initially (2 more data + * blocks). */ iter->Seek(BuildKey(0)); // Prefetch data + index block since // num_file_reads_for_auto_readahead = 0. @@ -902,8 +904,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { /* * Reseek keys from sequential Data Blocks within same partitioned * index. After 2 sequential reads it will prefetch the data block. - * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data more - * initially (2 more data blocks). + * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data + * more initially (2 more data blocks). */ iter->Seek(BuildKey(0)); ASSERT_TRUE(iter->Valid()); @@ -980,9 +982,9 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { { /* * Reseek keys from sequential data blocks to set implicit auto readahead - * and prefetch data but after that iterate over different (non sequential) - * data blocks which won't prefetch any data further. So buff_prefetch_count - * will be 1 for the first one. + * and prefetch data but after that iterate over different (non + * sequential) data blocks which won't prefetch any data further. So + * buff_prefetch_count will be 1 for the first one. */ auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); iter->Seek(BuildKey(0)); @@ -1009,8 +1011,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { buff_prefetch_count = 0; } - // Read sequentially to confirm readahead_size is reset to initial value (2 - // more data blocks) + // Read sequentially to confirm readahead_size is reset to initial value + // (2 more data blocks) iter->Seek(BuildKey(1011)); ASSERT_TRUE(iter->Valid()); iter->Seek(BuildKey(1015)); @@ -1060,8 +1062,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { } { /* - * Reseek over different keys from different blocks. buff_prefetch_count is - * set 0. + * Reseek over different keys from different blocks. 
buff_prefetch_count + * is set 0. */ auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); int i = 0; @@ -1165,8 +1167,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) { /* * Reseek keys from sequential Data Blocks within same partitioned * index. After 2 sequential reads it will prefetch the data block. - * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data more - * initially (2 more data blocks). + * Data Block size is nearly 4076 so readahead will fetch 8 * 1024 data + * more initially (2 more data blocks). */ auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); // Warm up the cache @@ -1193,8 +1195,8 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) { ASSERT_TRUE(iter->Valid()); iter->Seek(BuildKey(1004)); // Prefetch data (not in cache). ASSERT_TRUE(iter->Valid()); - // Missed one sequential block but next is in already in buffer so readahead - // will not be reset. + // Missed one sequential block but next is in already in buffer so + // readahead will not be reset. iter->Seek(BuildKey(1011)); ASSERT_TRUE(iter->Valid()); // Prefetch data but blocks are in cache so no prefetch and reset. @@ -1648,12 +1650,13 @@ TEST_P(PrefetchTest1, SeekWithExtraPrefetchAsyncIO) { 0)); // Prefetch data on seek because of seek parallelization. ASSERT_TRUE(iter->Valid()); - // Do extra prefetching in Seek only if num_file_reads_for_auto_readahead - // = 0. + // Do extra prefetching in Seek only if + // num_file_reads_for_auto_readahead = 0. ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0)); // buff_prefetch_count is 2 because of index block when // num_file_reads_for_auto_readahead = 0. - // If num_file_reads_for_auto_readahead > 0, index block isn't prefetched. + // If num_file_reads_for_auto_readahead > 0, index block isn't + // prefetched. ASSERT_EQ(buff_prefetch_count, i == 0 ? 2 : 1); extra_prefetch_buff_cnt = 0; @@ -1662,8 +1665,8 @@ TEST_P(PrefetchTest1, SeekWithExtraPrefetchAsyncIO) { iter->Seek( BuildKey(22)); // Prefetch data because of seek parallelization. ASSERT_TRUE(iter->Valid()); - // Do extra prefetching in Seek only if num_file_reads_for_auto_readahead - // = 0. + // Do extra prefetching in Seek only if + // num_file_reads_for_auto_readahead = 0. ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0)); ASSERT_EQ(buff_prefetch_count, 1); @@ -1673,8 +1676,8 @@ TEST_P(PrefetchTest1, SeekWithExtraPrefetchAsyncIO) { iter->Seek( BuildKey(33)); // Prefetch data because of seek parallelization. ASSERT_TRUE(iter->Valid()); - // Do extra prefetching in Seek only if num_file_reads_for_auto_readahead - // = 0. + // Do extra prefetching in Seek only if + // num_file_reads_for_auto_readahead = 0. ASSERT_EQ(extra_prefetch_buff_cnt, (i == 0 ? 1 : 0)); ASSERT_EQ(buff_prefetch_count, 1); } @@ -1765,8 +1768,8 @@ TEST_P(PrefetchTest1, NonSequentialReadsWithAdaptiveReadahead) { Close(); } -// This test verifies the functionality of adaptive_readaheadsize with cache and -// if block is found in cache, decrease the readahead_size if +// This test verifies the functionality of adaptive_readaheadsize with cache +// and if block is found in cache, decrease the readahead_size if // - its enabled internally by RocksDB (implicit_auto_readahead_) and, // - readahead_size is greater than 0 and, // - the block would have called prefetch API if not found in cache for @@ -1888,8 +1891,8 @@ TEST_P(PrefetchTest1, DecreaseReadAheadIfInCache) { ASSERT_TRUE(iter->Valid()); // Prefetch data (not in buffer) but found in cache. So decrease - // readahead_size. 
Since it will 0 after decrementing so readahead_size will - // be set to initial value. + // readahead_size. Since it will 0 after decrementing so readahead_size + // will be set to initial value. iter->Seek(BuildKey(1019)); ASSERT_TRUE(iter->Valid()); expected_current_readahead_size = std::max( @@ -2001,8 +2004,8 @@ TEST_P(PrefetchTest1, SeekParallelizationTest) { HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); - // not all platforms support io_uring. In that case it'll fallback to normal - // prefetching without async_io. + // not all platforms support io_uring. In that case it'll fallback to + // normal prefetching without async_io. if (read_async_called) { ASSERT_EQ(buff_prefetch_async_count, 2); ASSERT_GT(async_read_bytes.count, 0); @@ -2014,6 +2017,150 @@ TEST_P(PrefetchTest1, SeekParallelizationTest) { Close(); } +// This test checks if readahead_size is trimmed when upper_bound is reached. +// It tests with different combinations of async_io disabled/enabled, +// readahead_size (implicit and explicit), and num_file_reads_for_auto_readahead +// from 0 to 2. +TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + + // First param is if the mockFS support_prefetch or not + std::shared_ptr fs = + std::make_shared(FileSystem::Default(), false); + + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + Options options; + SetGenericOptions(env.get(), /*use_direct_io=*/false, options); + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options; + SetBlockBasedTableOptions(table_options); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + Status s = TryReopen(options); + ASSERT_OK(s); + + Random rnd(309); + WriteBatch batch; + + for (int i = 0; i < 26; i++) { + std::string key = "my_key_"; + + for (int j = 0; j < 10; j++) { + key += char('a' + i); + ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); + } + } + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::string start_key = "my_key_a"; + + std::string end_key = "my_key_"; + for (int j = 0; j < 10; j++) { + end_key += char('a' + 25); + } + + Slice least(start_key.data(), start_key.size()); + Slice greatest(end_key.data(), end_key.size()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); + + int buff_prefetch_count = 0; + + // Try with different num_file_reads_for_auto_readahead from 0 to 3. + for (size_t i = 0; i < 3; i++) { + table_options.num_file_reads_for_auto_readahead = i; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + s = TryReopen(options); + ASSERT_OK(s); + + int buff_count_with_tuning = 0, buff_count_without_tuning = 0; + int keys_with_tuning = 0, keys_without_tuning = 0; + buff_prefetch_count = 0; + + SyncPoint::GetInstance()->SetCallBack( + "FilePrefetchBuffer::Prefetch:Start", + [&](void*) { buff_prefetch_count++; }); + + SyncPoint::GetInstance()->SetCallBack( + "FilePrefetchBuffer::PrefetchAsyncInternal:Start", + [&](void*) { buff_prefetch_count++; }); + + SyncPoint::GetInstance()->EnableProcessing(); + + ReadOptions ropts; + if (std::get<0>(GetParam())) { + ropts.readahead_size = 32768; + } + if (std::get<1>(GetParam())) { + ropts.async_io = true; + } + + Slice ub = Slice("my_key_uuu"); + ropts.iterate_upper_bound = &ub; + Slice seek_key = Slice("my_key_aaa"); + + // With tuning readahead_size. 
+ { + ASSERT_OK(options.statistics->Reset()); + ropts.auto_readahead_size = true; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + + iter->Seek(seek_key); + + while (iter->Valid()) { + keys_with_tuning++; + iter->Next(); + } + + uint64_t readhahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_GT(readhahead_trimmed, 0); + buff_count_with_tuning = buff_prefetch_count; + } + + // Without tuning readahead_size + { + buff_prefetch_count = 0; + ASSERT_OK(options.statistics->Reset()); + ropts.auto_readahead_size = false; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + + iter->Seek(seek_key); + + while (iter->Valid()) { + keys_without_tuning++; + iter->Next(); + } + buff_count_without_tuning = buff_prefetch_count; + uint64_t readhahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_EQ(readhahead_trimmed, 0); + } + + { + // Verify results with and without tuning. + if (std::get<1>(GetParam())) { + // In case of async_io. + ASSERT_GE(buff_count_with_tuning, buff_count_without_tuning); + } else { + ASSERT_EQ(buff_count_without_tuning, buff_count_with_tuning); + } + // Prefetching should happen. + ASSERT_GT(buff_count_without_tuning, 0); + ASSERT_GT(buff_count_with_tuning, 0); + // No of keys should be equal. + ASSERT_EQ(keys_without_tuning, keys_with_tuning); + } + Close(); + } +} + namespace { #ifdef GFLAGS const int kMaxArgCount = 100; @@ -2590,7 +2737,7 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) { std::unique_ptr r; Read(fname, opts, &r); - FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, fs()); + FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, 0, fs()); Slice result; // Simulate a seek of 4096 bytes at offset 0. Due to the readahead settings, // it will do two reads of 4096+8192 and 8192 @@ -2625,7 +2772,8 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { FilePrefetchBuffer fpb( /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, fs()); + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, + /*upper_bound_offset=*/0, fs()); int read_async_called = 0; SyncPoint::GetInstance()->SetCallBack( diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ee6a1096e..8f62c2dfb 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1569,8 +1569,6 @@ struct ReadOptions { // broken, stale keys could be served in read paths. bool ignore_range_deletions = false; - // Experimental - // // If async_io is enabled, RocksDB will prefetch some of data asynchronously. // RocksDB apply it if reads are sequential and its internal automatic // prefetching. @@ -1707,6 +1705,15 @@ struct ReadOptions { // Default: empty (every table will be scanned) std::function table_filter; + // Experimental + // + // If auto_readahead_size is set to true, it will auto tune the readahead_size + // during scans internally. + // For this feature to enabled, iterate_upper_bound must also be specified. 
+ // + // Default: false + bool auto_readahead_size = false; + // *** END options only relevant to iterators or scans *** // ** For RocksDB internal use only ** diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 7576d4a7b..b07ee1f33 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -511,6 +511,10 @@ enum Tickers : uint32_t { // compressed SST blocks from storage. BYTES_DECOMPRESSED_TO, + // Number of times readahead is trimmed during scans when + // ReadOptions.auto_readahead_size is set. + READAHEAD_TRIMMED, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index c75c233db..16120b037 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5131,6 +5131,8 @@ class TickerTypeJni { return -0x3B; case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT: return -0x3C; + case ROCKSDB_NAMESPACE::Tickers::READAHEAD_TRIMMED: + return -0x3D; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index c167f74c4..ac4cc9213 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -764,6 +764,8 @@ public enum TickerType { */ BLOCK_CHECKSUM_MISMATCH_COUNT((byte) -0x3C), + READAHEAD_TRIMMED((byte) -0x3D), + TICKER_ENUM_MAX((byte) 0x5F); private final byte value; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index c72754f7e..5a7473f2a 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -257,6 +257,7 @@ const std::vector> TickersNameMap = { "rocksdb.number.block_compression_rejected"}, {BYTES_DECOMPRESSED_FROM, "rocksdb.bytes.decompressed.from"}, {BYTES_DECOMPRESSED_TO, "rocksdb.bytes.decompressed.to"}, + {READAHEAD_TRIMMED, "rocksdb.readahead.trimmed"}, }; const std::vector> HistogramsNameMap = { diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index beccc0112..8c313ed61 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -79,6 +79,15 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } } + if (read_options_.auto_readahead_size && read_options_.iterate_upper_bound) { + FindReadAheadSizeUpperBound(); + if (target) { + index_iter_->Seek(*target); + } else { + index_iter_->SeekToFirst(); + } + } + IndexValue v = index_iter_->value(); const bool same_block = block_iter_points_to_real_block_ && v.handle.offset() == prev_block_offset_; @@ -497,4 +506,46 @@ void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() { : BlockUpperBound::kUpperBoundInCurBlock; } } + +void BlockBasedTableIterator::FindReadAheadSizeUpperBound() { + size_t total_bytes_till_upper_bound = 0; + size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); + uint64_t start_offset = index_iter_->value().handle.offset(); + + do { + BlockHandle block_handle = index_iter_->value().handle; + total_bytes_till_upper_bound += block_handle.size(); + total_bytes_till_upper_bound += footer; + + // Can't figure out for current block if current block + // is out of bound. But for next block we can find that. + // If curr block's index key >= iterate_upper_bound, it + // means all the keys in next block or above are out of + // bound. 
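    // Editorial worked example (not part of the patch; sizes are illustrative):
    // suppose the seek lands on a ~4 KB data block at start_offset and the
    // upper bound key falls inside the next ~4 KB block. The loop adds
    // (4096 + footer) for the first block, sees its index key is still below
    // iterate_upper_bound, advances, adds (4096 + footer) for the second
    // block, and then breaks because that block's index key is >= the upper
    // bound. SetUpperBoundOffset() therefore receives
    // start_offset + total_bytes_till_upper_bound, and
    // FilePrefetchBuffer::UpdateReadAheadSizeForUpperBound() later clamps
    // readahead_size_ to (upper_bound_offset_ - offset) - n; e.g. a 32 KB
    // readahead with offset = 0 and n = 4096 against upper_bound_offset_ =
    // 20000 is trimmed to 15904 bytes, counted in READAHEAD_TRIMMED.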
+ bool next_block_out_of_bound = + (user_comparator_.CompareWithoutTimestamp( + index_iter_->user_key(), + /*a_has_ts=*/true, *read_options_.iterate_upper_bound, + /*b_has_ts=*/false) >= 0 + ? true + : false); + + if (next_block_out_of_bound) { + break; + } + + // Since next block is not out of bound, iterate to that + // index block and add it's Data block size to + // readahead_size. + index_iter_->Next(); + + if (!index_iter_->Valid()) { + break; + } + + } while (true); + + block_prefetcher_.SetUpperBoundOffset(start_offset + + total_bytes_till_upper_bound); +} } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 6ea53f331..ce407467e 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -306,5 +306,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { } return true; } + + void FindReadAheadSizeUpperBound(); }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index df6c0a912..57d65d555 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -883,7 +883,7 @@ Status BlockBasedTable::PrefetchTail( 0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */, true /* track_min_offset */, false /* implicit_auto_readahead */, 0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */, - nullptr /* fs */, nullptr /* clock */, stats, + 0 /* upper_bound_offset */, nullptr /* fs */, nullptr /* clock */, stats, FilePrefetchBufferUsage::kTableOpenPrefetchTail)); if (s.ok()) { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 0b5fe1cb8..4ea4212ae 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -682,28 +682,31 @@ struct BlockBasedTable::Rep { uint64_t sst_number_for_tracing() const { return file ? 
TableFileNameToNumber(file->file_name()) : UINT64_MAX; } - void CreateFilePrefetchBuffer( - size_t readahead_size, size_t max_readahead_size, - std::unique_ptr* fpb, bool implicit_auto_readahead, - uint64_t num_file_reads, - uint64_t num_file_reads_for_auto_readahead) const { + void CreateFilePrefetchBuffer(size_t readahead_size, + size_t max_readahead_size, + std::unique_ptr* fpb, + bool implicit_auto_readahead, + uint64_t num_file_reads, + uint64_t num_file_reads_for_auto_readahead, + uint64_t upper_bound_offset) const { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, implicit_auto_readahead, num_file_reads, - num_file_reads_for_auto_readahead, ioptions.fs.get(), ioptions.clock, - ioptions.stats)); + num_file_reads_for_auto_readahead, upper_bound_offset, + ioptions.fs.get(), ioptions.clock, ioptions.stats)); } void CreateFilePrefetchBufferIfNotExists( size_t readahead_size, size_t max_readahead_size, std::unique_ptr* fpb, bool implicit_auto_readahead, - uint64_t num_file_reads, - uint64_t num_file_reads_for_auto_readahead) const { + uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, + uint64_t upper_bound_offset) const { if (!(*fpb)) { CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, implicit_auto_readahead, num_file_reads, - num_file_reads_for_auto_readahead); + num_file_reads_for_auto_readahead, + upper_bound_offset); } } diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 08d4cc9e8..7a36ad58c 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -48,7 +48,8 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, rep->CreateFilePrefetchBufferIfNotExists( compaction_readahead_size_, compaction_readahead_size_, &prefetch_buffer_, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0); + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, + /*upper_bound_offset=*/0); return; } @@ -57,7 +58,7 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, rep->CreateFilePrefetchBufferIfNotExists( readahead_size, readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, - /*num_file_reads_for_auto_readahead=*/0); + /*num_file_reads_for_auto_readahead=*/0, upper_bound_offset_); return; } @@ -81,7 +82,8 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, /*num_file_reads=*/0, - rep->table_options.num_file_reads_for_auto_readahead); + rep->table_options.num_file_reads_for_auto_readahead, + upper_bound_offset_); return; } @@ -111,7 +113,8 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, rep->CreateFilePrefetchBufferIfNotExists( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, - rep->table_options.num_file_reads_for_auto_readahead); + rep->table_options.num_file_reads_for_auto_readahead, + upper_bound_offset_); return; } @@ -134,7 +137,8 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, rep->CreateFilePrefetchBufferIfNotExists( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, - rep->table_options.num_file_reads_for_auto_readahead); + 
rep->table_options.num_file_reads_for_auto_readahead, + upper_bound_offset_); return; } diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index e2032ed9d..eecb12f40 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -53,6 +53,10 @@ class BlockPrefetcher { &initial_auto_readahead_size_); } + void SetUpperBoundOffset(uint64_t upper_bound_offset) { + upper_bound_offset_ = upper_bound_offset; + } + private: // Readahead size used in compaction, its value is used only if // lookup_context_.caller = kCompaction. @@ -69,5 +73,7 @@ class BlockPrefetcher { uint64_t prev_offset_ = 0; size_t prev_len_ = 0; std::unique_ptr prefetch_buffer_; + + uint64_t upper_bound_offset_ = 0; }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 9b53fe72f..84888d0e9 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -497,7 +497,8 @@ Status PartitionedFilterBlockReader::CacheDependencies( tail_prefetch_buffer->GetPrefetchOffset() > prefetch_off) { rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /* Implicit autoreadahead */, - 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/); + 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, + /*upper_bound_offset*/ 0); IOOptions opts; s = rep->file->PrepareIOOptions(ro, opts); diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index 0c862b9b2..b4dc5fce2 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -169,7 +169,8 @@ Status PartitionIndexReader::CacheDependencies( tail_prefetch_buffer->GetPrefetchOffset() > prefetch_off) { rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /*Implicit auto readahead*/, - 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/); + 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, + /*upper_bound_offset*/ 0); IOOptions opts; { Status s = rep->file->PrepareIOOptions(ro, opts); diff --git a/unreleased_history/new_features/auto_readahead.md b/unreleased_history/new_features/auto_readahead.md new file mode 100644 index 000000000..e962fa826 --- /dev/null +++ b/unreleased_history/new_features/auto_readahead.md @@ -0,0 +1 @@ +Add a new feature to trim readahead_size during scans upto upper_bound when iterate_upper_bound is specified. It's enabled through ReadOptions.auto_readahead_size. Users must also specify ReadOptions.iterate_upper_bound. From f53018c0c8a312be86eb0df956e03d4c81706030 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 18 Aug 2023 17:47:22 -0700 Subject: [PATCH 053/386] Improve PrefetchTest.Basic with explicit flush and file num variable (#11720) Summary: **Context/Summary:** as title, should be harmless. And it's a guessed fix to https://github.com/facebook/rocksdb/issues/11717 while no repro has obtained on my end yet. 
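(Editorial aside before the next fix: a minimal user-side sketch of the readahead trimming introduced by #11684 above. It is not part of either patch; `db` is assumed to be an already-open rocksdb::DB*.)

  #include <memory>

  #include "rocksdb/db.h"

  void ScanWithTrimmedReadahead(rocksdb::DB* db) {
    rocksdb::ReadOptions ro;
    rocksdb::Slice upper_bound("my_key_uuu");  // same bound the new test uses
    ro.iterate_upper_bound = &upper_bound;     // required for trimming to apply
    ro.auto_readahead_size = true;             // new option from this patch
    std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ro));
    for (it->Seek("my_key_aaa"); it->Valid(); it->Next()) {
      // Prefetching beyond upper_bound is trimmed; occurrences show up in the
      // new READAHEAD_TRIMMED ticker.
    }
  }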
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11720 Test Plan: existing tests Reviewed By: cbi42 Differential Revision: D48475661 Pulled By: hx235 fbshipit-source-id: 7c7390319f094c540e703fe2e78a8d601b7a894b --- file/prefetch_test.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index f7c1a6fed..14c7c28a5 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -169,18 +169,18 @@ TEST_P(PrefetchTest, Basic) { // create first key range WriteBatch batch; for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i), "value for range 1 key")); + ASSERT_OK(batch.Put(BuildKey(i), "v1")); } ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(db_->Flush(FlushOptions())); // create second key range batch.Clear(); for (int i = 0; i < kNumKeys; i++) { - ASSERT_OK(batch.Put(BuildKey(i, "key2"), "value for range 2 key")); + ASSERT_OK(batch.Put(BuildKey(i, "key2"), "v2")); } ASSERT_OK(db_->Write(WriteOptions(), &batch)); - ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_OK(db_->Flush(FlushOptions())); // delete second key range batch.Clear(); @@ -190,16 +190,19 @@ TEST_P(PrefetchTest, Basic) { ASSERT_OK(db_->Write(WriteOptions(), &batch)); ASSERT_OK(db_->Flush(FlushOptions())); + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + const size_t num_file = metadata.size(); // To verify SST file tail prefetch (once per file) during flush output // verification if (support_prefetch && !use_direct_io) { ASSERT_TRUE(fs->IsPrefetchCalled()); - ASSERT_EQ(3, fs->GetPrefetchCount()); + ASSERT_EQ(num_file, fs->GetPrefetchCount()); ASSERT_EQ(0, buff_prefetch_count); fs->ClearPrefetchCount(); } else { ASSERT_FALSE(fs->IsPrefetchCalled()); - ASSERT_EQ(buff_prefetch_count, 3); + ASSERT_EQ(buff_prefetch_count, num_file); buff_prefetch_count = 0; } From a9770b185d88dfca7c8df7b1835e649ae4d6950d Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 21 Aug 2023 11:53:40 -0700 Subject: [PATCH 054/386] Circleci macos sunset (#11633) Summary: [draft] this PR is created in order to test CI changes Closes: https://github.com/facebook/rocksdb/pull/11543 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11633 Reviewed By: akankshamahajan15 Differential Revision: D48525552 Pulled By: cbi42 fbshipit-source-id: 758d57f248304213228af459789459cc2f0bf419 --- .circleci/config.yml | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c614d3f0e..3e10aedde 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,7 +16,8 @@ commands: - run: name: Install JDK 8 on macos command: | - brew install --cask adoptopenjdk/openjdk/adoptopenjdk8 + HOMEBREW_NO_AUTO_UPDATE=1 brew tap bell-sw/liberica + HOMEBREW_NO_AUTO_UPDATE=1 brew install --cask liberica-jdk8 increase-max-open-files-on-macos: steps: @@ -209,21 +210,21 @@ executors: jobs: build-macos: macos: - xcode: 12.5.1 - resource_class: large + xcode: 14.3.1 + resource_class: macos.m1.medium.gen1 environment: ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now steps: - increase-max-open-files-on-macos - install-gflags-on-macos - pre-steps-macos - - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all + - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=16 -j16 all - post-steps build-macos-cmake: macos: - xcode: 
12.5.1 - resource_class: large + xcode: 14.3.1 + resource_class: macos.m1.medium.gen1 parameters: run_even_tests: description: run even or odd tests, used to split tests to 2 groups @@ -239,20 +240,20 @@ jobs: command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. - run: name: "Build tests" - command: cd build && make V=1 -j32 + command: cd build && make V=1 -j16 - when: condition: << parameters.run_even_tests >> steps: - run: name: "Run even tests" - command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2 + command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j16 -I 0,,2 - when: condition: not: << parameters.run_even_tests >> steps: - run: name: "Run odd tests" - command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2 + command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j16 -I 1,,2 - post-steps build-linux: @@ -607,10 +608,10 @@ jobs: build-macos-java: macos: - xcode: 12.5.1 - resource_class: large + xcode: 14.3.1 + resource_class: macos.m1.medium.gen1 environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + JAVA_HOME: /Library/Java/JavaVirtualMachines/liberica-jdk-8.jdk/Contents/Home ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash steps: - increase-max-open-files-on-macos @@ -632,10 +633,10 @@ jobs: build-macos-java-static: macos: - xcode: 12.5.1 - resource_class: large + xcode: 14.3.1 + resource_class: macos.m1.medium.gen1 environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + JAVA_HOME: /Library/Java/JavaVirtualMachines/liberica-jdk-8.jdk/Contents/Home steps: - increase-max-open-files-on-macos - install-gflags-on-macos @@ -657,10 +658,10 @@ jobs: build-macos-java-static-universal: macos: - xcode: 12.5.1 - resource_class: large + xcode: 14.3.1 + resource_class: macos.m1.medium.gen1 environment: - JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home + JAVA_HOME: /Library/Java/JavaVirtualMachines/liberica-jdk-8.jdk/Contents/Home steps: - increase-max-open-files-on-macos - install-gflags-on-macos From 03a74411c01721f714553f2e300f8dbb2829f328 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 21 Aug 2023 12:14:03 -0700 Subject: [PATCH 055/386] Add unit test for default temperature (#11722) Summary: This piggybacks on the existing last level file temperature statistics test to also verify that the default temperature becomes effective. While adding this unit test, I found that the approach of swapping in the default temperature in `VersionBuilder::LoadTableHandlers` misses the L0 files created from flush and only works for pre-existing SST files and SST files created by compaction. So this PR moves that logic to `TableCache::GetTableReader`. 
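For orientation before the long diff below, these are the two temperature options the new test exercises (the option and ticker names are taken from the test itself; the snippet is only an illustrative sketch, not part of the patch):

```
// Illustrative sketch only: files whose temperature is unknown pick up
// default_temperature, while last-level files keep the last-level temperature
// persisted in the MANIFEST.
rocksdb::Options options;
options.default_temperature = rocksdb::Temperature::kHot;
options.bottommost_temperature = rocksdb::Temperature::kWarm;
options.statistics = rocksdb::CreateDBStatistics();
// With this setup, reads of non-last-level files are expected to show up
// under the HOT_FILE_READ_* tickers and last-level reads under the
// WARM_FILE_READ_* tickers, which is what the new assertions check.
```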
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11722 Test Plan: ``` ./db_test2 --gtest_filter="*LastLevelStatistics*" make all check ``` Reviewed By: pdillinger Differential Revision: D48489171 Pulled By: jowlyzhang fbshipit-source-id: ac29f7d484916f3218729594c5bb35c4f2979ac2 --- db/db_test2.cc | 33 +++++++++++++++++++++++++++++++++ db/table_cache.cc | 4 ++++ db/version_builder.cc | 7 +------ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/db/db_test2.cc b/db/db_test2.cc index d3bc34fee..c9fbe15f4 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -6876,6 +6876,7 @@ TEST_F(DBTest2, LastLevelTemperatureUniversal) { TEST_F(DBTest2, LastLevelStatistics) { Options options = CurrentOptions(); options.bottommost_temperature = Temperature::kWarm; + options.default_temperature = Temperature::kHot; options.level0_file_num_compaction_trigger = 2; options.level_compaction_dynamic_level_bytes = true; options.statistics = CreateDBStatistics(); @@ -6889,6 +6890,10 @@ TEST_F(DBTest2, LastLevelStatistics) { ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), 0); ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), 0); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), 0); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), 0); @@ -6899,6 +6904,10 @@ TEST_F(DBTest2, LastLevelStatistics) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ("bar", Get("bar")); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), @@ -6919,6 +6928,30 @@ TEST_F(DBTest2, LastLevelStatistics) { pre_bytes); ASSERT_GT(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), pre_count); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(HOT_FILE_READ_COUNT)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(WARM_FILE_READ_COUNT)); + + // Not a realistic setting to make last level kWarm and default temp kCold. + // This is just for testing default temp can be reset on reopen while the + // last level temp is consistent across DB reopen because those file's temp + // are persisted in manifest. 
+ options.default_temperature = Temperature::kCold; + ASSERT_OK(options.statistics->Reset()); + Reopen(options); + ASSERT_EQ("bar", Get("bar")); + + ASSERT_EQ(0, options.statistics->getTickerCount(HOT_FILE_READ_BYTES)); + + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_BYTES), + options.statistics->getTickerCount(COLD_FILE_READ_BYTES)); + ASSERT_EQ(options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT), + options.statistics->getTickerCount(COLD_FILE_READ_COUNT)); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_BYTES), options.statistics->getTickerCount(WARM_FILE_READ_BYTES)); ASSERT_EQ(options.statistics->getTickerCount(LAST_LEVEL_READ_COUNT), diff --git a/db/table_cache.cc b/db/table_cache.cc index bdbb47a2f..2a4f33505 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -129,6 +129,10 @@ Status TableCache::GetTableReader( if (!sequential_mode && ioptions_.advise_random_on_open) { file->Hint(FSRandomAccessFile::kRandom); } + if (ioptions_.default_temperature != Temperature::kUnknown && + file_temperature == Temperature::kUnknown) { + file_temperature = ioptions_.default_temperature; + } StopWatch sw(ioptions_.clock, ioptions_.stats, TABLE_OPEN_IO_MICROS); std::unique_ptr file_reader( new RandomAccessFileReader(std::move(file), fname, ioptions_.clock, diff --git a/db/version_builder.cc b/db/version_builder.cc index b78a91643..210b0de86 100644 --- a/db/version_builder.cc +++ b/db/version_builder.cc @@ -1323,11 +1323,6 @@ class VersionBuilder::Rep { auto* file_meta = files_meta[file_idx].first; int level = files_meta[file_idx].second; - Temperature file_temperature = file_meta->temperature; - if (ioptions_->default_temperature != Temperature::kUnknown && - file_temperature == Temperature::kUnknown) { - file_temperature = ioptions_->default_temperature; - } TableCache::TypedHandle* handle = nullptr; statuses[file_idx] = table_cache_->FindTable( read_options, file_options_, @@ -1335,7 +1330,7 @@ class VersionBuilder::Rep { block_protection_bytes_per_key, prefix_extractor, false /*no_io */, internal_stats->GetFileReadHist(level), false, level, prefetch_index_and_filter_in_cache, max_file_size_for_l0_meta_pin, - file_temperature); + file_meta->temperature); if (handle != nullptr) { file_meta->table_reader_handle = handle; // Load table_reader From 4fa2c017190a99a9ad44562b12aeef6f6d2a1d16 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 21 Aug 2023 12:14:57 -0700 Subject: [PATCH 056/386] Replace existing waitforcompaction with new WaitForCompact API in db_bench_tool (#11727) Summary: As the new API to wait for compaction is available (https://github.com/facebook/rocksdb/issues/11436), we can now replace the existing logic of waiting in db_bench_tool with the new API. 
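In short, the property-polling loop collapses into one blocking call. A minimal sketch, assuming only the public `DB::WaitForCompact` / `WaitForCompactOptions` API used by this patch (the helper name is made up for illustration):

```
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Sketch: block until there are no pending or running flushes/compactions.
rocksdb::Status WaitForAllCompactions(rocksdb::DB* db) {
  rocksdb::WaitForCompactOptions wait_opts;  // defaults; fields such as
                                             // flush, abort_on_pause,
                                             // close_db and timeout can
                                             // also be set if needed
  return db->WaitForCompact(wait_opts);
}
```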
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11727 Test Plan: ``` ./db_bench --benchmarks="fillrandom,compactall,waitforcompaction,readrandom" ``` **Before change** ``` Set seed to 1692635571470041 because --seed was 0 Initializing RocksDB Options from the specified file Initializing RocksDB Options from command-line flags Integrated BlobDB: blob cache disabled RocksDB: version 8.6.0 Date: Mon Aug 21 09:33:40 2023 CPU: 80 * Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz CPUCache: 28160 KB Keys: 16 bytes each (+ 0 bytes user-defined timestamp) Values: 100 bytes each (50 bytes after compression) Entries: 1000000 Prefix: 0 bytes Keys per prefix: 0 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) Write rate: 0 bytes/second Read rate: 0 ops/second Compression: Snappy Compression sampling rate: 0 Memtablerep: SkipListFactory Perf Level: 1 WARNING: Optimization is disabled: benchmarks unnecessarily slow WARNING: Assertions are enabled; benchmarks unnecessarily slow ------------------------------------------------ Initializing RocksDB Options from the specified file Initializing RocksDB Options from command-line flags Integrated BlobDB: blob cache disabled DB path: [/tmp/rocksdbtest-226125/dbbench] fillrandom : 51.826 micros/op 19295 ops/sec 51.826 seconds 1000000 operations; 2.1 MB/s waitforcompaction(/tmp/rocksdbtest-226125/dbbench): started waitforcompaction(/tmp/rocksdbtest-226125/dbbench): finished waitforcompaction(/tmp/rocksdbtest-226125/dbbench): started waitforcompaction(/tmp/rocksdbtest-226125/dbbench): finished DB path: [/tmp/rocksdbtest-226125/dbbench] readrandom : 39.042 micros/op 25613 ops/sec 39.042 seconds 1000000 operations; 1.8 MB/s (632886 of 1000000 found) ``` **After change** ``` Set seed to 1692636574431745 because --seed was 0 Initializing RocksDB Options from the specified file Initializing RocksDB Options from command-line flags Integrated BlobDB: blob cache disabled RocksDB: version 8.6.0 Date: Mon Aug 21 09:49:34 2023 CPU: 80 * Intel(R) Xeon(R) Gold 6138 CPU @ 2.00GHz CPUCache: 28160 KB Keys: 16 bytes each (+ 0 bytes user-defined timestamp) Values: 100 bytes each (50 bytes after compression) Entries: 1000000 Prefix: 0 bytes Keys per prefix: 0 RawSize: 110.6 MB (estimated) FileSize: 62.9 MB (estimated) Write rate: 0 bytes/second Read rate: 0 ops/second Compression: Snappy Compression sampling rate: 0 Memtablerep: SkipListFactory Perf Level: 1 WARNING: Optimization is disabled: benchmarks unnecessarily slow WARNING: Assertions are enabled; benchmarks unnecessarily slow ------------------------------------------------ Initializing RocksDB Options from the specified file Initializing RocksDB Options from command-line flags Integrated BlobDB: blob cache disabled DB path: [/tmp/rocksdbtest-226125/dbbench] fillrandom : 51.271 micros/op 19504 ops/sec 51.271 seconds 1000000 operations; 2.2 MB/s waitforcompaction(/tmp/rocksdbtest-226125/dbbench): started waitforcompaction(/tmp/rocksdbtest-226125/dbbench): finished with status (OK) DB path: [/tmp/rocksdbtest-226125/dbbench] readrandom : 39.264 micros/op 25468 ops/sec 39.264 seconds 1000000 operations; 1.8 MB/s (632921 of 1000000 found) ``` Reviewed By: ajkr Differential Revision: D48524667 Pulled By: jaykorean fbshipit-source-id: 1052a15b2ed79a35165ec4d9998d0454b2552ef4 --- tools/db_bench_tool.cc | 40 +++------------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index db66d0721..0a133289b 100644 --- 
a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -8114,58 +8114,24 @@ class Benchmark { } void WaitForCompactionHelper(DBWithColumnFamilies& db) { - // This is an imperfect way of waiting for compaction. The loop and sleep - // is done because a thread that finishes a compaction job should get a - // chance to pickup a new compaction job. - - std::vector keys = {DB::Properties::kMemTableFlushPending, - DB::Properties::kNumRunningFlushes, - DB::Properties::kCompactionPending, - DB::Properties::kNumRunningCompactions}; - fprintf(stdout, "waitforcompaction(%s): started\n", db.db->GetName().c_str()); - while (true) { - bool retry = false; + Status s = db.db->WaitForCompact(WaitForCompactOptions()); - for (const auto& k : keys) { - uint64_t v; - if (!db.db->GetIntProperty(k, &v)) { - fprintf(stderr, "waitforcompaction(%s): GetIntProperty(%s) failed\n", - db.db->GetName().c_str(), k.c_str()); - exit(1); - } else if (v > 0) { - fprintf(stdout, - "waitforcompaction(%s): active(%s). Sleep 10 seconds\n", - db.db->GetName().c_str(), k.c_str()); - FLAGS_env->SleepForMicroseconds(10 * 1000000); - retry = true; - break; - } - } - - if (!retry) { - fprintf(stdout, "waitforcompaction(%s): finished\n", - db.db->GetName().c_str()); - return; - } - } + fprintf(stdout, "waitforcompaction(%s): finished with status (%s)\n", + db.db->GetName().c_str(), s.ToString().c_str()); } void WaitForCompaction() { // Give background threads a chance to wake FLAGS_env->SleepForMicroseconds(5 * 1000000); - // I am skeptical that this check race free. I hope that checking twice - // reduces the chance. if (db_.db != nullptr) { WaitForCompactionHelper(db_); - WaitForCompactionHelper(db_); } else { for (auto& db_with_cfh : multi_dbs_) { WaitForCompactionHelper(db_with_cfh); - WaitForCompactionHelper(db_with_cfh); } } } From 4b5352070975dbffb64c6fe9ace3f3789b4ffa02 Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 21 Aug 2023 13:25:04 -0700 Subject: [PATCH 057/386] Update HISTORY.md and version.h for 8.6 (#11728) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11728 Reviewed By: jaykorean, jowlyzhang Differential Revision: D48527100 Pulled By: anand1976 fbshipit-source-id: c48baa44e538fb6bfd3fe7f19046746d3540763f --- HISTORY.md | 25 +++++++++++++++++++ include/rocksdb/version.h | 2 +- tools/check_format_compatible.sh | 2 +- .../behavior_changes/more_sst_read_micros.md | 1 - .../universal_ttl_periodic_compaction.md | 1 - ...ng_componeng_too_large_file_ttl_booster.md | 1 - .../new_features/1_context_checksum.md | 1 - .../new_features/auto_readahead.md | 1 - .../compaction_verify_input_count.md | 1 - ...delay_bottommost_single_file_compaction.md | 1 - .../new_features/enable_disable_udt.md | 1 - ...lement_allow_cache_hit_admission_policy.md | 1 - .../new_features/memetable_range_del_limit.md | 1 - .../put_entity_support_in_sst_file_writer.md | 1 - .../timeout_for_wait_for_compact_api.md | 1 - .../user_read_io_activity_stats.md | 1 - .../wait_for_compact_close_db_option.md | 1 - .../new_features/zstd-checksum.md | 1 - ...ark_dep_access_hint_on_compaction_start.md | 1 - 19 files changed, 27 insertions(+), 18 deletions(-) delete mode 100644 unreleased_history/behavior_changes/more_sst_read_micros.md delete mode 100644 unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md delete mode 100644 unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md delete mode 100644 unreleased_history/new_features/1_context_checksum.md delete mode 100644 
unreleased_history/new_features/auto_readahead.md delete mode 100644 unreleased_history/new_features/compaction_verify_input_count.md delete mode 100644 unreleased_history/new_features/delay_bottommost_single_file_compaction.md delete mode 100644 unreleased_history/new_features/enable_disable_udt.md delete mode 100644 unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md delete mode 100644 unreleased_history/new_features/memetable_range_del_limit.md delete mode 100644 unreleased_history/new_features/put_entity_support_in_sst_file_writer.md delete mode 100644 unreleased_history/new_features/timeout_for_wait_for_compact_api.md delete mode 100644 unreleased_history/new_features/user_read_io_activity_stats.md delete mode 100644 unreleased_history/new_features/wait_for_compact_close_db_option.md delete mode 100644 unreleased_history/new_features/zstd-checksum.md delete mode 100644 unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md diff --git a/HISTORY.md b/HISTORY.md index 71b91d926..27a248b4e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,31 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.6.0 (08/18/2023) +### New Features +* Added enhanced data integrity checking on SST files with new format_version=6. Performance impact is very small or negligible. Previously if SST data was misplaced or re-arranged by the storage layer, it could pass block checksum with higher than 1 in 4 billion probability. With format_version=6, block checksums depend on what file they are in and location within the file. This way, misplaced SST data is no more likely to pass checksum verification than randomly corrupted data. Also in format_version=6, SST footers are checksum-protected. +* Add a new feature to trim readahead_size during scans upto upper_bound when iterate_upper_bound is specified. It's enabled through ReadOptions.auto_readahead_size. Users must also specify ReadOptions.iterate_upper_bound. +* RocksDB will compare the number of input keys to the number of keys processed after each compaction. Compaction will fail and report Corruption status if the verification fails. Option `compaction_verify_record_count` is introduced for this purpose and is enabled by default. +* Add a CF option `bottommost_file_compaction_delay` to allow specifying the delay of bottommost level single-file compactions. +* Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. +* Implement a new admission policy for the compressed secondary cache that admits blocks evicted from the primary cache with the hit bit set. This policy can be specified in TieredVolatileCacheOptions by setting the newly added adm_policy option. +* Add a column family option `memtable_max_range_deletions` that limits the number of range deletions in a memtable. RocksDB will try to do an automatic flush after the limit is reached. 
(#11358) +* Add PutEntity API in sst_file_writer +* Add `timeout` in microsecond option to `WaitForCompactOptions` to allow timely termination of prolonged waiting in scenarios like recurring recoverable errors, such as out-of-space situations and continuous write streams that sustain ongoing flush and compactions +* New statistics `rocksdb.file.read.{db.open|get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. +* Add close_db option to `WaitForCompactOptions` to call Close() after waiting is done. +* Add a new compression option `CompressionOptions::checksum` for enabling ZSTD's checksum feature to detect corruption during decompression. + +### Public API Changes +* Mark `Options::access_hint_on_compaction_start` related APIs as deprecated. See #11631 for alternative behavior. + +### Behavior Changes +* Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file +* For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. + +### Bug Fixes +* Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). + ## 8.5.0 (07/21/2023) ### Public API Changes * Removed recently added APIs `GeneralCache` and `MakeSharedGeneralCache()` as our plan changed to stop exposing a general-purpose cache interface. The old forms of these APIs, `Cache` and `NewLRUCache()`, are still available, although general-purpose caching support will be dropped eventually. diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index de6629d80..8b1f0f651 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -12,7 +12,7 @@ // NOTE: in 'main' development branch, this should be the *next* // minor or major version number planned for release. #define ROCKSDB_MAJOR 8 -#define ROCKSDB_MINOR 6 +#define ROCKSDB_MINOR 7 #define ROCKSDB_PATCH 0 // Do not use these. 
We made the mistake of declaring macros starting with diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index 1282d1375..eff949ce0 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment diff --git a/unreleased_history/behavior_changes/more_sst_read_micros.md b/unreleased_history/behavior_changes/more_sst_read_micros.md deleted file mode 100644 index 99be430c8..000000000 --- a/unreleased_history/behavior_changes/more_sst_read_micros.md +++ /dev/null @@ -1 +0,0 @@ -Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file diff --git a/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md b/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md deleted file mode 100644 index 14fbf4f69..000000000 --- a/unreleased_history/behavior_changes/universal_ttl_periodic_compaction.md +++ /dev/null @@ -1 +0,0 @@ -For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md b/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md deleted file mode 100644 index f76830232..000000000 --- a/unreleased_history/bug_fixes/shifting_componeng_too_large_file_ttl_booster.md +++ /dev/null @@ -1 +0,0 @@ -Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). \ No newline at end of file diff --git a/unreleased_history/new_features/1_context_checksum.md b/unreleased_history/new_features/1_context_checksum.md deleted file mode 100644 index 303613cad..000000000 --- a/unreleased_history/new_features/1_context_checksum.md +++ /dev/null @@ -1 +0,0 @@ -* Added enhanced data integrity checking on SST files with new format_version=6. Performance impact is very small or negligible. Previously if SST data was misplaced or re-arranged by the storage layer, it could pass block checksum with higher than 1 in 4 billion probability. With format_version=6, block checksums depend on what file they are in and location within the file. This way, misplaced SST data is no more likely to pass checksum verification than randomly corrupted data. Also in format_version=6, SST footers are checksum-protected. 
diff --git a/unreleased_history/new_features/auto_readahead.md b/unreleased_history/new_features/auto_readahead.md deleted file mode 100644 index e962fa826..000000000 --- a/unreleased_history/new_features/auto_readahead.md +++ /dev/null @@ -1 +0,0 @@ -Add a new feature to trim readahead_size during scans upto upper_bound when iterate_upper_bound is specified. It's enabled through ReadOptions.auto_readahead_size. Users must also specify ReadOptions.iterate_upper_bound. diff --git a/unreleased_history/new_features/compaction_verify_input_count.md b/unreleased_history/new_features/compaction_verify_input_count.md deleted file mode 100644 index 32cfe0910..000000000 --- a/unreleased_history/new_features/compaction_verify_input_count.md +++ /dev/null @@ -1 +0,0 @@ -* RocksDB will compare the number of input keys to the number of keys processed after each compaction. Compaction will fail and report Corruption status if the verification fails. Option `compaction_verify_record_count` is introduced for this purpose and is enabled by default. diff --git a/unreleased_history/new_features/delay_bottommost_single_file_compaction.md b/unreleased_history/new_features/delay_bottommost_single_file_compaction.md deleted file mode 100644 index ca703674b..000000000 --- a/unreleased_history/new_features/delay_bottommost_single_file_compaction.md +++ /dev/null @@ -1 +0,0 @@ -Add a CF option `bottommost_file_compaction_delay` to allow specifying the delay of bottommost level single-file compactions. \ No newline at end of file diff --git a/unreleased_history/new_features/enable_disable_udt.md b/unreleased_history/new_features/enable_disable_udt.md deleted file mode 100644 index d4a7ce850..000000000 --- a/unreleased_history/new_features/enable_disable_udt.md +++ /dev/null @@ -1 +0,0 @@ -Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. \ No newline at end of file diff --git a/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md b/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md deleted file mode 100644 index 97ed644fa..000000000 --- a/unreleased_history/new_features/implement_allow_cache_hit_admission_policy.md +++ /dev/null @@ -1 +0,0 @@ -Implement a new admission policy for the compressed secondary cache that admits blocks evicted from the primary cache with the hit bit set. This policy can be specified in TieredVolatileCacheOptions by setting the newly added adm_policy option. diff --git a/unreleased_history/new_features/memetable_range_del_limit.md b/unreleased_history/new_features/memetable_range_del_limit.md deleted file mode 100644 index 72c4d53f4..000000000 --- a/unreleased_history/new_features/memetable_range_del_limit.md +++ /dev/null @@ -1 +0,0 @@ -Add a column family option `memtable_max_range_deletions` that limits the number of range deletions in a memtable. RocksDB will try to do an automatic flush after the limit is reached. 
(#11358) \ No newline at end of file diff --git a/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md b/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md deleted file mode 100644 index 7420a67fa..000000000 --- a/unreleased_history/new_features/put_entity_support_in_sst_file_writer.md +++ /dev/null @@ -1 +0,0 @@ -Add PutEntity API in sst_file_writer diff --git a/unreleased_history/new_features/timeout_for_wait_for_compact_api.md b/unreleased_history/new_features/timeout_for_wait_for_compact_api.md deleted file mode 100644 index fccc34f56..000000000 --- a/unreleased_history/new_features/timeout_for_wait_for_compact_api.md +++ /dev/null @@ -1 +0,0 @@ -Add `timeout` in microsecond option to `WaitForCompactOptions` to allow timely termination of prolonged waiting in scenarios like recurring recoverable errors, such as out-of-space situations and continuous write streams that sustain ongoing flush and compactions diff --git a/unreleased_history/new_features/user_read_io_activity_stats.md b/unreleased_history/new_features/user_read_io_activity_stats.md deleted file mode 100644 index 7a36aa98f..000000000 --- a/unreleased_history/new_features/user_read_io_activity_stats.md +++ /dev/null @@ -1 +0,0 @@ -New statistics `rocksdb.file.read.{db.open|get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. diff --git a/unreleased_history/new_features/wait_for_compact_close_db_option.md b/unreleased_history/new_features/wait_for_compact_close_db_option.md deleted file mode 100644 index 7fd48ff2e..000000000 --- a/unreleased_history/new_features/wait_for_compact_close_db_option.md +++ /dev/null @@ -1 +0,0 @@ -Add close_db option to `WaitForCompactOptions` to call Close() after waiting is done. diff --git a/unreleased_history/new_features/zstd-checksum.md b/unreleased_history/new_features/zstd-checksum.md deleted file mode 100644 index 033576c9c..000000000 --- a/unreleased_history/new_features/zstd-checksum.md +++ /dev/null @@ -1 +0,0 @@ -* Add a new compression option `CompressionOptions::checksum` for enabling ZSTD's checksum feature to detect corruption during decompression. \ No newline at end of file diff --git a/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md b/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md deleted file mode 100644 index 066caaf3c..000000000 --- a/unreleased_history/public_api_changes/mark_dep_access_hint_on_compaction_start.md +++ /dev/null @@ -1 +0,0 @@ -Mark `Options::access_hint_on_compaction_start` related APIs as deprecated. See #11631 for alternative behavior. \ No newline at end of file From 2a9f3b6cc5aa072ce823fd35dcdb0e00c88c9279 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 21 Aug 2023 15:04:22 -0700 Subject: [PATCH 058/386] Try to use a db's OPTIONS file for some ldb commands (#11721) Summary: For some ldb commands that don't need to open the DB, it's still useful to use the DB's existing OPTIONS file if it's available. 
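The ldb change itself simply calls the command's existing `PrepareOptions()` step before doing its work (see the diff below). For reference, a hedged sketch of what picking up a DB's existing OPTIONS file looks like through the public options_util API; `db_path` is an assumed variable holding the DB directory, and the snippet is illustrative rather than the ldb implementation:

```
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/options_util.h"

// Sketch: prefer the options the DB itself persisted, if any.
rocksdb::ConfigOptions config_options;
rocksdb::DBOptions db_options;
std::vector<rocksdb::ColumnFamilyDescriptor> cf_descs;
rocksdb::Status s = rocksdb::LoadLatestOptions(config_options, db_path,
                                               &db_options, &cf_descs);
if (!s.ok()) {
  // No usable OPTIONS file; fall back to command-line/default options.
}
```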
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11721 Reviewed By: pdillinger Differential Revision: D48485540 Pulled By: jowlyzhang fbshipit-source-id: 2d2db837523044066f1a2c4b59a5c03f6cd35e6b --- tools/ldb_cmd.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 46dd36ccc..263cbe472 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1372,6 +1372,7 @@ ManifestDumpCommand::ManifestDumpCommand( } void ManifestDumpCommand::DoCommand() { + PrepareOptions(); std::string manifestfile; if (!path_.empty()) { @@ -1514,6 +1515,7 @@ FileChecksumDumpCommand::FileChecksumDumpCommand( } void FileChecksumDumpCommand::DoCommand() { + PrepareOptions(); // print out the checksum information in the following format: // sst file number, checksum function name, checksum value // sst file number, checksum function name, checksum value @@ -1618,6 +1620,7 @@ ListColumnFamiliesCommand::ListColumnFamiliesCommand( : LDBCommand(options, flags, false, BuildCmdLineOptions({})) {} void ListColumnFamiliesCommand::DoCommand() { + PrepareOptions(); std::vector column_families; Status s = DB::ListColumnFamilies(options_, db_path_, &column_families); if (!s.ok()) { @@ -2731,6 +2734,7 @@ void WALDumperCommand::Help(std::string& ret) { } void WALDumperCommand::DoCommand() { + PrepareOptions(); DumpWalFile(options_, wal_file_, print_header_, print_values_, is_write_committed_, &exec_state_); } From 5e0584bd73773544d78d5a359e2ef73eea5bf8d7 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Tue, 22 Aug 2023 09:47:04 -0700 Subject: [PATCH 059/386] Do not drop unsynced data during reopen in stress test (#11731) Summary: Currently the stress test does not support restoring expected state (to a specific sequence number) when there is unsynced data loss during the reopen phase. This causes a few internal stress test failures with errors like inconsistent value. This PR disables dropping unsynced data during reopen to avoid failures due to this issue. We can re-enable it later if we decide to support unsynced data loss during DB reopen in the stress test. 
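Condensed, the fix gates the unsynced-data-loss fault injection on whether this is the initial open or a mid-test reopen. A simplified sketch of the control flow, boiled down from the db_stress diff below (not the complete function):

```
// Simplified from db_stress: only simulate unsynced data loss on the initial
// Open(); skip it on Reopen(), since expected state cannot yet be restored to
// a specific sequence number after such a loss.
void StressTest::Open(SharedState* shared, bool reopen) {
  // ... open/retry loop ...
  if (!reopen) {
    Random rand(static_cast<uint32_t>(FLAGS_seed));
    if (rand.OneIn(2)) {
      fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), nullptr);
    }
    if (rand.OneIn(3)) {
      fault_fs_guard->DropUnsyncedFileData();
    } else if (rand.OneIn(2)) {
      fault_fs_guard->DropRandomUnsyncedFileData(&rand);
    }
  }
  // ...
}
// Reopen() then calls Open(thread->shared, /*reopen=*/true).
```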
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11731 Test Plan: * Running this test a few times can fail for inconsistent value before this change ``` ./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_concurrent_memtable_write=1 --allow_data_in_errors=True --async_io=0 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=0 --batch_protection_bytes_per_key=0 --block_protection_bytes_per_key=8 --block_size=16384 --bloom_bits=20.57166126835524 --bottommost_compression_type=disable --bytes_per_sync=262144 --cache_index_and_filter_blocks=1 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=1 --charge_filter_construction=0 --charge_table_reader=1 --checkpoint_one_in=0 --checksum_type=kxxHash --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=3 --compaction_style=1 --compaction_ttl=100 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=zstd --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=0 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox --db_write_buffer_size=0 --delpercent=4 --delrangepercent=1 --destroy_db_initially=0 --detect_filter_construct_corruption=0 --disable_wal=0 --enable_compaction_filter=0 --enable_pipelined_write=1 --enable_thread_tracking=0 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=1 --file_checksum_impl=big --flush_one_in=1000000 --format_version=3 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=6 --index_type=3 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --log2_keys_per_lock=10 --long_running_snapshots=1 --manual_wal_flush_one_in=1000000 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=0 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16777216 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=0 --memtable_max_range_deletions=100 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=1 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=2 --mmap_read=0 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=5 --open_write_fault_one_in=0 --ops_per_thread=200000 --optimize_filters_for_memory=0 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=10 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=1 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=50 --recycle_log_file_num=0 --reopen=20 --ribbon_starting_level=0 --secondary_cache_fault_one_in=32 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=10 
--subcompactions=3 --sync=0 --sync_fault_injection=1 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=2 --unpartitioned_pinning=1 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=1 --use_merge=0 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=1 --user_timestamp_size=0 --value_size_mult=32 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --wal_compression=zstd --write_buffer_size=33554432 --write_dbid_to_manifest=1 --writepercent=35``` Reviewed By: hx235 Differential Revision: D48537494 Pulled By: cbi42 fbshipit-source-id: ddae21b9bb6ee8d67229121f58513e95f7ef6d8d --- db_stress_tool/db_stress_test_base.cc | 27 ++++++++++++++++----------- db_stress_tool/db_stress_test_base.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 3e8b4ea70..9745e0f9a 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2562,7 +2562,7 @@ void StressTest::PrintEnv() const { fprintf(stdout, "------------------------------------------------\n"); } -void StressTest::Open(SharedState* shared) { +void StressTest::Open(SharedState* shared, bool reopen) { assert(db_ == nullptr); assert(txn_db_ == nullptr); assert(optimistic_txn_db_ == nullptr); @@ -2777,15 +2777,20 @@ void StressTest::Open(SharedState* shared) { ingest_write_error = false; ingest_read_error = false; - Random rand(static_cast(FLAGS_seed)); - if (rand.OneIn(2)) { - fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), - nullptr); - } - if (rand.OneIn(3)) { - fault_fs_guard->DropUnsyncedFileData(); - } else if (rand.OneIn(2)) { - fault_fs_guard->DropRandomUnsyncedFileData(&rand); + // TODO: Unsynced data loss during DB reopen is not supported yet in + // stress test. Will need to recreate expected state if we decide + // to support unsynced data loss during DB reopen. 
+ if (!reopen) { + Random rand(static_cast(FLAGS_seed)); + if (rand.OneIn(2)) { + fault_fs_guard->DeleteFilesCreatedAfterLastDirSync(IOOptions(), + nullptr); + } + if (rand.OneIn(3)) { + fault_fs_guard->DropUnsyncedFileData(); + } else if (rand.OneIn(2)) { + fault_fs_guard->DropRandomUnsyncedFileData(&rand); + } } continue; } @@ -2940,7 +2945,7 @@ void StressTest::Reopen(ThreadState* thread) { auto now = clock_->NowMicros(); fprintf(stdout, "%s Reopening database for the %dth time\n", clock_->TimeToString(now / 1000000).c_str(), num_times_reopened_); - Open(thread->shared); + Open(thread->shared, /*reopen=*/true); if ((FLAGS_sync_fault_injection || FLAGS_disable_wal || FLAGS_manual_wal_flush_one_in > 0) && diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index dc235fcdf..3008f0366 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -241,7 +241,7 @@ class StressTest { void PrintEnv() const; - void Open(SharedState* shared); + void Open(SharedState* shared, bool reopen = false); void Reopen(ThreadState* thread); From 1303573589e9e8ab08825cd2202574934f8eb642 Mon Sep 17 00:00:00 2001 From: chuhao zeng Date: Tue, 22 Aug 2023 11:22:35 -0700 Subject: [PATCH 060/386] Reverse sort order in dedup to enable iter checking in callback (#11725) Summary: Fix https://github.com/facebook/rocksdb/issues/6470 Ensure TransactionLogIter being initialized correctly with SYNC_POINT API when calling `GetSortedWALFiles`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11725 Reviewed By: akankshamahajan15 Differential Revision: D48529411 Pulled By: ajkr fbshipit-source-id: 970ca1a6259ed996c6d87f7fcd40f95acf441517 --- db/db_impl/db_impl_files.cc | 6 +++--- db/db_log_iter_test.cc | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 9fc9bb27a..9e2fbb73d 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -457,12 +457,12 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { std::sort(candidate_files.begin(), candidate_files.end(), [](const JobContext::CandidateFileInfo& lhs, const JobContext::CandidateFileInfo& rhs) { - if (lhs.file_name > rhs.file_name) { + if (lhs.file_name < rhs.file_name) { return true; - } else if (lhs.file_name < rhs.file_name) { + } else if (lhs.file_name > rhs.file_name) { return false; } else { - return (lhs.file_path > rhs.file_path); + return (lhs.file_path < rhs.file_path); } }); candidate_files.erase( diff --git a/db/db_log_iter_test.cc b/db/db_log_iter_test.cc index 4c9434586..41f5fe4d1 100644 --- a/db/db_log_iter_test.cc +++ b/db/db_log_iter_test.cc @@ -145,6 +145,41 @@ TEST_F(DBTestXactLogIterator, TransactionLogIteratorRace) { } while (ChangeCompactOptions()); } } + +TEST_F(DBTestXactLogIterator, TransactionLogIteratorCheckWhenArchive) { + do { + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearTrace(); + Options options = OptionsForLogIterTest(); + DestroyAndReopen(options); + ColumnFamilyHandle* cf; + auto s = dbfull()->CreateColumnFamily(ColumnFamilyOptions(), "CF", &cf); + ASSERT_TRUE(s.ok()); + + ASSERT_OK(dbfull()->Put(WriteOptions(), cf, "key1", DummyString(1024))); + + ASSERT_OK(dbfull()->Put(WriteOptions(), "key2", DummyString(1024))); + + ASSERT_OK(dbfull()->Flush(FlushOptions())); + + ASSERT_OK(dbfull()->Put(WriteOptions(), "key3", DummyString(1024))); + + ASSERT_OK(dbfull()->Flush(FlushOptions())); + + 
ASSERT_OK(dbfull()->Put(WriteOptions(), "key4", DummyString(1024))); + ASSERT_OK(dbfull()->Flush(FlushOptions())); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "WalManager::PurgeObsoleteFiles:1", [&](void*) { + auto iter = OpenTransactionLogIter(0); + ExpectRecords(4, iter); + }); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(dbfull()->Flush(FlushOptions(), cf)); + + delete cf; + } while (ChangeCompactOptions()); +} #endif TEST_F(DBTestXactLogIterator, TransactionLogIteratorStallAtLastRecord) { From 2b6bcfe590fa6bded36d5ccdc416527a8faa759a Mon Sep 17 00:00:00 2001 From: Alexander Bulimov Date: Tue, 22 Aug 2023 14:32:35 -0700 Subject: [PATCH 061/386] Add C API for WaitForCompact (#11737) Summary: Add a bunch of C API functions to expose new `WaitForCompact` function and related options. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11737 Test Plan: unit tests Reviewed By: jaykorean Differential Revision: D48568239 Pulled By: abulimov fbshipit-source-id: 1ff35972d7abacd7e1e17fe2ada1e20cdc88d8de --- db/c.cc | 59 +++++++++++++++++++++++++++++++++++++++++++++ db/c_test.c | 36 +++++++++++++++++++++++++++ include/rocksdb/c.h | 40 ++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+) diff --git a/db/c.cc b/db/c.cc index 42ddc5217..0e508d326 100644 --- a/db/c.cc +++ b/db/c.cc @@ -120,6 +120,7 @@ using ROCKSDB_NAMESPACE::TransactionDB; using ROCKSDB_NAMESPACE::TransactionDBOptions; using ROCKSDB_NAMESPACE::TransactionLogIterator; using ROCKSDB_NAMESPACE::TransactionOptions; +using ROCKSDB_NAMESPACE::WaitForCompactOptions; using ROCKSDB_NAMESPACE::WALRecoveryMode; using ROCKSDB_NAMESPACE::WritableFile; using ROCKSDB_NAMESPACE::WriteBatch; @@ -275,6 +276,9 @@ struct rocksdb_optimistictransactiondb_t { struct rocksdb_optimistictransaction_options_t { OptimisticTransactionOptions rep; }; +struct rocksdb_wait_for_compact_options_t { + WaitForCompactOptions rep; +}; struct rocksdb_compactionfiltercontext_t { CompactionFilter::Context rep; @@ -6719,4 +6723,59 @@ double rocksdb_statistics_histogram_data_get_min( return data->rep.min; } +void rocksdb_wait_for_compact(rocksdb_t* db, + rocksdb_wait_for_compact_options_t* options, + char** errptr) { + SaveError(errptr, db->rep->WaitForCompact(options->rep)); +} + +rocksdb_wait_for_compact_options_t* rocksdb_wait_for_compact_options_create() { + return new rocksdb_wait_for_compact_options_t; +} + +void rocksdb_wait_for_compact_options_destroy( + rocksdb_wait_for_compact_options_t* opt) { + delete opt; +} + +void rocksdb_wait_for_compact_options_set_abort_on_pause( + rocksdb_wait_for_compact_options_t* opt, unsigned char v) { + opt->rep.abort_on_pause = v; +} + +unsigned char rocksdb_wait_for_compact_options_get_abort_on_pause( + rocksdb_wait_for_compact_options_t* opt) { + return opt->rep.abort_on_pause; +} + +void rocksdb_wait_for_compact_options_set_flush( + rocksdb_wait_for_compact_options_t* opt, unsigned char v) { + opt->rep.flush = v; +} + +unsigned char rocksdb_wait_for_compact_options_get_flush( + rocksdb_wait_for_compact_options_t* opt) { + return opt->rep.flush; +} + +void rocksdb_wait_for_compact_options_set_close_db( + rocksdb_wait_for_compact_options_t* opt, unsigned char v) { + opt->rep.close_db = v; +} + +unsigned char rocksdb_wait_for_compact_options_get_close_db( + rocksdb_wait_for_compact_options_t* opt) { + return opt->rep.close_db; +} + +void rocksdb_wait_for_compact_options_set_timeout( + rocksdb_wait_for_compact_options_t* opt, uint64_t microseconds) { + 
opt->rep.timeout = std::chrono::microseconds(microseconds); +} + +uint64_t rocksdb_wait_for_compact_options_get_timeout( + rocksdb_wait_for_compact_options_t* opt) { + return opt->rep.timeout.count(); +} + } // end extern "C" diff --git a/db/c_test.c b/db/c_test.c index b21e1aee3..1305ee3cd 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -375,6 +375,11 @@ static rocksdb_t* CheckCompaction(rocksdb_t* db, rocksdb_options_t* options, // Force compaction rocksdb_compact_range(db, NULL, 0, NULL, 0); + rocksdb_wait_for_compact_options_t* wco; + wco = rocksdb_wait_for_compact_options_create(); + rocksdb_wait_for_compact(db, wco, &err); + CheckNoError(err); + rocksdb_wait_for_compact_options_destroy(wco); // should have filtered bar, but not foo CheckGet(db, roptions, "foo", "foovalue"); CheckGet(db, roptions, "bar", NULL); @@ -3687,6 +3692,37 @@ int main(int argc, char** argv) { rocksdb_statistics_histogram_data_destroy(hist); } + StartPhase("wait_for_compact_options"); + { + rocksdb_wait_for_compact_options_t* wco; + wco = rocksdb_wait_for_compact_options_create(); + + rocksdb_wait_for_compact_options_set_abort_on_pause(wco, 1); + CheckCondition(1 == + rocksdb_wait_for_compact_options_get_abort_on_pause(wco)); + + rocksdb_wait_for_compact_options_set_flush(wco, 1); + CheckCondition(1 == rocksdb_wait_for_compact_options_get_flush(wco)); + + rocksdb_wait_for_compact_options_set_close_db(wco, 1); + CheckCondition(1 == rocksdb_wait_for_compact_options_get_close_db(wco)); + + rocksdb_wait_for_compact_options_set_timeout(wco, 342); + CheckCondition(342 == rocksdb_wait_for_compact_options_get_timeout(wco)); + + rocksdb_wait_for_compact_options_destroy(wco); + } + StartPhase("wait_for_compact"); + { + rocksdb_wait_for_compact_options_t* wco; + wco = rocksdb_wait_for_compact_options_create(); + rocksdb_wait_for_compact_options_set_flush(wco, 1); + + rocksdb_wait_for_compact(db, wco, &err); + CheckNoError(err); + rocksdb_wait_for_compact_options_destroy(wco); + } + StartPhase("cancel_all_background_work"); rocksdb_cancel_all_background_work(db, 1); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 6636b592b..f215c899d 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -138,6 +138,8 @@ typedef struct rocksdb_memory_consumers_t rocksdb_memory_consumers_t; typedef struct rocksdb_memory_usage_t rocksdb_memory_usage_t; typedef struct rocksdb_statistics_histogram_data_t rocksdb_statistics_histogram_data_t; +typedef struct rocksdb_wait_for_compact_options_t + rocksdb_wait_for_compact_options_t; /* DB operations */ @@ -2927,6 +2929,44 @@ extern ROCKSDB_LIBRARY_API uint64_t rocksdb_statistics_histogram_data_get_sum( extern ROCKSDB_LIBRARY_API double rocksdb_statistics_histogram_data_get_min( rocksdb_statistics_histogram_data_t* data); +extern ROCKSDB_LIBRARY_API void rocksdb_wait_for_compact( + rocksdb_t* db, rocksdb_wait_for_compact_options_t* options, char** errptr); + +extern ROCKSDB_LIBRARY_API rocksdb_wait_for_compact_options_t* +rocksdb_wait_for_compact_options_create(void); + +extern ROCKSDB_LIBRARY_API void rocksdb_wait_for_compact_options_destroy( + rocksdb_wait_for_compact_options_t* opt); + +extern ROCKSDB_LIBRARY_API void +rocksdb_wait_for_compact_options_set_abort_on_pause( + rocksdb_wait_for_compact_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_wait_for_compact_options_get_abort_on_pause( + rocksdb_wait_for_compact_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_wait_for_compact_options_set_flush( + 
rocksdb_wait_for_compact_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_wait_for_compact_options_get_flush( + rocksdb_wait_for_compact_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_wait_for_compact_options_set_close_db( + rocksdb_wait_for_compact_options_t* opt, unsigned char v); + +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_wait_for_compact_options_get_close_db( + rocksdb_wait_for_compact_options_t* opt); + +extern ROCKSDB_LIBRARY_API void rocksdb_wait_for_compact_options_set_timeout( + rocksdb_wait_for_compact_options_t* opt, uint64_t microseconds); + +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_wait_for_compact_options_get_timeout( + rocksdb_wait_for_compact_options_t* opt); + #ifdef __cplusplus } /* end extern "C" */ #endif From f833ca3878ecff6ce7ca66319bc30255aeefcdad Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 23 Aug 2023 11:27:48 -0700 Subject: [PATCH 062/386] Pick files from the last sorted run in size amp compaction picker (#11740) Summary: **Context/Summary:** Same intention as https://github.com/facebook/rocksdb/pull/2693 - basically we now pick from the last sorted run and expand forward till we can't Pull Request resolved: https://github.com/facebook/rocksdb/pull/11740 Test Plan: Existing UT Stress test Reviewed By: ajkr Differential Revision: D48586475 Pulled By: hx235 fbshipit-source-id: 3eb3c3ee1d5f7e0b0d6d649baaeb8c6990fee398 --- db/compaction/compaction_picker_universal.cc | 102 +++++++------------ 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/db/compaction/compaction_picker_universal.cc b/db/compaction/compaction_picker_universal.cc index 9eaf39546..ecd67131d 100644 --- a/db/compaction/compaction_picker_universal.cc +++ b/db/compaction/compaction_picker_universal.cc @@ -114,6 +114,13 @@ class UniversalCompactionBuilder { // because some files are being compacted. Compaction* PickPeriodicCompaction(); + bool ShouldSkipLastSortedRunForSizeAmpCompaction() { + assert(!sorted_runs_.empty()); + return ioptions_.preclude_last_level_data_seconds > 0 && + ioptions_.num_levels > 2 && + sorted_runs_.back().level == ioptions_.num_levels - 1 && + sorted_runs_.size() > 1; + } // Used in universal compaction when the allow_trivial_move // option is set. Checks whether there are any overlapping files // in the input. Returns true if the input files are non @@ -778,85 +785,55 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSortedRuns( // Look at overall size amplification. If size amplification // exceeds the configured value, then do a compaction -// of the candidate files all the way upto the earliest -// base file (overrides configured values of file-size ratios, -// min_merge_width and max_merge_width). -// +// on longest span of candidate files without conflict with other compactions +// ending at the earliest base file (overriding configured values of file-size +// ratios, min_merge_width and max_merge_width). Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { - // percentage flexibility while reducing size amplification - uint64_t ratio = mutable_cf_options_.compaction_options_universal - .max_size_amplification_percent; - - unsigned int candidate_count = 0; - uint64_t candidate_size = 0; - size_t start_index = 0; - const SortedRun* sr = nullptr; - assert(!sorted_runs_.empty()); - if (sorted_runs_.back().being_compacted) { + + const size_t end_index = ShouldSkipLastSortedRunForSizeAmpCompaction() + ? 
sorted_runs_.size() - 2 + : sorted_runs_.size() - 1; + if (sorted_runs_[end_index].being_compacted) { return nullptr; } + const size_t base_sr_size = sorted_runs_[end_index].size; + size_t start_index = end_index; + size_t candidate_size = 0; - // Skip files that are already being compacted - for (size_t loop = 0; loop + 1 < sorted_runs_.size(); loop++) { - sr = &sorted_runs_[loop]; - if (!sr->being_compacted) { - start_index = loop; // Consider this as the first candidate. + // Get longest span (i.e, [start_index, end_index]) of available sorted runs + while (start_index > 0) { + const SortedRun* sr = &sorted_runs_[start_index - 1]; + if (sr->being_compacted) { + char file_num_buf[kFormatFileNumberBufSize]; + sr->Dump(file_num_buf, sizeof(file_num_buf), true); + ROCKS_LOG_BUFFER( + log_buffer_, + "[%s] Universal: stopping at sorted run undergoing compaction: " + "%s[%" ROCKSDB_PRIszt "]", + cf_name_.c_str(), file_num_buf, start_index - 1); break; } - char file_num_buf[kFormatFileNumberBufSize]; - sr->Dump(file_num_buf, sizeof(file_num_buf), true); - ROCKS_LOG_BUFFER(log_buffer_, - "[%s] Universal: skipping %s[%d] compacted %s", - cf_name_.c_str(), file_num_buf, loop, - " cannot be a candidate to reduce size amp.\n"); - sr = nullptr; + candidate_size += sr->compensated_file_size; + --start_index; } - if (sr == nullptr) { - return nullptr; // no candidate files + if (start_index == end_index) { + return nullptr; } + { char file_num_buf[kFormatFileNumberBufSize]; - sr->Dump(file_num_buf, sizeof(file_num_buf), true); + sorted_runs_[start_index].Dump(file_num_buf, sizeof(file_num_buf), true); ROCKS_LOG_BUFFER( log_buffer_, "[%s] Universal: First candidate %s[%" ROCKSDB_PRIszt "] %s", cf_name_.c_str(), file_num_buf, start_index, " to reduce size amp.\n"); } - // size of the base sorted run for size amp calculation - uint64_t base_sr_size = sorted_runs_.back().size; - size_t sr_end_idx = sorted_runs_.size() - 1; - // If tiered compaction is enabled and the last sorted run is the last level - if (ioptions_.preclude_last_level_data_seconds > 0 && - ioptions_.num_levels > 2 && - sorted_runs_.back().level == ioptions_.num_levels - 1 && - sorted_runs_.size() > 1) { - sr_end_idx = sorted_runs_.size() - 2; - base_sr_size = sorted_runs_[sr_end_idx].size; - } - - // keep adding up all the remaining files - for (size_t loop = start_index; loop < sr_end_idx; loop++) { - sr = &sorted_runs_[loop]; - if (sr->being_compacted) { - // TODO with incremental compaction is supported, we might want to - // schedule some incremental compactions in parallel if needed. - char file_num_buf[kFormatFileNumberBufSize]; - sr->Dump(file_num_buf, sizeof(file_num_buf), true); - ROCKS_LOG_BUFFER( - log_buffer_, "[%s] Universal: Possible candidate %s[%d] %s", - cf_name_.c_str(), file_num_buf, start_index, - " is already being compacted. 
No size amp reduction possible.\n"); - return nullptr; - } - candidate_size += sr->compensated_file_size; - candidate_count++; - } - if (candidate_count == 0) { - return nullptr; - } + // percentage flexibility while reducing size amplification + const uint64_t ratio = mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent; // size amplification = percentage of additional size if (candidate_size * 100 < ratio * base_sr_size) { @@ -893,7 +870,7 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { } } return PickCompactionWithSortedRunRange( - start_index, sr_end_idx, CompactionReason::kUniversalSizeAmplification); + start_index, end_index, CompactionReason::kUniversalSizeAmplification); } Compaction* UniversalCompactionBuilder::PickIncrementalForReduceSizeAmp( @@ -1442,4 +1419,3 @@ uint64_t UniversalCompactionBuilder::GetMaxOverlappingBytes() const { } } } // namespace ROCKSDB_NAMESPACE - From bc448e9c89ffcef97434cd35591cee7b8e7c98c3 Mon Sep 17 00:00:00 2001 From: Fuat Basik Date: Wed, 23 Aug 2023 15:24:23 -0700 Subject: [PATCH 063/386] Run db_stress for final time to ensure un-interrupted validation (#11592) Summary: In blackbox tests, db_stress command always run with timeout. Timeout can happen during validation, leaving some of the keys not checked. Since key validation is done in order, it is quite likely that keys those are towards to the end of the set are never validated. This PR adds a final execution, without timeout, to ensure validation is executed for all keys, at least once. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11592 Reviewed By: cbi42 Differential Revision: D48003998 Pulled By: hx235 fbshipit-source-id: 72543475a932f12cf0f57534b7e3b6e07e87080f --- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_driver.cc | 126 +++++++++++++++----------- db_stress_tool/db_stress_gflags.cc | 3 + db_stress_tool/db_stress_test_base.cc | 2 + tools/db_crashtest.py | 22 ++++- 5 files changed, 99 insertions(+), 55 deletions(-) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 88a113d92..9699fce66 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -254,6 +254,7 @@ DECLARE_int32(verify_db_one_in); DECLARE_int32(continuous_verification_interval); DECLARE_int32(get_property_one_in); DECLARE_string(file_checksum_impl); +DECLARE_bool(verification_only); // Options for transaction dbs. // Use TransactionDB (a.k.a. 
Pessimistic Transaction DB) diff --git a/db_stress_tool/db_stress_driver.cc b/db_stress_tool/db_stress_driver.cc index e8b75c1d1..15c1ad5e5 100644 --- a/db_stress_tool/db_stress_driver.cc +++ b/db_stress_tool/db_stress_driver.cc @@ -27,34 +27,39 @@ void ThreadBody(void* v) { if (shared->AllInitialized()) { shared->GetCondVar()->SignalAll(); } - while (!shared->Started()) { - shared->GetCondVar()->Wait(); - } } - thread->shared->GetStressTest()->OperateDb(thread); - - { - MutexLock l(shared->GetMutex()); - shared->IncOperated(); - if (shared->AllOperated()) { - shared->GetCondVar()->SignalAll(); + if (!FLAGS_verification_only) { + { + MutexLock l(shared->GetMutex()); + while (!shared->Started()) { + shared->GetCondVar()->Wait(); + } } - while (!shared->VerifyStarted()) { - shared->GetCondVar()->Wait(); + thread->shared->GetStressTest()->OperateDb(thread); + { + MutexLock l(shared->GetMutex()); + shared->IncOperated(); + if (shared->AllOperated()) { + shared->GetCondVar()->SignalAll(); + } + while (!shared->VerifyStarted()) { + shared->GetCondVar()->Wait(); + } } - } - if (!FLAGS_skip_verifydb) { - thread->shared->GetStressTest()->VerifyDb(thread); - } + if (!FLAGS_skip_verifydb) { + thread->shared->GetStressTest()->VerifyDb(thread); + } - { - MutexLock l(shared->GetMutex()); - shared->IncDone(); - if (shared->AllDone()) { - shared->GetCondVar()->SignalAll(); + { + MutexLock l(shared->GetMutex()); + shared->IncDone(); + if (shared->AllDone()) { + shared->GetCondVar()->SignalAll(); + } } } + ThreadStatusUtil::UnregisterThread(); } bool RunStressTestImpl(SharedState* shared) { @@ -141,45 +146,55 @@ bool RunStressTestImpl(SharedState* shared) { } } - // This is after the verification step to avoid making all those `Get()`s - // and `MultiGet()`s contend on the DB-wide trace mutex. - if (!FLAGS_expected_values_dir.empty()) { - stress->TrackExpectedState(shared); - } - - now = clock->NowMicros(); - fprintf(stdout, "%s Starting database operations\n", - clock->TimeToString(now / 1000000).c_str()); + if (!FLAGS_verification_only) { + // This is after the verification step to avoid making all those `Get()`s + // and `MultiGet()`s contend on the DB-wide trace mutex. 
+ if (!FLAGS_expected_values_dir.empty()) { + stress->TrackExpectedState(shared); + } + now = clock->NowMicros(); + fprintf(stdout, "%s Starting database operations\n", + clock->TimeToString(now / 1000000).c_str()); - shared->SetStart(); - shared->GetCondVar()->SignalAll(); - while (!shared->AllOperated()) { - shared->GetCondVar()->Wait(); - } + shared->SetStart(); + shared->GetCondVar()->SignalAll(); + while (!shared->AllOperated()) { + shared->GetCondVar()->Wait(); + } - now = clock->NowMicros(); - if (FLAGS_test_batches_snapshots) { - fprintf(stdout, "%s Limited verification already done during gets\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } else if (FLAGS_skip_verifydb) { - fprintf(stdout, "%s Verification skipped\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } else { - fprintf(stdout, "%s Starting verification\n", - clock->TimeToString((uint64_t)now / 1000000).c_str()); - } + now = clock->NowMicros(); + if (FLAGS_test_batches_snapshots) { + fprintf(stdout, "%s Limited verification already done during gets\n", + clock->TimeToString((uint64_t)now / 1000000).c_str()); + } else if (FLAGS_skip_verifydb) { + fprintf(stdout, "%s Verification skipped\n", + clock->TimeToString((uint64_t)now / 1000000).c_str()); + } else { + fprintf(stdout, "%s Starting verification\n", + clock->TimeToString((uint64_t)now / 1000000).c_str()); + } - shared->SetStartVerify(); - shared->GetCondVar()->SignalAll(); - while (!shared->AllDone()) { - shared->GetCondVar()->Wait(); + shared->SetStartVerify(); + shared->GetCondVar()->SignalAll(); + while (!shared->AllDone()) { + shared->GetCondVar()->Wait(); + } } } - for (unsigned int i = 1; i < n; i++) { - threads[0]->stats.Merge(threads[i]->stats); + // If we are running verification_only + // stats will be empty and trying to report them will + // emit no ops or writes error. To avoid this, merging and reporting stats + // are not executed when running with verification_only + // TODO: We need to create verification stats (e.g. how many keys + // are verified by which method) and report them here instead of operation + // stats. 
+ if (!FLAGS_verification_only) { + for (unsigned int i = 1; i < n; i++) { + threads[0]->stats.Merge(threads[i]->stats); + } + threads[0]->stats.Report("Stress Test"); } - threads[0]->stats.Report("Stress Test"); for (unsigned int i = 0; i < n; i++) { delete threads[i]; @@ -191,7 +206,10 @@ bool RunStressTestImpl(SharedState* shared) { fprintf(stdout, "%s Verification successful\n", clock->TimeToString(now / 1000000).c_str()); } - stress->PrintStatistics(); + + if (!FLAGS_verification_only) { + stress->PrintStatistics(); + } if (FLAGS_compaction_thread_pool_adjust_interval > 0 || FLAGS_continuous_verification_interval > 0) { diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index ddca21176..1b158628f 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1110,6 +1110,9 @@ DEFINE_uint64(stats_dump_period_sec, "Gap between printing stats to log in seconds"); DEFINE_bool(use_io_uring, false, "Enable the use of IO uring on Posix"); + +DEFINE_bool(verification_only, false, + "If true, tests will only execute verification step"); extern "C" bool RocksDbIOUringEnable() { return FLAGS_use_io_uring; } DEFINE_uint32(memtable_max_range_deletions, 0, diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 9745e0f9a..a5eb747b1 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -2484,6 +2484,8 @@ void StressTest::PrintEnv() const { FLAGS_use_get_entity ? "true" : "false"); fprintf(stdout, "Use MultiGetEntity : %s\n", FLAGS_use_multi_get_entity ? "true" : "false"); + fprintf(stdout, "Verification only : %s\n", + FLAGS_verification_only ? "true" : "false"); const char* memtablerep = ""; switch (FLAGS_rep_factory) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 11ac75d78..11bb70215 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -135,6 +135,7 @@ # 999 -> use Bloom API "ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]), "value_size_mult": 32, + "verification_only": 0, "verify_checksum": 1, "write_buffer_size": 4 * 1024 * 1024, "writepercent": 35, @@ -756,7 +757,7 @@ def gen_cmd(params, unknown_params): return cmd -def execute_cmd(cmd, timeout): +def execute_cmd(cmd, timeout=None): child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd))) @@ -814,6 +815,25 @@ def blackbox_crash_main(args, unknown_args): time.sleep(1) # time to stabilize before the next run + # We should run the test one more time with VerifyOnly setup and no-timeout + # Only do this if the tests are not failed for total-duration + print("Running final time for verification") + cmd_params.update({"verification_only": 1}) + cmd_params.update({"skip_verifydb": 0}) + + cmd = gen_cmd( + dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args + ) + hit_timeout, retcode, outs, errs = execute_cmd(cmd) + + # Print stats of the final run + print("stdout:", outs) + + for line in errs.split("\n"): + if line != "" and not line.startswith("WARNING"): + print("stderr has error message:") + print("***" + line + "***") + # we need to clean up after ourselves -- only do this on test success shutil.rmtree(dbname, True) From 451316597f55a07ca7920e2287fbed4b421b2fc5 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 24 Aug 2023 14:55:48 -0700 Subject: [PATCH 064/386] Clarify comment about compaction_readahead_size's 
sanitization change (#11755) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11755 Reviewed By: anand1976 Differential Revision: D48656627 Pulled By: hx235 fbshipit-source-id: 568fa7749cbf6ecf65102b4513fa3af975fd91b8 --- HISTORY.md | 1 + include/rocksdb/options.h | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 27a248b4e..50ec905ca 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -22,6 +22,7 @@ ### Behavior Changes * Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file * For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. +* `Options::compaction_readahead_size` will be sanitized to 2MB when set to 0 under non-direct IO since we have moved prefetching responsibility to page cache for compaction read with readhead size equal to `Options::compaction_readahead_size` under non-direct IO (#11631) ### Bug Fixes * Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 8f62c2dfb..4dee0c952 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -951,10 +951,13 @@ struct DBOptions { enum AccessHint { NONE, NORMAL, SEQUENTIAL, WILLNEED }; AccessHint access_hint_on_compaction_start = NORMAL; - // If non-zero, we perform bigger reads when doing compaction. If you're + // The size RocksDB uses to perform readahead during compaction read. + // If set zero, RocksDB will sanitize it to be 2MB during db open. + // If you're // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // + // // Default: 0 // // Dynamically changeable through SetDBOptions() API. 
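To make the clarified comment above concrete, here is a minimal sketch, assuming only the public `rocksdb/db.h` / `rocksdb/options.h` headers and an arbitrary DB path, of how this option is typically configured; leaving it at 0 relies on the 2MB sanitization under non-direct IO described in the HISTORY.md entry above.

#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // 0 is sanitized to 2MB at DB open under non-direct IO, per the comment
  // change above.
  options.compaction_readahead_size = 0;
  // On spinning disks, an explicit value of at least 2MB keeps compaction
  // reads sequential instead of random:
  // options.compaction_readahead_size = 2 * 1024 * 1024;
  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/readahead_example", &db);
  assert(s.ok());
  delete db;
  return 0;
}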
From 6353c6e2fbac3c982dd93e7be9208fed27107803 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan <43301668+akankshamahajan15@users.noreply.github.com> Date: Thu, 24 Aug 2023 14:58:27 -0700 Subject: [PATCH 065/386] Add new experimental ReadOption auto_readahead_size to db_bench and db_stress (#11729) Summary: Same as title Pull Request resolved: https://github.com/facebook/rocksdb/pull/11729 Test Plan: make crash_test -j32 Reviewed By: anand1976 Differential Revision: D48534820 Pulled By: akankshamahajan15 fbshipit-source-id: 3a2a28af98dfad164b82ddaaf9fddb94c53a652e --- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 3 +++ db_stress_tool/db_stress_test_base.cc | 1 + tools/db_bench_tool.cc | 8 ++++++++ tools/db_crashtest.py | 1 + 5 files changed, 14 insertions(+) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 9699fce66..32e77d40c 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -347,6 +347,7 @@ DECLARE_uint64(initial_auto_readahead_size); DECLARE_uint64(max_auto_readahead_size); DECLARE_uint64(num_file_reads_for_auto_readahead); DECLARE_bool(use_io_uring); +DECLARE_bool(auto_readahead_size); constexpr long KB = 1024; constexpr int kRandomValueMaxFactor = 3; diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 1b158628f..4c3a2b84f 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1123,4 +1123,7 @@ DEFINE_uint32(bottommost_file_compaction_delay, 0, "Delay kBottommostFiles compaction by this amount of seconds." "See more in option comment."); +DEFINE_bool(auto_readahead_size, false, + "Does auto tuning of readahead_size when enabled during scans."); + #endif // GFLAGS diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index a5eb747b1..aff559a63 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -747,6 +747,7 @@ void StressTest::OperateDb(ThreadState* thread) { read_opts.async_io = FLAGS_async_io; read_opts.adaptive_readahead = FLAGS_adaptive_readahead; read_opts.readahead_size = FLAGS_readahead_size; + read_opts.auto_readahead_size = FLAGS_auto_readahead_size; WriteOptions write_opts; if (FLAGS_rate_limit_auto_wal_flush) { write_opts.rate_limiter_priority = Env::IO_USER; diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 0a133289b..996650f20 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1243,6 +1243,10 @@ DEFINE_uint64( "num_file_reads_for_auto_readahead indicates after how many sequential " "reads into that file internal auto prefetching should be start."); +DEFINE_bool( + auto_readahead_size, false, + "When set true, RocksDB does auto tuning of readahead size during Scans"); + static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType( const char* ctype) { assert(ctype); @@ -3368,6 +3372,7 @@ class Benchmark { read_options_.adaptive_readahead = FLAGS_adaptive_readahead; read_options_.async_io = FLAGS_async_io; read_options_.optimize_multiget_for_io = FLAGS_optimize_multiget_for_io; + read_options_.auto_readahead_size = FLAGS_auto_readahead_size; void (Benchmark::*method)(ThreadState*) = nullptr; void (Benchmark::*post_process_method)() = nullptr; @@ -5754,6 +5759,7 @@ class Benchmark { options.adaptive_readahead = FLAGS_adaptive_readahead; options.async_io = FLAGS_async_io; + options.auto_readahead_size = FLAGS_auto_readahead_size; Iterator* iter = 
db->NewIterator(options); int64_t i = 0; @@ -7749,6 +7755,7 @@ class Benchmark { ro.rate_limiter_priority = FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; ro.readahead_size = FLAGS_readahead_size; + ro.auto_readahead_size = FLAGS_auto_readahead_size; Status s = db->VerifyChecksum(ro); if (!s.ok()) { fprintf(stderr, "VerifyChecksum() failed: %s\n", s.ToString().c_str()); @@ -7764,6 +7771,7 @@ class Benchmark { ro.rate_limiter_priority = FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL; ro.readahead_size = FLAGS_readahead_size; + ro.auto_readahead_size = FLAGS_auto_readahead_size; Status s = db->VerifyFileChecksums(ro); if (!s.ok()) { fprintf(stderr, "VerifyFileChecksums() failed: %s\n", diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 11bb70215..72ee14865 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -217,6 +217,7 @@ "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]), # 0 (disable) is the default and more commonly used value. "bottommost_file_compaction_delay": lambda: random.choice([0, 0, 0, 600, 3600, 86400]), + "auto_readahead_size" : lambda: random.choice([0, 1]), } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" From d3420464c36852f2ddd3e079d63027e5d16cecfe Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 24 Aug 2023 19:14:38 -0700 Subject: [PATCH 066/386] cache_bench enhancements for jemalloc etc. (#11758) Summary: * Add some options to cache_bench to use JemallocNodumpAllocator * Make num_shard_bits option use and report cache-specific defaults * Add a usleep option to sleep between operations, for simulating a workload with more CPU idle/wait time. * Use const& for JemallocAllocatorOptions, to improve API usability (e.g. can bind to temporary `{}`) * InstallStackTraceHandler() Pull Request resolved: https://github.com/facebook/rocksdb/pull/11758 Test Plan: manual Reviewed By: jowlyzhang Differential Revision: D48668479 Pulled By: pdillinger fbshipit-source-id: b6032fbe09444cdb8f1443a5e017d2eea4f6205a --- cache/cache_bench_tool.cc | 69 +++++++++++++++++++++++------ include/rocksdb/memory_allocator.h | 2 +- memory/jemalloc_nodump_allocator.cc | 4 +- memory/jemalloc_nodump_allocator.h | 2 +- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index cb37862a2..4c0f366a9 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -3,7 +3,6 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#include "cache_key.h" #ifdef GFLAGS #include #include @@ -13,9 +12,12 @@ #include #include +#include "cache/cache_key.h" +#include "cache/sharded_cache.h" #include "db/db_impl/db_impl.h" #include "monitoring/histogram.h" #include "port/port.h" +#include "port/stack_trace.h" #include "rocksdb/advanced_cache.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" @@ -44,7 +46,8 @@ static constexpr uint64_t GiB = MiB << 10; DEFINE_uint32(threads, 16, "Number of concurrent threads to run."); DEFINE_uint64(cache_size, 1 * GiB, "Number of bytes to use as a cache of uncompressed data."); -DEFINE_uint32(num_shard_bits, 6, "shard_bits."); +DEFINE_int32(num_shard_bits, -1, + "ShardedCacheOptions::shard_bits. 
Default = auto"); DEFINE_double(resident_ratio, 0.25, "Ratio of keys fitting in cache to keyspace."); @@ -76,6 +79,8 @@ DEFINE_uint32( DEFINE_uint32(gather_stats_entries_per_lock, 256, "For Cache::ApplyToAllEntries"); +DEFINE_uint32(usleep, 0, "Sleep up to this many microseconds after each op."); + DEFINE_bool(lean, false, "If true, no additional computation is performed besides cache " "operations."); @@ -97,6 +102,17 @@ static class std::shared_ptr secondary_cache; DEFINE_string(cache_type, "lru_cache", "Type of block cache."); +DEFINE_bool(use_jemalloc_no_dump_allocator, false, + "Whether to use JemallocNoDumpAllocator"); + +DEFINE_uint32(jemalloc_no_dump_allocator_num_arenas, + ROCKSDB_NAMESPACE::JemallocAllocatorOptions().num_arenas, + "JemallocNodumpAllocator::num_arenas"); + +DEFINE_bool(jemalloc_no_dump_allocator_limit_tcache_size, + ROCKSDB_NAMESPACE::JemallocAllocatorOptions().limit_tcache_size, + "JemallocNodumpAllocator::limit_tcache_size"); + // ## BEGIN stress_cache_key sub-tool options ## // See class StressCacheKey below. DEFINE_bool(stress_cache_key, false, @@ -239,8 +255,8 @@ struct KeyGen { } }; -Cache::ObjectPtr createValue(Random64& rnd) { - char* rv = new char[FLAGS_value_bytes]; +Cache::ObjectPtr createValue(Random64& rnd, MemoryAllocator* alloc) { + char* rv = AllocateBlock(FLAGS_value_bytes, alloc).release(); // Fill with some filler data, and take some CPU time for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) { EncodeFixed64(rv + i, rnd.Next()); @@ -266,8 +282,8 @@ Status CreateFn(const Slice& data, Cache::CreateContext* /*context*/, return Status::OK(); }; -void DeleteFn(Cache::ObjectPtr value, MemoryAllocator* /*alloc*/) { - delete[] static_cast(value); +void DeleteFn(Cache::ObjectPtr value, MemoryAllocator* alloc) { + CustomDeleter{alloc}(static_cast(value)); } Cache::CacheItemHelper helper1_wos(CacheEntryRole::kDataBlock, DeleteFn); @@ -302,6 +318,15 @@ class CacheBench { exit(1); } + std::shared_ptr allocator; + if (FLAGS_use_jemalloc_no_dump_allocator) { + JemallocAllocatorOptions opts; + opts.num_arenas = FLAGS_jemalloc_no_dump_allocator_num_arenas; + opts.limit_tcache_size = + FLAGS_jemalloc_no_dump_allocator_limit_tcache_size; + Status s = NewJemallocNodumpAllocator(opts, &allocator); + assert(s.ok()); + } if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); @@ -309,6 +334,7 @@ class CacheBench { HyperClockCacheOptions opts( FLAGS_cache_size, /*estimated_entry_charge=*/0, FLAGS_num_shard_bits); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); + opts.memory_allocator = allocator; if (FLAGS_cache_type == "fixed_hyper_clock_cache" || FLAGS_cache_type == "hyper_clock_cache") { opts.estimated_entry_charge = FLAGS_value_bytes_estimate > 0 @@ -319,7 +345,7 @@ class CacheBench { opts.min_avg_entry_charge = FLAGS_value_bytes_estimate; } } else { - fprintf(stderr, "Cache type not supported."); + fprintf(stderr, "Cache type not supported.\n"); exit(1); } cache_ = opts.MakeSharedCache(); @@ -328,6 +354,7 @@ class CacheBench { false /* strict_capacity_limit */, 0.5 /* high_pri_pool_ratio */); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); + opts.memory_allocator = allocator; if (!FLAGS_secondary_cache_uri.empty()) { Status s = SecondaryCache::CreateFromString( ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); @@ -343,7 +370,7 @@ class CacheBench { cache_ = NewLRUCache(opts); } else { - fprintf(stderr, "Cache type not supported."); + fprintf(stderr, "Cache type not 
supported.\n"); exit(1); } } @@ -373,7 +400,8 @@ class CacheBench { keys_since_last_not_found = 0; Status s = - cache_->Insert(key, createValue(rnd), &helper1, FLAGS_value_bytes); + cache_->Insert(key, createValue(rnd, cache_->memory_allocator()), + &helper1, FLAGS_value_bytes); assert(s.ok()); handle = cache_->Lookup(key); @@ -610,6 +638,7 @@ class CacheBench { const auto clock = SystemClock::Default().get(); uint64_t start_time = clock->NowMicros(); StopWatchNano timer(clock); + auto system_clock = SystemClock::Default(); for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { Slice key = gen.GetRand(thread->rnd, max_key_, FLAGS_skew); @@ -637,8 +666,9 @@ class CacheBench { } else { ++lookup_misses; // do insert - Status s = cache_->Insert(key, createValue(thread->rnd), &helper2, - FLAGS_value_bytes, &handle); + Status s = cache_->Insert( + key, createValue(thread->rnd, cache_->memory_allocator()), + &helper2, FLAGS_value_bytes, &handle); assert(s.ok()); } } else if (random_op < insert_threshold_) { @@ -647,8 +677,9 @@ class CacheBench { handle = nullptr; } // do insert - Status s = cache_->Insert(key, createValue(thread->rnd), &helper3, - FLAGS_value_bytes, &handle); + Status s = cache_->Insert( + key, createValue(thread->rnd, cache_->memory_allocator()), &helper3, + FLAGS_value_bytes, &handle); assert(s.ok()); } else if (random_op < lookup_threshold_) { if (handle) { @@ -679,6 +710,13 @@ class CacheBench { thread->latency_ns_hist.Add(timer.ElapsedNanos()); } thread->shared->AddLookupStats(lookup_hits, lookup_misses); + if (FLAGS_usleep > 0) { + unsigned us = + static_cast(thread->rnd.Uniform(FLAGS_usleep + 1)); + if (us > 0) { + system_clock->SleepForMicroseconds(us); + } + } } if (FLAGS_early_exit) { MutexLock l(thread->shared->GetMutex()); @@ -712,7 +750,9 @@ class CacheBench { printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread); printf("Cache size : %s\n", BytesToHumanString(FLAGS_cache_size).c_str()); - printf("Num shard bits : %u\n", FLAGS_num_shard_bits); + printf("Num shard bits : %d\n", + static_cast_with_check(cache_.get()) + ->GetNumShardBits()); printf("Max key : %" PRIu64 "\n", max_key_); printf("Resident ratio : %g\n", FLAGS_resident_ratio); printf("Skew degree : %u\n", FLAGS_skew); @@ -1032,6 +1072,7 @@ class StressCacheKey { }; int cache_bench_tool(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_stress_cache_key) { diff --git a/include/rocksdb/memory_allocator.h b/include/rocksdb/memory_allocator.h index d126abfe6..dc744d7d1 100644 --- a/include/rocksdb/memory_allocator.h +++ b/include/rocksdb/memory_allocator.h @@ -81,7 +81,7 @@ struct JemallocAllocatorOptions { // The tcache normally incurs 0.5M extra memory usage per-thread. The usage // can be reduced by limiting allocation sizes to cache. 
extern Status NewJemallocNodumpAllocator( - JemallocAllocatorOptions& options, + const JemallocAllocatorOptions& options, std::shared_ptr* memory_allocator); } // namespace ROCKSDB_NAMESPACE diff --git a/memory/jemalloc_nodump_allocator.cc b/memory/jemalloc_nodump_allocator.cc index 02e1fad16..9bcd679ae 100644 --- a/memory/jemalloc_nodump_allocator.cc +++ b/memory/jemalloc_nodump_allocator.cc @@ -63,7 +63,7 @@ bool JemallocNodumpAllocator::IsSupported(std::string* why) { } JemallocNodumpAllocator::JemallocNodumpAllocator( - JemallocAllocatorOptions& options) + const JemallocAllocatorOptions& options) : options_(options) #ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR , @@ -283,7 +283,7 @@ void JemallocNodumpAllocator::DestroyThreadSpecificCache(void* ptr) { #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR Status NewJemallocNodumpAllocator( - JemallocAllocatorOptions& options, + const JemallocAllocatorOptions& options, std::shared_ptr* memory_allocator) { if (memory_allocator == nullptr) { return Status::InvalidArgument("memory_allocator must be non-null."); diff --git a/memory/jemalloc_nodump_allocator.h b/memory/jemalloc_nodump_allocator.h index 2bdbaeb32..c7ab0d6d1 100644 --- a/memory/jemalloc_nodump_allocator.h +++ b/memory/jemalloc_nodump_allocator.h @@ -30,7 +30,7 @@ namespace ROCKSDB_NAMESPACE { // arena mutexes. class JemallocNodumpAllocator : public BaseMemoryAllocator { public: - explicit JemallocNodumpAllocator(JemallocAllocatorOptions& options); + explicit JemallocNodumpAllocator(const JemallocAllocatorOptions& options); #ifdef ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR ~JemallocNodumpAllocator(); #endif // ROCKSDB_JEMALLOC_NODUMP_ALLOCATOR From 6cbb10466368217a91d3f293d62f48a47862f8e5 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Fri, 25 Aug 2023 13:50:48 -0700 Subject: [PATCH 067/386] Fix seg fault in auto_readahead_size during IOError (#11761) Summary: Fix seg fault in auto_readahead_size ``` db_stress: internal_repo_rocksdb/repo/table/block_based/partitioned_index_iterator.h:70: virtual rocksdb::IndexValue rocksdb::PartitionedIndexIterator::value() const: Assertion `Valid()' failed. ``` During seek, after calculating readahead_size, db_stress can inject IOError resulting in failure to index_iter_->Seek and making index_iter_ invalid. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11761 Test Plan: Reproducible locally and passed with this fix Reviewed By: anand1976 Differential Revision: D48696248 Pulled By: akankshamahajan15 fbshipit-source-id: 2be43bf56ad0fc2f95f9093c19c9a1b15a716091 --- table/block_based/block_based_table_iterator.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 8c313ed61..ac47043f0 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -79,13 +79,20 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } } - if (read_options_.auto_readahead_size && read_options_.iterate_upper_bound) { + if (read_options_.auto_readahead_size && read_options_.iterate_upper_bound && + is_first_pass) { FindReadAheadSizeUpperBound(); if (target) { index_iter_->Seek(*target); } else { index_iter_->SeekToFirst(); } + + // Check for IO error. 
+ if (!index_iter_->Valid()) { + ResetDataIter(); + return; + } } IndexValue v = index_iter_->value(); From 38e9e6903eccf9f13a8def6a28eb967a749dac41 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Fri, 25 Aug 2023 16:22:09 -0700 Subject: [PATCH 068/386] Del `(object)` from 200 inc instagram-server/distillery/slipstream/thrift_models/StoryFeedMediaSticker/ttypes.py Summary: Python3 makes the use of `(object)` in class inheritance unnecessary. Let's modernize our code by eliminating this. Reviewed By: itamaro Differential Revision: D48673915 fbshipit-source-id: a1a6ae8572271eb2898b748c8216ea68e362f06a --- buckifier/targets_builder.py | 2 +- buckifier/util.py | 2 +- build_tools/error_filter.py | 2 +- tools/block_cache_analyzer/block_cache_pysim.py | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/buckifier/targets_builder.py b/buckifier/targets_builder.py index 343b2207d..94dbd3653 100644 --- a/buckifier/targets_builder.py +++ b/buckifier/targets_builder.py @@ -23,7 +23,7 @@ def pretty_list(lst, indent=8): return res -class TARGETSBuilder(object): +class TARGETSBuilder: def __init__(self, path, extra_argv): self.path = path header = targets_cfg.rocksdb_target_header_template.format( diff --git a/buckifier/util.py b/buckifier/util.py index 8943fed2b..be197efd0 100644 --- a/buckifier/util.py +++ b/buckifier/util.py @@ -14,7 +14,7 @@ import time -class ColorString(object): +class ColorString: """Generate colorful strings on terminal""" HEADER = "\033[95m" diff --git a/build_tools/error_filter.py b/build_tools/error_filter.py index c42df1f91..d9cb1099c 100644 --- a/build_tools/error_filter.py +++ b/build_tools/error_filter.py @@ -15,7 +15,7 @@ import sys -class ErrorParserBase(object): +class ErrorParserBase: def parse_error(self, line): """Parses a line of test output. If it contains an error, returns a formatted message describing the error; otherwise, returns None. diff --git a/tools/block_cache_analyzer/block_cache_pysim.py b/tools/block_cache_analyzer/block_cache_pysim.py index 67307df53..3962f37eb 100644 --- a/tools/block_cache_analyzer/block_cache_pysim.py +++ b/tools/block_cache_analyzer/block_cache_pysim.py @@ -492,7 +492,7 @@ def write_policy_ratio_timeline( file.write(row + "\n") -class Policy(object): +class Policy: """ A policy maintains a set of evicted keys. It returns a reward of one to itself if it has not evicted a missing key. Otherwise, it gives itself 0 @@ -654,7 +654,7 @@ def policy_name(self): return "cc" -class Cache(object): +class Cache: """ This is the base class for the implementations of alternative cache replacement policies. @@ -1310,7 +1310,7 @@ def _should_admit(self, trace_record, key, hash, value_size): return True -class Deque(object): +class Deque: """A Deque class facilitates the implementation of LRU and ARC.""" def __init__(self): From ba597514309b686d8addb59616f067d5522186b7 Mon Sep 17 00:00:00 2001 From: Jan Date: Fri, 25 Aug 2023 18:01:14 -0700 Subject: [PATCH 069/386] remove an unused typedef (#11286) Summary: `VersionBuilderMap` type alias definition seem unused. If this PR can be compiled fine then the alias is probably not needed anymore. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11286 Reviewed By: jaykorean Differential Revision: D48656747 Pulled By: ajkr fbshipit-source-id: ac8554922aead7dc3d24fe7e6544a4622578c514 --- db/version_set.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/db/version_set.h b/db/version_set.h index f08f758c4..87c6eb514 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1526,9 +1526,6 @@ class VersionSet { } protected: - using VersionBuilderMap = - UnorderedMap>; - struct ManifestWriter; friend class Version; From 310a242c57aad2d52a89bdadea797fab4c135c0a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 28 Aug 2023 13:36:25 -0700 Subject: [PATCH 070/386] Fix `GenericRateLimiter` hanging bug (#11763) Summary: Fixes https://github.com/facebook/rocksdb/issues/11742 Even after performing duty (1) ("Waiting for the next refill time"), it is possible the remaining threads are all in `Wait()`. Waking up at least one thread is enough to ensure progress continues, even if no new requests arrive. The repro unit test (https://github.com/facebook/rocksdb/commit/bb54245e6) is not included as it depends on an unlanded PR (https://github.com/facebook/rocksdb/issues/11753) Pull Request resolved: https://github.com/facebook/rocksdb/pull/11763 Reviewed By: jaykorean Differential Revision: D48710130 Pulled By: ajkr fbshipit-source-id: 9d166bd577ea3a96ccd81dde85871fec5e85a4eb --- .../fixed_generic_rate_limiter_hang.md | 1 + util/rate_limiter.cc | 20 +++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md diff --git a/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md b/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md new file mode 100644 index 000000000..8f789e186 --- /dev/null +++ b/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md @@ -0,0 +1 @@ +Fixed a race condition in `GenericRateLimiter` that could cause it to stop granting requests diff --git a/util/rate_limiter.cc b/util/rate_limiter.cc index be54138d9..ddb9bdbf0 100644 --- a/util/rate_limiter.cc +++ b/util/rate_limiter.cc @@ -179,16 +179,16 @@ void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri, // Whichever thread reaches here first performs duty (2) as described // above. RefillBytesAndGrantRequestsLocked(); - if (r.request_bytes == 0) { - // If there is any remaining requests, make sure there exists at least - // one candidate is awake for future duties by signaling a front request - // of a queue. - for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) { - std::deque queue = queue_[i]; - if (!queue.empty()) { - queue.front()->cv.Signal(); - break; - } + } + if (r.request_bytes == 0) { + // If there is any remaining requests, make sure there exists at least + // one candidate is awake for future duties by signaling a front request + // of a queue. + for (int i = Env::IO_TOTAL - 1; i >= Env::IO_LOW; --i) { + auto& queue = queue_[i]; + if (!queue.empty()) { + queue.front()->cv.Signal(); + break; } } } From f36394ff20b8cb3476deb19c141fae2651ebb641 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Mon, 28 Aug 2023 17:08:28 -0700 Subject: [PATCH 071/386] Fix seg fault in auto_readahead_size with async_io (#11769) Summary: Fix seg fault in auto_readahead_size with async_io when readahead_size = 0. If readahead_size is trimmed and is 0, it's not eligible for further prefetching and should return. 
Error occured when the first buffer already contains data and it goes for prefetching in second buffer leading to assertion failure - `assert(roundup_len1 >= alignment); ` because roundup_len1 = length + readahead_size. length is 0 and readahead_size is also 0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11769 Test Plan: Reproducible with db_stress with async_io enabled. Reviewed By: anand1976 Differential Revision: D48743031 Pulled By: akankshamahajan15 fbshipit-source-id: 0e08c41f862f6287ca223fbfaf6cd42fc97b3c87 --- file/file_prefetch_buffer.cc | 14 +++-- file/file_prefetch_buffer.h | 6 ++ file/prefetch_test.cc | 115 +++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 6 deletions(-) diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index d34a65c82..5c9c2797c 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -545,7 +545,9 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, assert(roundup_len1 >= chunk_len1); read_len1 = static_cast(roundup_len1 - chunk_len1); } - { + + // Prefetch in second buffer only if readahead_size_ > 0. + if (readahead_size_ > 0) { // offset and size alignment for second buffer for asynchronous // prefetching uint64_t rounddown_start2 = roundup_end1; @@ -733,7 +735,9 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( (bufs_[curr_].async_read_in_progress_ || offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize())) { - if (readahead_size_ > 0) { + // In case readahead_size is trimmed (=0), we still want to poll the data + // submitted with explicit_prefetch_submitted_=true. + if (readahead_size_ > 0 || explicit_prefetch_submitted_) { Status s; assert(reader != nullptr); assert(max_readahead_size_ >= readahead_size_); @@ -825,14 +829,12 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, num_file_reads_ = 0; explicit_prefetch_submitted_ = false; bool is_eligible_for_prefetching = false; + + UpdateReadAheadSizeForUpperBound(offset, n); if (readahead_size_ > 0 && (!implicit_auto_readahead_ || num_file_reads_ >= num_file_reads_for_auto_readahead_)) { - UpdateReadAheadSizeForUpperBound(offset, n); - // After trim, readahead size can be 0. - if (readahead_size_ > 0) { is_eligible_for_prefetching = true; - } } // 1. Cancel any pending async read to make code simpler as buffers can be out diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 334e32b6e..8c2e82476 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -389,6 +389,12 @@ class FilePrefetchBuffer { bufs_[second].offset_)) { return false; } + + // Readahead size can be 0 because of trimming. + if (readahead_size_ == 0) { + return false; + } + bufs_[second].buffer_.Clear(); return true; } diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 14c7c28a5..782751c82 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -2164,6 +2164,121 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { } } +// This test checks if readahead_size is trimmed when upper_bound is reached +// during Seek in async_io and it goes for polling without any extra +// prefetching. 
+TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBoundSeekOnly) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + + // First param is if the mockFS support_prefetch or not + std::shared_ptr fs = + std::make_shared(FileSystem::Default(), false); + + bool use_direct_io = false; + if (std::get<0>(GetParam())) { + use_direct_io = true; + } + + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + Options options; + SetGenericOptions(env.get(), use_direct_io, options); + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options; + SetBlockBasedTableOptions(table_options); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + Status s = TryReopen(options); + if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { + // If direct IO is not supported, skip the test + return; + } else { + ASSERT_OK(s); + } + + Random rnd(309); + WriteBatch batch; + + for (int i = 0; i < 26; i++) { + std::string key = "my_key_"; + + for (int j = 0; j < 10; j++) { + key += char('a' + i); + ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); + } + } + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::string start_key = "my_key_a"; + + std::string end_key = "my_key_"; + for (int j = 0; j < 10; j++) { + end_key += char('a' + 25); + } + + Slice least(start_key.data(), start_key.size()); + Slice greatest(end_key.data(), end_key.size()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); + + s = TryReopen(options); + ASSERT_OK(s); + + int buff_count_with_tuning = 0; + + SyncPoint::GetInstance()->SetCallBack( + "FilePrefetchBuffer::PrefetchAsyncInternal:Start", + [&](void*) { buff_count_with_tuning++; }); + + bool read_async_called = false; + SyncPoint::GetInstance()->SetCallBack( + "UpdateResults::io_uring_result", + [&](void* /*arg*/) { read_async_called = true; }); + + SyncPoint::GetInstance()->EnableProcessing(); + + SyncPoint::GetInstance()->EnableProcessing(); + + ReadOptions ropts; + if (std::get<1>(GetParam())) { + ropts.readahead_size = 32768; + } + ropts.async_io = true; + + Slice ub = Slice("my_key_aaa"); + ropts.iterate_upper_bound = &ub; + Slice seek_key = Slice("my_key_aaa"); + + // With tuning readahead_size. + { + ASSERT_OK(options.statistics->Reset()); + ropts.auto_readahead_size = true; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + + iter->Seek(seek_key); + + ASSERT_OK(iter->status()); + + // Verify results. + uint64_t readhahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + // Readahead got trimmed. + if (read_async_called) { + ASSERT_GT(readhahead_trimmed, 0); + // Seek called PrefetchAsync to poll the data. + ASSERT_EQ(1, buff_count_with_tuning); + } else { + // async_io disabled. + ASSERT_GE(readhahead_trimmed, 0); + ASSERT_EQ(0, buff_count_with_tuning); + } + } + Close(); +} + namespace { #ifdef GFLAGS const int kMaxArgCount = 100; From ecbeb305a07ea1745cb0670ab19fe37f221e4d2c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 29 Aug 2023 16:51:48 -0700 Subject: [PATCH 072/386] Removing some checks for UDT in memtable only feature (#11732) Summary: The user-defined timestamps feature only enforces that for the same key, user-defined timestamps should be non-decreasing. For the user-defined timestamps in memtable only feature, during flush, we check the user-defined timestamps in each memtable to examine if the data is considered expired with regard to `full_history_ts_low`. 
In this process, it's assuming that a newer memtable should not have smaller user-defined timestamps than an older memtable. This check however is enforcing ordering of user-defined timestamps across keys, as apposed to the vanilla UDT feature, that only enforce ordering of user-defined timestamps for the same key. This more strict user-defined timestamp ordering requirement could be an issue for secondary instances where commits can be out of order. And after thinking more about it, this requirement is really an overkill to keep the invariants of `full_history_ts_low` which are: 1) users cannot read below `full_history_ts_low` 2) users cannot write at or below `full_history_ts_low` 3) `full_history_ts_low` can only be increasing As long as RocksDB enforces these 3 checks, we can prohibit inconsistent read that returns a different value. And these three checks are covered in existing APIs. So this PR removes the extra checks in the UDT in memtable only feature that requires user-defined timestamps to be non decreasing across keys. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11732 Reviewed By: ltamasi Differential Revision: D48541466 Pulled By: jowlyzhang fbshipit-source-id: 95453c6e391cbd511c0feab05f0b11c312d17186 --- db/column_family.cc | 13 ++----------- db/flush_job.cc | 14 +++++++------- include/rocksdb/advanced_options.h | 3 --- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 185ec729c..7578b7ec7 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1557,24 +1557,15 @@ bool ColumnFamilyData::ShouldPostponeFlushToRetainUDT( if (full_history_ts_low.empty()) { return false; } -#ifndef NDEBUG - Slice last_table_newest_udt; -#endif /* !NDEBUG */ for (const Slice& table_newest_udt : imm()->GetTablesNewestUDT(max_memtable_id)) { assert(table_newest_udt.size() == full_history_ts_low.size()); - assert(last_table_newest_udt.empty() || - ucmp->CompareTimestamp(table_newest_udt, last_table_newest_udt) >= - 0); // Checking the newest UDT contained in MemTable with ascending ID up to - // `max_memtable_id`. MemTable with bigger ID will have newer UDT, return - // immediately on finding the first MemTable that needs postponing. + // `max_memtable_id`. Return immediately on finding the first MemTable that + // needs postponing. if (ucmp->CompareTimestamp(table_newest_udt, full_history_ts_low) >= 0) { return true; } -#ifndef NDEBUG - last_table_newest_udt = table_newest_udt; -#endif /* !NDEBUG */ } return false; } diff --git a/db/flush_job.cc b/db/flush_job.cc index d3a777b44..451e61937 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -1122,16 +1122,16 @@ void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() { cfd_->ioptions()->persist_user_defined_timestamps) { return; } + // Find the newest user-defined timestamps from all the flushed memtables. for (MemTable* m : mems_) { Slice table_newest_udt = m->GetNewestUDT(); - // The picked Memtables should have ascending ID, and should have - // non-decreasing newest user-defined timestamps. 
- if (!cutoff_udt_.empty()) { - assert(table_newest_udt.size() == cutoff_udt_.size()); - assert(ucmp->CompareTimestamp(table_newest_udt, cutoff_udt_) >= 0); - cutoff_udt_.clear(); + if (cutoff_udt_.empty() || + ucmp->CompareTimestamp(table_newest_udt, cutoff_udt_) > 0) { + if (!cutoff_udt_.empty()) { + assert(table_newest_udt.size() == cutoff_udt_.size()); + } + cutoff_udt_.assign(table_newest_udt.data(), table_newest_udt.size()); } - cutoff_udt_.assign(table_newest_udt.data(), table_newest_udt.size()); } } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index f22ade185..18eb02081 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -1181,9 +1181,6 @@ struct AdvancedColumnFamilyOptions { // refrains from flushing a memtable with data still above // the cutoff timestamp with best effort. If this cutoff timestamp is not set, // flushing continues normally. - // NOTE: in order for the cutoff timestamp to work properly, users of this - // feature need to ensure to write to a column family with globally - // non-decreasing user-defined timestamps. // // Users can do user-defined // multi-versioned read above the cutoff timestamp. When users try to read From 0b8b17a9d15d6d65f6483aa692284e51a244865f Mon Sep 17 00:00:00 2001 From: jsteemann Date: Tue, 29 Aug 2023 18:34:59 -0700 Subject: [PATCH 073/386] avoid find() -> insert() sequence (#11743) Summary: when a key is recorded for locking in a pessimistic transaction, the key is first looked up in a map, and then inserted into the map if it was not already contained. this can be simplified to an unconditional insert. in the ideal case that all keys are unique, this saves all the find() operations. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11743 Reviewed By: anand1976 Differential Revision: D48656798 Pulled By: ajkr fbshipit-source-id: d0150de2db757e0c05e1797cfc24380790c71276 --- utilities/transactions/pessimistic_transaction.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index 1771497a6..2986fae8d 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -888,14 +888,8 @@ Status PessimisticTransaction::LockBatch(WriteBatch* batch, Handler() {} void RecordKey(uint32_t column_family_id, const Slice& key) { - std::string key_str = key.ToString(); - auto& cfh_keys = keys_[column_family_id]; - auto iter = cfh_keys.find(key_str); - if (iter == cfh_keys.end()) { - // key not yet seen, store it. - cfh_keys.insert({std::move(key_str)}); - } + cfh_keys.insert(key.ToString()); } Status PutCF(uint32_t column_family_id, const Slice& key, From e373685dab1b0716a8ed7532c80ad2e4d05e7590 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 29 Aug 2023 18:39:10 -0700 Subject: [PATCH 074/386] Add SystemClock::TimedWait() function (#11753) Summary: Having a synthetic implementation of `TimedWait()` in `SystemClock` will allow us to add `SyncPoint`s while mutex is released, which was previously impossible since the lock was released and reacquired all within `pthread_cond_timedwait()`. Additionally, integrating `TimedWait()` with `MockSystemClock` allows us to cleanup some workarounds in the test code. In this PR I only cleaned up the `GenericRateLimiter` test workaround. 
This is related to the intended follow-up mentioned in https://github.com/facebook/rocksdb/issues/7101's description. There are a couple differences: (1) This PR does not include removing the particular workaround that initially motivated it. Actually, the `Timer` class uses `InstrumentedCondVar`, so the interface introduced here is inadequate to remove that workaround. On the bright side, the interface introduced in this PR can be changed as needed since it can neither be used nor extended externally, due to using forward-declared `port::CondVar*` in the interface. (2) This PR only makes the change in `SystemClock` not `Env`. Older revisions of this PR included `Env::TimedWait()` and `SpecialEnv::TimedWait()`; however, since they were unused it probably makes sense to defer adding them until when they are needed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11753 Reviewed By: pdillinger Differential Revision: D48654995 Pulled By: ajkr fbshipit-source-id: 15e19f2454b64d4ec7f50e328691c66ca9911122 --- env/env.cc | 5 +++++ include/rocksdb/port_defs.h | 4 ++++ include/rocksdb/system_clock.h | 15 +++++++++++++++ port/port_posix.h | 3 +++ port/win/port_win.h | 3 +++ test_util/mock_time_env.h | 27 +++++++++++++++++++++++++++ util/rate_limiter.cc | 2 +- util/rate_limiter_test.cc | 25 ++++++------------------- 8 files changed, 64 insertions(+), 20 deletions(-) diff --git a/env/env.cc b/env/env.cc index 937be43c0..40493b478 100644 --- a/env/env.cc +++ b/env/env.cc @@ -1229,4 +1229,9 @@ Status SystemClock::CreateFromString(const ConfigOptions& config_options, return LoadSharedObject(config_options, value, result); } } + +bool SystemClock::TimedWait(port::CondVar* cv, + std::chrono::microseconds deadline) { + return cv->TimedWait(deadline.count()); +} } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/port_defs.h b/include/rocksdb/port_defs.h index 9771aacb9..68f1d61d2 100644 --- a/include/rocksdb/port_defs.h +++ b/include/rocksdb/port_defs.h @@ -12,6 +12,10 @@ namespace ROCKSDB_NAMESPACE { +namespace port { +class CondVar; +} + enum class CpuPriority { kIdle = 0, kLow = 1, diff --git a/include/rocksdb/system_clock.h b/include/rocksdb/system_clock.h index 7ca92e54e..c4cfcecb5 100644 --- a/include/rocksdb/system_clock.h +++ b/include/rocksdb/system_clock.h @@ -9,9 +9,11 @@ #pragma once #include +#include #include #include "rocksdb/customizable.h" +#include "rocksdb/port_defs.h" #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/status.h" @@ -68,6 +70,14 @@ class SystemClock : public Customizable { // Sleep/delay the thread for the prescribed number of micro-seconds. virtual void SleepForMicroseconds(int micros) = 0; + // For internal use/extension only. + // + // Issues a wait on `cv` that times out at `deadline`. May wakeup and return + // spuriously. + // + // Returns true if wait timed out, false otherwise + virtual bool TimedWait(port::CondVar* cv, std::chrono::microseconds deadline); + // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC). // Only overwrites *unix_time on success. 
virtual Status GetCurrentTime(int64_t* unix_time) = 0; @@ -94,6 +104,11 @@ class SystemClockWrapper : public SystemClock { return target_->SleepForMicroseconds(micros); } + virtual bool TimedWait(port::CondVar* cv, + std::chrono::microseconds deadline) override { + return target_->TimedWait(cv, deadline); + } + Status GetCurrentTime(int64_t* unix_time) override { return target_->GetCurrentTime(unix_time); } diff --git a/port/port_posix.h b/port/port_posix.h index cdb256a6d..e49818604 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -149,6 +149,9 @@ class CondVar { public: explicit CondVar(Mutex* mu); ~CondVar(); + + Mutex* GetMutex() const { return mu_; } + void Wait(); // Timed condition wait. Returns true if timeout occurred. bool TimedWait(uint64_t abs_time_us); diff --git a/port/win/port_win.h b/port/win/port_win.h index 4d9883b63..621f05370 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -170,6 +170,9 @@ class CondVar { explicit CondVar(Mutex* mu) : mu_(mu) {} ~CondVar(); + + Mutex* GetMutex() const { return mu_; } + void Wait(); bool TimedWait(uint64_t expiration_time); void Signal(); diff --git a/test_util/mock_time_env.h b/test_util/mock_time_env.h index 7834368e0..e11bed0d1 100644 --- a/test_util/mock_time_env.h +++ b/test_util/mock_time_env.h @@ -8,7 +8,10 @@ #include #include +#include "port/port.h" #include "rocksdb/system_clock.h" +#include "test_util/mock_time_env.h" +#include "util/random.h" namespace ROCKSDB_NAMESPACE { @@ -65,6 +68,30 @@ class MockSystemClock : public SystemClockWrapper { current_time_us_.fetch_add(micros); } + virtual bool TimedWait(port::CondVar* cv, + std::chrono::microseconds deadline) override { + uint64_t now_micros = NowMicros(); + uint64_t deadline_micros = static_cast(deadline.count()); + uint64_t delay_micros; + if (deadline_micros > now_micros) { + delay_micros = deadline_micros - now_micros; + } else { + delay_micros = 0; + } + // To prevent slowdown, this `TimedWait()` is completely synthetic. First, + // it yields to coerce other threads to run while the lock is released. + // Second, it randomly selects between mocking an immediate wakeup and a + // timeout. + cv->GetMutex()->Unlock(); + std::this_thread::yield(); + bool mock_timeout = Random::GetTLSInstance()->OneIn(2); + if (mock_timeout) { + current_time_us_.fetch_add(delay_micros); + } + cv->GetMutex()->Lock(); + return mock_timeout; + } + // TODO: this is a workaround for the different behavior on different platform // for timedwait timeout. Ideally timedwait API should be moved to env. // details: PR #7101. 
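Before the rate limiter diff below, a minimal sketch of the caller-side pattern this interface enables (the helper `WaitUntil` is hypothetical, not a RocksDB API; the caller is assumed to already hold the condition variable's mutex, and `port/port.h` is an internal header): since `TimedWait()` may wake up spuriously, the waited-on condition is re-checked in a loop, and tests can substitute a clock such as `MockSystemClock` to mock the timeout.

#include <chrono>

#include "port/port.h"  // internal header providing port::CondVar
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Hypothetical helper: wait until `ready` becomes true or `deadline` passes.
// The caller must hold cv->GetMutex(); TimedWait() releases and reacquires it.
inline bool WaitUntil(SystemClock* clock, port::CondVar* cv, const bool& ready,
                      std::chrono::microseconds deadline) {
  while (!ready) {
    if (clock->TimedWait(cv, deadline)) {
      break;  // deadline reached (or a mocked timeout in tests)
    }
  }
  return ready;
}
}  // namespace ROCKSDB_NAMESPACE

The `GenericRateLimiter` change in the next diff routes its existing wait through `clock_->TimedWait()` so a mock clock like the one above can drive it in tests.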
diff --git a/util/rate_limiter.cc b/util/rate_limiter.cc index ddb9bdbf0..12eef1311 100644 --- a/util/rate_limiter.cc +++ b/util/rate_limiter.cc @@ -170,7 +170,7 @@ void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri, RecordTick(stats, NUMBER_RATE_LIMITER_DRAINS); ++num_drains_; wait_until_refill_pending_ = true; - r.cv.TimedWait(wait_until); + clock_->TimedWait(&r.cv, std::chrono::microseconds(wait_until)); TEST_SYNC_POINT_CALLBACK("GenericRateLimiter::Request:PostTimedWait", &time_until_refill_us); wait_until_refill_pending_ = false; diff --git a/util/rate_limiter_test.cc b/util/rate_limiter_test.cc index 7df2bb04f..dfaa3a2cd 100644 --- a/util/rate_limiter_test.cc +++ b/util/rate_limiter_test.cc @@ -15,6 +15,7 @@ #include "db/db_test_util.h" #include "port/port.h" #include "rocksdb/system_clock.h" +#include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testharness.h" #include "util/random.h" @@ -464,31 +465,21 @@ TEST_F(RateLimiterTest, AutoTuneIncreaseWhenFull) { const std::chrono::seconds kTimePerRefill(1); const int kRefillsPerTune = 100; // needs to match util/rate_limiter.cc - SpecialEnv special_env(Env::Default(), /*time_elapse_only_sleep*/ true); + auto mock_clock = + std::make_shared(Env::Default()->GetSystemClock()); auto stats = CreateDBStatistics(); std::unique_ptr rate_limiter(new GenericRateLimiter( 1000 /* rate_bytes_per_sec */, std::chrono::microseconds(kTimePerRefill).count(), 10 /* fairness */, - RateLimiter::Mode::kWritesOnly, special_env.GetSystemClock(), - true /* auto_tuned */)); - - // Rate limiter uses `CondVar::TimedWait()`, which does not have access to the - // `Env` to advance its time according to the fake wait duration. The - // workaround is to install a callback that advance the `Env`'s mock time. 
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "GenericRateLimiter::Request:PostTimedWait", [&](void* arg) { - int64_t time_waited_us = *static_cast(arg); - special_env.SleepForMicroseconds(static_cast(time_waited_us)); - }); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + RateLimiter::Mode::kWritesOnly, mock_clock, true /* auto_tuned */)); // verify rate limit increases after a sequence of periods where rate limiter // is always drained int64_t orig_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), RateLimiter::OpType::kWrite); - while (std::chrono::microseconds(special_env.NowMicros()) <= + while (std::chrono::microseconds(mock_clock->NowMicros()) <= kRefillsPerTune * kTimePerRefill) { rate_limiter->Request(orig_bytes_per_sec, Env::IO_HIGH, stats.get(), RateLimiter::OpType::kWrite); @@ -496,13 +487,9 @@ TEST_F(RateLimiterTest, AutoTuneIncreaseWhenFull) { int64_t new_bytes_per_sec = rate_limiter->GetSingleBurstBytes(); ASSERT_GT(new_bytes_per_sec, orig_bytes_per_sec); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack( - "GenericRateLimiter::Request:PostTimedWait"); - // decreases after a sequence of periods where rate limiter is not drained orig_bytes_per_sec = new_bytes_per_sec; - special_env.SleepForMicroseconds(static_cast( + mock_clock->SleepForMicroseconds(static_cast( kRefillsPerTune * std::chrono::microseconds(kTimePerRefill).count())); // make a request so tuner can be triggered rate_limiter->Request(1 /* bytes */, Env::IO_HIGH, stats.get(), From c1e6ffc40a2898e6b97ecdb4528f523c3debc875 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Tue, 29 Aug 2023 18:40:13 -0700 Subject: [PATCH 075/386] remove a sub-condition that is always true (#11746) Summary: the value of `done` is always false here, so the sub-condition `!done` will always be true and the check can be removed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11746 Reviewed By: anand1976 Differential Revision: D48656845 Pulled By: ajkr fbshipit-source-id: 523ba3d07b3af7880c8c8ccb20442fd7c0f49417 --- db/memtable_list.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/memtable_list.cc b/db/memtable_list.cc index ee1563f01..b665c19b8 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -184,7 +184,7 @@ bool MemTableListVersion::GetFromList( assert(*seq != kMaxSequenceNumber || s->IsNotFound()); return true; } - if (!done && !s->ok() && !s->IsMergeInProgress() && !s->IsNotFound()) { + if (!s->ok() && !s->IsMergeInProgress() && !s->IsNotFound()) { return false; } } From 4234a6a301f5a5e25e8bf20629375ca3d4996309 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 30 Aug 2023 09:34:31 -0700 Subject: [PATCH 076/386] Increase full_history_ts_low when flush happens during recovery (#11774) Summary: This PR adds a missing piece for the UDT in memtable only feature, which is to automatically increase `full_history_ts_low` when flush happens during recovery. 
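As a rough illustration of the observable effect, a sketch (assuming a column family opened with a u64 timestamp comparator and `persist_user_defined_timestamps = false`; the helper name is made up) of querying the cutoff after a reopen whose recovery flushed user-defined-timestamp data:

#include <string>

#include "rocksdb/db.h"

// Hypothetical helper, not part of this PR.
std::string QueryCutoff(rocksdb::DB* db, rocksdb::ColumnFamilyHandle* cfh) {
  std::string ts_low;
  rocksdb::Status s = db->GetFullHistoryTsLow(cfh, &ts_low);
  // After a recovery flush of UDT data, ts_low is no longer empty: it is
  // moved just above the newest user-defined timestamp in the flushed
  // memtables, so reads below it (and writes at or below it) are rejected.
  return s.ok() ? ts_low : std::string();
}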
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11774 Test Plan: Added unit test make all check Reviewed By: ltamasi Differential Revision: D48799109 Pulled By: jowlyzhang fbshipit-source-id: fd681ed66d9d40904ca2c919b2618eb692686035 --- db/db_impl/db_impl.cc | 3 ++- db/db_impl/db_impl_open.cc | 16 ++++++++++++++++ db/db_wal_test.cc | 11 +++++++++++ db/flush_job.cc | 9 +++------ include/rocksdb/advanced_options.h | 4 +++- util/udt_util.cc | 8 ++++++++ util/udt_util.h | 8 ++++++++ util/udt_util_test.cc | 14 ++++++++++++++ 8 files changed, 65 insertions(+), 8 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 93681c70b..8bda80bf4 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1693,7 +1693,8 @@ Status DBImpl::GetFullHistoryTsLow(ColumnFamilyHandle* column_family, } InstrumentedMutexLock l(&mutex_); *ts_low = cfd->GetFullHistoryTsLow(); - assert(cfd->user_comparator()->timestamp_size() == ts_low->size()); + assert(ts_low->empty() || + cfd->user_comparator()->timestamp_size() == ts_low->size()); return Status::OK(); } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index e6d97b125..8db53dac9 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1722,6 +1722,22 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, for (const auto& blob : blob_file_additions) { edit->AddBlobFile(blob); } + + // For UDT in memtable only feature, move up the cutoff timestamp whenever + // a flush happens. + const Comparator* ucmp = cfd->user_comparator(); + size_t ts_sz = ucmp->timestamp_size(); + if (ts_sz > 0 && !cfd->ioptions()->persist_user_defined_timestamps) { + Slice mem_newest_udt = mem->GetNewestUDT(); + std::string full_history_ts_low = cfd->GetFullHistoryTsLow(); + if (full_history_ts_low.empty() || + ucmp->CompareTimestamp(mem_newest_udt, full_history_ts_low) >= 0) { + std::string new_full_history_ts_low; + GetFullHistoryTsLowFromU64CutoffTs(&mem_newest_udt, + &new_full_history_ts_low); + edit->SetFullHistoryTsLow(new_full_history_ts_low); + } + } } InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 72b6f7c7b..01dc84a0f 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -14,6 +14,7 @@ #include "port/stack_trace.h" #include "rocksdb/file_system.h" #include "test_util/sync_point.h" +#include "util/udt_util.h" #include "utilities/fault_injection_env.h" #include "utilities/fault_injection_fs.h" @@ -384,6 +385,7 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { ts_options.persist_user_defined_timestamps = persist_udt; bool avoid_flush_during_recovery = true; + std::string full_history_ts_low; ReadOptions read_opts; do { Slice ts_slice = ts1; @@ -439,6 +441,8 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndNoFlush) { CheckGet(read_opts, 1, "foo", "v4", ts3); CheckGet(read_opts, 1, "bar", "v2", ts2); CheckGet(read_opts, 1, "baz", "v5", ts1); + ASSERT_OK(db_->GetFullHistoryTsLow(handles_[1], &full_history_ts_low)); + ASSERT_TRUE(full_history_ts_low.empty()); } while (ChangeWalOptions()); } @@ -470,6 +474,8 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { std::vector> level_to_files; dbfull()->TEST_GetFilesMetaData(handles_[1], &level_to_files); + std::string full_history_ts_low; + ASSERT_OK(db_->GetFullHistoryTsLow(handles_[1], &full_history_ts_low)); ASSERT_GT(level_to_files.size(), 1); // L0 only has one SST file. 
ASSERT_EQ(level_to_files[0].size(), 1); @@ -477,9 +483,14 @@ TEST_P(DBWALTestWithTimestamp, RecoverAndFlush) { if (persist_udt) { ASSERT_EQ(smallest_ukey_without_ts + write_ts, meta.smallest.user_key()); ASSERT_EQ(largest_ukey_without_ts + write_ts, meta.largest.user_key()); + ASSERT_TRUE(full_history_ts_low.empty()); } else { ASSERT_EQ(smallest_ukey_without_ts + min_ts, meta.smallest.user_key()); ASSERT_EQ(largest_ukey_without_ts + min_ts, meta.largest.user_key()); + std::string effective_cutoff; + Slice write_ts_slice = write_ts; + GetFullHistoryTsLowFromU64CutoffTs(&write_ts_slice, &effective_cutoff); + ASSERT_EQ(effective_cutoff, full_history_ts_low); } } diff --git a/db/flush_job.cc b/db/flush_job.cc index 451e61937..0e6c66cac 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -1147,16 +1147,13 @@ Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() { ucmp->CompareTimestamp(cutoff_udt_, full_history_ts_low) < 0)) { return Status::OK(); } - Slice cutoff_udt_slice = cutoff_udt_; - uint64_t cutoff_udt_ts = 0; - bool format_res = GetFixed64(&cutoff_udt_slice, &cutoff_udt_ts); - assert(format_res); - (void)format_res; std::string new_full_history_ts_low; + Slice cutoff_udt_slice = cutoff_udt_; // TODO(yuzhangyu): Add a member to AdvancedColumnFamilyOptions for an // operation to get the next immediately larger user-defined timestamp to // expand this feature to other user-defined timestamp formats. - PutFixed64(&new_full_history_ts_low, cutoff_udt_ts + 1); + GetFullHistoryTsLowFromU64CutoffTs(&cutoff_udt_slice, + &new_full_history_ts_low); VersionEdit edit; edit.SetColumnFamily(cfd_->GetID()); edit.SetFullHistoryTsLow(new_full_history_ts_low); diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 18eb02081..08e4e08b7 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -1190,7 +1190,9 @@ struct AdvancedColumnFamilyOptions { // persisted to WAL even if this flag is set to `false`. The benefit of this // is that user-defined timestamps can be recovered with the caveat that users // should flush all memtables so there is no active WAL files before doing a - // downgrade. + // downgrade. In order to use WAL to recover user-defined timestamps, users of + // this feature would want to set both `avoid_flush_during_shutdown` and + // `avoid_flush_during_recovery` to be true. 
// // Note that setting this flag to false is not supported in combination with // atomic flush, or concurrent memtable write enabled by diff --git a/util/udt_util.cc b/util/udt_util.cc index 9380f4560..7d549acab 100644 --- a/util/udt_util.cc +++ b/util/udt_util.cc @@ -8,6 +8,7 @@ #include "db/dbformat.h" #include "rocksdb/types.h" +#include "util/coding.h" #include "util/write_batch_util.h" namespace ROCKSDB_NAMESPACE { @@ -340,4 +341,11 @@ Status ValidateUserDefinedTimestampsOptions( return Status::InvalidArgument( "Unsupported user defined timestamps settings change."); } + +void GetFullHistoryTsLowFromU64CutoffTs(Slice* cutoff_ts, + std::string* full_history_ts_low) { + uint64_t cutoff_udt_ts = 0; + [[maybe_unused]] bool format_res = GetFixed64(cutoff_ts, &cutoff_udt_ts); + PutFixed64(full_history_ts_low, cutoff_udt_ts + 1); +} } // namespace ROCKSDB_NAMESPACE diff --git a/util/udt_util.h b/util/udt_util.h index 4bc837739..706b02e90 100644 --- a/util/udt_util.h +++ b/util/udt_util.h @@ -246,4 +246,12 @@ Status ValidateUserDefinedTimestampsOptions( const Comparator* new_comparator, const std::string& old_comparator_name, bool new_persist_udt, bool old_persist_udt, bool* mark_sst_files_has_no_udt); + +// Given a cutoff user-defined timestamp formatted as uint64_t, get the +// effective `full_history_ts_low` timestamp, which is the next immediately +// bigger timestamp. Used by the UDT in memtable only feature when flushing +// memtables and remove timestamps. This process collapses history and increase +// the effective `full_history_ts_low`. +void GetFullHistoryTsLowFromU64CutoffTs(Slice* cutoff_ts, + std::string* full_history_ts_low); } // namespace ROCKSDB_NAMESPACE diff --git a/util/udt_util_test.cc b/util/udt_util_test.cc index 47e1edf34..44ee567f7 100644 --- a/util/udt_util_test.cc +++ b/util/udt_util_test.cc @@ -438,6 +438,20 @@ TEST(ValidateUserDefinedTimestampsOptionsTest, InvalidUserComparatorChange) { &mark_sst_files) .IsInvalidArgument()); } + +TEST(GetFullHistoryTsLowFromU64CutoffTsTest, Success) { + std::string cutoff_ts; + uint64_t cutoff_ts_int = 3; + PutFixed64(&cutoff_ts, 3); + Slice cutoff_ts_slice = cutoff_ts; + std::string actual_full_history_ts_low; + GetFullHistoryTsLowFromU64CutoffTs(&cutoff_ts_slice, + &actual_full_history_ts_low); + + std::string expected_ts_low; + PutFixed64(&expected_ts_low, cutoff_ts_int + 1); + ASSERT_EQ(expected_ts_low, actual_full_history_ts_low); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From c073c2edde297793619649f38ca46ccc51fdd916 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 30 Aug 2023 12:29:04 -0700 Subject: [PATCH 077/386] =?UTF-8?q?Revert=20"Clarify=20comment=20about=20c?= =?UTF-8?q?ompaction=5Freadahead=5Fsize's=20sanitizatio=E2=80=A6=20(#11773?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: …n change (https://github.com/facebook/rocksdb/issues/11755)" This reverts commit 451316597f55a07ca7920e2287fbed4b421b2fc5. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11773 Reviewed By: ajkr Differential Revision: D48832320 Pulled By: hx235 fbshipit-source-id: 96cef26a885134360766a83505f6717598eac6a9 --- HISTORY.md | 1 - include/rocksdb/options.h | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 50ec905ca..27a248b4e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -22,7 +22,6 @@ ### Behavior Changes * Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file * For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. -* `Options::compaction_readahead_size` will be sanitized to 2MB when set to 0 under non-direct IO since we have moved prefetching responsibility to page cache for compaction read with readhead size equal to `Options::compaction_readahead_size` under non-direct IO (#11631) ### Bug Fixes * Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 4dee0c952..8f62c2dfb 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -951,13 +951,10 @@ struct DBOptions { enum AccessHint { NONE, NORMAL, SEQUENTIAL, WILLNEED }; AccessHint access_hint_on_compaction_start = NORMAL; - // The size RocksDB uses to perform readahead during compaction read. - // If set zero, RocksDB will sanitize it to be 2MB during db open. - // If you're + // If non-zero, we perform bigger reads when doing compaction. If you're // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // - // // Default: 0 // // Dynamically changeable through SetDBOptions() API. From ea9a5b2914e09b9105436e7d6f932bc4434d02c5 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Wed, 30 Aug 2023 12:45:52 -0700 Subject: [PATCH 078/386] Wide Column support in ldb (#11754) Summary: wide_columns can now be pretty-printed in the following commands - `./ldb dump_wal` - `./ldb dump` - `./ldb idump` - `./ldb dump_live_files` - `./ldb scan` - `./sst_dump --command=scan` There are opportunities to refactor to reduce some nearly identical code. This PR is initial change to add wide column support in `ldb` and `sst_dump` tool. More PRs to come for the refactor. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11754 Test Plan: **New Tests added** - `WideColumnsHelperTest::DumpWideColumns` - `WideColumnsHelperTest::DumpSliceAsWideColumns` **Changes added to existing tests** - `ExternalSSTFileTest::BasicMixed` added to cover mixed case (This test should have been added in https://github.com/facebook/rocksdb/issues/11688). This test does not verify the ldb or sst_dump output. This test was used to create test SST files having some rows with wide columns and some without and the generated SST files were used to manually test sst_dump_tool. 
- `createSST()` in `sst_dump_test` now takes `wide_column_one_in` to add wide column value in SST **dump_wal** ``` ./ldb dump_wal --walfile=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/000004.log --print_value --header ``` ``` Sequence,Count,ByteSize,Physical Offset,Key(s) : value 1,1,59,0,PUT_ENTITY(0) : 0x:0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172 2,1,34,42,PUT_ENTITY(0) : 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572 3,1,17,7d,PUT(0) : 0x7468697264 : 0x62617A ``` **idump** ``` ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ idump ``` ``` 'first' seq:1, type:22 => :hello attr_name1:foo attr_name2:bar 'second' seq:2, type:22 => attr_one:two attr_three:four 'third' seq:3, type:1 => baz Internal keys in range: 3 ``` **SST Dump from dump_live_files** ``` ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ compact ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ dump_live_files ``` ``` ... ============================== SST Files ============================== /tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst level:1 ------------------------------ Process /tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst Sst file format: block-based 'first' seq:0, type:22 => :hello attr_name1:foo attr_name2:bar 'second' seq:0, type:22 => attr_one:two attr_three:four 'third' seq:0, type:1 => baz ... ``` **dump** ``` ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ dump ``` ``` first ==> :hello attr_name1:foo attr_name2:bar second ==> attr_one:two attr_three:four third ==> baz Keys in range: 3 ``` **scan** ``` ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/ scan ``` ``` first : :hello attr_name1:foo attr_name2:bar second : attr_one:two attr_three:four third : baz ``` **sst_dump** ``` ./sst_dump --file=/tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst --command=scan ``` ``` options.env is 0x7ff54b296000 Process /tmp/rocksdbtest-226125/db_wide_basic_test_3481961_2308393776696827948/000013.sst Sst file format: block-based from [] to [] 'first' seq:0, type:22 => :hello attr_name1:foo attr_name2:bar 'second' seq:0, type:22 => attr_one:two attr_three:four 'third' seq:0, type:1 => baz ``` Reviewed By: ltamasi Differential Revision: D48837999 Pulled By: jaykorean fbshipit-source-id: b0280f0589d2b9716bb9b50530ffcabb397d140f --- CMakeLists.txt | 2 + Makefile | 3 ++ TARGETS | 7 +++ db/external_sst_file_test.cc | 57 ++++++++++++++++++++++ db/wide/wide_columns_helper.cc | 36 ++++++++++++++ db/wide/wide_columns_helper.h | 21 ++++++++ db/wide/wide_columns_helper_test.cc | 39 +++++++++++++++ db_stress_tool/db_stress_common.h | 9 +--- include/rocksdb/wide_columns.h | 15 +++++- src.mk | 2 + table/sst_file_dumper.cc | 22 +++++++-- tools/ldb_cmd.cc | 76 ++++++++++++++++++++++++++--- tools/sst_dump_test.cc | 43 ++++++++++++---- 13 files changed, 303 insertions(+), 29 deletions(-) create mode 100644 db/wide/wide_columns_helper.cc create mode 100644 db/wide/wide_columns_helper.h create mode 100644 db/wide/wide_columns_helper_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e30f6631..2be25e2c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -714,6 +714,7 @@ set(SOURCES db/wal_manager.cc db/wide/wide_column_serialization.cc db/wide/wide_columns.cc + db/wide/wide_columns_helper.cc 
db/write_batch.cc db/write_batch_base.cc db/write_controller.cc @@ -1357,6 +1358,7 @@ if(WITH_TESTS) db/wal_edit_test.cc db/wide/db_wide_basic_test.cc db/wide/wide_column_serialization_test.cc + db/wide/wide_columns_helper_test.cc db/write_batch_test.cc db/write_callback_test.cc db/write_controller_test.cc diff --git a/Makefile b/Makefile index 41ca0ee28..08ad7e48a 100644 --- a/Makefile +++ b/Makefile @@ -1984,6 +1984,9 @@ cache_reservation_manager_test: $(OBJ_DIR)/cache/cache_reservation_manager_test. wide_column_serialization_test: $(OBJ_DIR)/db/wide/wide_column_serialization_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +wide_columns_helper_test: $(OBJ_DIR)/db/wide/wide_columns_helper_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + #------------------------------------------------- # make install related stuff PREFIX ?= /usr/local diff --git a/TARGETS b/TARGETS index 5125fcf54..ad0da6100 100644 --- a/TARGETS +++ b/TARGETS @@ -103,6 +103,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "db/wal_manager.cc", "db/wide/wide_column_serialization.cc", "db/wide/wide_columns.cc", + "db/wide/wide_columns_helper.cc", "db/write_batch.cc", "db/write_batch_base.cc", "db/write_controller.cc", @@ -5552,6 +5553,12 @@ cpp_unittest_wrapper(name="wide_column_serialization_test", extra_compiler_flags=[]) +cpp_unittest_wrapper(name="wide_columns_helper_test", + srcs=["db/wide/wide_columns_helper_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) + + cpp_unittest_wrapper(name="work_queue_test", srcs=["util/work_queue_test.cc"], deps=[":rocksdb_test_lib"], diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 4507b16c2..9a0b7e659 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -588,6 +588,63 @@ TEST_F(ExternalSSTFileTest, BasicWideColumn) { kRangeDelSkipConfigs)); } +TEST_F(ExternalSSTFileTest, BasicMixed) { + do { + Options options = CurrentOptions(); + + SstFileWriter sst_file_writer(EnvOptions(), options); + + // Current file size should be 0 after sst_file_writer init and before open + // a file. + ASSERT_EQ(sst_file_writer.FileSize(), 0); + + std::string file = sst_files_dir_ + "mixed_file.sst"; + ASSERT_OK(sst_file_writer.Open(file)); + for (int k = 0; k < 100; k++) { + if (k % 5 == 0) { + std::string val1 = Key(k) + "_attr_1_val"; + std::string val2 = Key(k) + "_attr_2_val"; + WideColumns columns{{"attr_1", val1}, {"attr_2", val2}}; + ASSERT_OK(sst_file_writer.PutEntity(Key(k), columns)); + } else { + ASSERT_OK(sst_file_writer.Put(Key(k), Key(k) + "_val")); + } + } + ExternalSstFileInfo file_info; + ASSERT_OK(sst_file_writer.Finish(&file_info)); + + // Current file size should be non-zero after success write. 
+ ASSERT_GT(sst_file_writer.FileSize(), 0); + + ASSERT_EQ(file_info.file_path, file); + ASSERT_EQ(file_info.num_entries, 100); + ASSERT_EQ(file_info.smallest_key, Key(0)); + ASSERT_EQ(file_info.largest_key, Key(99)); + ASSERT_EQ(file_info.num_range_del_entries, 0); + ASSERT_EQ(file_info.smallest_range_del_key, ""); + ASSERT_EQ(file_info.largest_range_del_key, ""); + + DestroyAndReopen(options); + // Add file using file path + ASSERT_OK(DeprecatedAddFile({file})); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + for (int k = 0; k < 10; k++) { + if (k % 5 == 0) { + PinnableWideColumns result; + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), + Key(k), &result)); + std::string val1 = Key(k) + "_attr_1_val"; + std::string val2 = Key(k) + "_attr_2_val"; + WideColumns expected_columns{{"attr_1", val1}, {"attr_2", val2}}; + ASSERT_EQ(result.columns(), expected_columns); + } else { + ASSERT_EQ(Get(Key(k)), Key(k) + "_val"); + } + } + } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction | + kRangeDelSkipConfigs)); +} + class SstFileWriterCollector : public TablePropertiesCollector { public: explicit SstFileWriterCollector(const std::string prefix) : prefix_(prefix) { diff --git a/db/wide/wide_columns_helper.cc b/db/wide/wide_columns_helper.cc new file mode 100644 index 000000000..80a6b5c43 --- /dev/null +++ b/db/wide/wide_columns_helper.cc @@ -0,0 +1,36 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/wide/wide_columns_helper.h" + +#include "db/wide/wide_column_serialization.h" + +namespace ROCKSDB_NAMESPACE { +void WideColumnsHelper::DumpWideColumns(const WideColumns& columns, + std::ostream& os, bool hex) { + if (columns.empty()) { + return; + } + if (hex) { + os << std::hex; + } + auto it = columns.begin(); + os << *it; + for (++it; it != columns.end(); ++it) { + os << ' ' << *it; + } +} +Status WideColumnsHelper::DumpSliceAsWideColumns(const Slice& value, + std::ostream& oss, bool hex) { + WideColumns columns; + Slice value_copy = value; + const Status s = WideColumnSerialization::Deserialize(value_copy, columns); + if (s.ok()) { + DumpWideColumns(columns, oss, hex); + } + return s; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/db/wide/wide_columns_helper.h b/db/wide/wide_columns_helper.h new file mode 100644 index 000000000..83c8548a3 --- /dev/null +++ b/db/wide/wide_columns_helper.h @@ -0,0 +1,21 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/wide_columns.h" + +namespace ROCKSDB_NAMESPACE { +class WideColumnsHelper { + public: + static void DumpWideColumns(const WideColumns& columns, std::ostream& oss, + bool hex); + static Status DumpSliceAsWideColumns(const Slice& value, std::ostream& oss, + bool hex); +}; +} // namespace ROCKSDB_NAMESPACE diff --git a/db/wide/wide_columns_helper_test.cc b/db/wide/wide_columns_helper_test.cc new file mode 100644 index 000000000..482bba531 --- /dev/null +++ b/db/wide/wide_columns_helper_test.cc @@ -0,0 +1,39 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "db/wide/wide_columns_helper.h" + +#include "db/wide/wide_column_serialization.h" +#include "test_util/testharness.h" +#include "util/coding.h" + +namespace ROCKSDB_NAMESPACE { + +TEST(WideColumnsHelperTest, DumpWideColumns) { + WideColumns columns{{"foo", "bar"}, {"hello", "world"}}; + std::ostringstream oss; + WideColumnsHelper::DumpWideColumns(columns, oss, false /* hex */); + EXPECT_EQ("foo:bar hello:world", oss.str()); +} + +TEST(WideColumnsHelperTest, DumpSliceAsWideColumns) { + WideColumns columns{{"foo", "bar"}, {"hello", "world"}}; + std::string output; + ASSERT_OK(WideColumnSerialization::Serialize(columns, output)); + Slice input(output); + + std::ostringstream oss; + ASSERT_OK( + WideColumnsHelper::DumpSliceAsWideColumns(input, oss, false /* hex */)); + + EXPECT_EQ("foo:bar hello:world", oss.str()); +} +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 32e77d40c..4d011434f 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -37,6 +37,7 @@ #include "db/db_impl/db_impl.h" #include "db/version_set.h" +#include "db/wide/wide_columns_helper.h" #include "db_stress_tool/db_stress_env_wrapper.h" #include "db_stress_tool/db_stress_listener.h" #include "db_stress_tool/db_stress_shared_state.h" @@ -629,13 +630,7 @@ inline std::string WideColumnsToHex(const WideColumns& columns) { std::ostringstream oss; - oss << std::hex; - - auto it = columns.begin(); - oss << *it; - for (++it; it != columns.end(); ++it) { - oss << ' ' << *it; - } + WideColumnsHelper::DumpWideColumns(columns, oss, true); return oss.str(); } diff --git a/include/rocksdb/wide_columns.h b/include/rocksdb/wide_columns.h index 5af3f51de..1da66b73c 100644 --- a/include/rocksdb/wide_columns.h +++ b/include/rocksdb/wide_columns.h @@ -74,8 +74,19 @@ inline bool operator!=(const WideColumn& lhs, const WideColumn& rhs) { inline std::ostream& operator<<(std::ostream& os, const WideColumn& column) { const bool hex = (os.flags() & std::ios_base::basefield) == std::ios_base::hex; - os << column.name().ToString(hex) << ':' << column.value().ToString(hex); - + if (!column.name().empty()) { + if (hex) { + os << "0x"; + } + os << column.name().ToString(hex); + } + os << ':'; + if (!column.value().empty()) { + if (hex) { + os << "0x"; + } + os << column.value().ToString(hex); + } return os; } diff --git a/src.mk b/src.mk index 7d2663b99..2992f5c4a 100644 --- a/src.mk +++ b/src.mk @@ -94,6 +94,7 @@ LIB_SOURCES = \ db/wal_manager.cc \ db/wide/wide_column_serialization.cc \ db/wide/wide_columns.cc \ + db/wide/wide_columns_helper.cc \ db/write_batch.cc \ db/write_batch_base.cc \ db/write_controller.cc \ @@ -528,6 +529,7 @@ TEST_MAIN_SOURCES = \ db/wal_manager_test.cc \ db/wide/db_wide_basic_test.cc \ db/wide/wide_column_serialization_test.cc \ + db/wide/wide_columns_helper_test.cc \ db/write_batch_test.cc \ db/write_callback_test.cc \ db/write_controller_test.cc \ diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 4a9fd3807..2334b3ee2 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -16,6 +16,8 @@ #include "db/blob/blob_index.h" 
#include "db/memtable.h" +#include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "db/write_batch_internal.h" #include "options/cf_options.h" #include "port/port.h" @@ -489,9 +491,23 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num, if (print_kv) { if (!decode_blob_index_ || ikey.type != kTypeBlobIndex) { - fprintf(stdout, "%s => %s\n", - ikey.DebugString(true, output_hex_).c_str(), - value.ToString(output_hex_).c_str()); + if (ikey.type == kTypeWideColumnEntity) { + std::ostringstream oss; + const Status s = WideColumnsHelper::DumpSliceAsWideColumns( + iter->value(), oss, output_hex_); + if (!s.ok()) { + fprintf(stderr, "%s => error deserializing wide columns\n", + ikey.DebugString(true, output_hex_).c_str()); + continue; + } + fprintf(stdout, "%s => %s\n", + ikey.DebugString(true, output_hex_).c_str(), + oss.str().c_str()); + } else { + fprintf(stdout, "%s => %s\n", + ikey.DebugString(true, output_hex_).c_str(), + value.ToString(output_hex_).c_str()); + } } else { BlobIndex blob_index; diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 263cbe472..be48e0204 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -22,6 +22,8 @@ #include "db/dbformat.h" #include "db/log_reader.h" #include "db/version_util.h" +#include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "db/write_batch_internal.h" #include "file/filename.h" #include "rocksdb/cache.h" @@ -1884,8 +1886,20 @@ void InternalDumpCommand::DoCommand() { std::string key = ikey.DebugString(is_key_hex_); Slice value(key_version.value); if (!decode_blob_index_ || value_type != kTypeBlobIndex) { - fprintf(stdout, "%s => %s\n", key.c_str(), - value.ToString(is_value_hex_).c_str()); + if (value_type == kTypeWideColumnEntity) { + std::ostringstream oss; + const Status s = WideColumnsHelper::DumpSliceAsWideColumns( + value, oss, is_value_hex_); + if (!s.ok()) { + fprintf(stderr, "%s => error deserializing wide columns\n", + key.c_str()); + } else { + fprintf(stdout, "%s => %s\n", key.c_str(), oss.str().c_str()); + } + } else { + fprintf(stdout, "%s => %s\n", key.c_str(), + value.ToString(is_value_hex_).c_str()); + } } else { BlobIndex blob_index; @@ -2185,10 +2199,31 @@ void DBDumperCommand::DoDumpCommand() { if (is_db_ttl_ && timestamp_) { fprintf(stdout, "%s ", TimeToHumanString(rawtime).c_str()); } - std::string str = - PrintKeyValue(iter->key().ToString(), iter->value().ToString(), - is_key_hex_, is_value_hex_); - fprintf(stdout, "%s\n", str.c_str()); + // (TODO) TTL Iterator does not support wide columns yet. + if (is_db_ttl_ || iter->columns().empty() || + (iter->columns().size() == 1 && + iter->columns().front().name() == kDefaultWideColumnName)) { + std::string str = + PrintKeyValue(iter->key().ToString(), iter->value().ToString(), + is_key_hex_, is_value_hex_); + fprintf(stdout, "%s\n", str.c_str()); + } else { + /* + // Sample plaintext output (first column is kDefaultWideColumnName) + key_1 ==> :foo attr_name1:bar attr_name2:baz + + // Sample hex output (first column is kDefaultWideColumnName) + 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F + */ + + std::ostringstream oss; + WideColumnsHelper::DumpWideColumns(iter->columns(), oss, is_value_hex_); + std::string str = PrintKeyValue( + iter->key().ToString(), oss.str().c_str(), is_key_hex_, + false); // is_value_hex_ is already honored in oss. avoid + // double-hexing it. 
+ fprintf(stdout, "%s\n", str.c_str()); + } } } @@ -2529,6 +2564,16 @@ class InMemoryHandler : public WriteBatch::Handler { return Status::OK(); } + Status PutEntityCF(uint32_t cf, const Slice& key, + const Slice& value) override { + row_ << "PUT_ENTITY(" << cf << ") : "; + std::string k = LDBCommand::StringToHex(key.ToString()); + if (print_values_) { + return WideColumnsHelper::DumpSliceAsWideColumns(value, row_, true); + } + return Status::OK(); + } + Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override { row_ << "MERGE(" << cf << ") : "; commonPutMerge(key, value); @@ -3042,7 +3087,10 @@ void ScanCommand::DoCommand() { if (no_value_) { fprintf(stdout, "%.*s\n", static_cast(key_slice.size()), key_slice.data()); - } else { + // (TODO) TTL Iterator does not support wide columns yet. + } else if (is_db_ttl_ || it->columns().empty() || + (it->columns().size() == 1 && + it->columns().front().name() == kDefaultWideColumnName)) { Slice val_slice = it->value(); std::string formatted_value; if (is_value_hex_) { @@ -3052,6 +3100,20 @@ void ScanCommand::DoCommand() { fprintf(stdout, "%.*s : %.*s\n", static_cast(key_slice.size()), key_slice.data(), static_cast(val_slice.size()), val_slice.data()); + } else { + /* + // Sample plaintext output (first column is kDefaultWideColumnName) + key_1 : :foo attr_name1:bar attr_name2:baz + + // Sample hex output (first column is kDefaultWideColumnName) + 0x6669727374 : :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F + */ + + std::ostringstream oss; + WideColumnsHelper::DumpWideColumns(it->columns(), oss, is_value_hex_); + fprintf(stdout, "%.*s : %.*s\n", static_cast(key_slice.size()), + key_slice.data(), static_cast(oss.str().length()), + oss.str().c_str()); } num_keys_scanned++; diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index 481c4b722..f0b71bf8e 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -9,6 +9,7 @@ #include +#include "db/wide/wide_column_serialization.h" #include "file/random_access_file_reader.h" #include "port/stack_trace.h" #include "rocksdb/convenience.h" @@ -24,10 +25,11 @@ namespace ROCKSDB_NAMESPACE { const uint32_t kOptLength = 1024; namespace { -static std::string MakeKey(int i) { +static std::string MakeKey(int i, + ValueType value_type = ValueType::kTypeValue) { char buf[100]; snprintf(buf, sizeof(buf), "k_%04d", i); - InternalKey key(std::string(buf), 0, ValueType::kTypeValue); + InternalKey key(std::string(buf), 0, value_type); return key.Encode().ToString(); } @@ -44,6 +46,16 @@ static std::string MakeValue(int i) { return key.Encode().ToString(); } +static std::string MakeWideColumn(int i) { + std::string val = MakeValue(i); + std::string val1 = "attr_1_val_" + val; + std::string val2 = "attr_2_val_" + val; + WideColumns columns{{"attr_1", val1}, {"attr_2", val2}}; + std::string entity; + EXPECT_OK(WideColumnSerialization::Serialize(columns, entity)); + return entity; +} + void cleanup(const Options& opts, const std::string& file_name) { Env* env = opts.env; ASSERT_OK(env->DeleteFile(file_name)); @@ -94,7 +106,8 @@ class SSTDumpToolTest : public testing::Test { snprintf(usage[2], kOptLength, "--file=%s", file_path.c_str()); } - void createSST(const Options& opts, const std::string& file_name) { + void createSST(const Options& opts, const std::string& file_name, + uint32_t wide_column_one_in = 0) { Env* test_env = opts.env; FileOptions file_options(opts); ReadOptions read_options; @@ -123,7 +136,12 @@ class SSTDumpToolTest : public testing::Test { const char* comparator_name = 
ikc.user_comparator()->Name(); if (strcmp(comparator_name, ReverseBytewiseComparator()->Name()) == 0) { for (int32_t i = num_keys; i >= 0; i--) { - tb->Add(MakeKey(i), MakeValue(i)); + if (wide_column_one_in == 0 || i % wide_column_one_in != 0) { + tb->Add(MakeKey(i), MakeValue(i)); + } else { + tb->Add(MakeKey(i, ValueType::kTypeWideColumnEntity), + MakeWideColumn(i)); + } } } else if (strcmp(comparator_name, test::BytewiseComparatorWithU64TsWrapper()->Name()) == @@ -133,7 +151,12 @@ class SSTDumpToolTest : public testing::Test { } } else { for (uint32_t i = 0; i < num_keys; i++) { - tb->Add(MakeKey(i), MakeValue(i)); + if (wide_column_one_in == 0 || i % wide_column_one_in != 0) { + tb->Add(MakeKey(i), MakeValue(i)); + } else { + tb->Add(MakeKey(i, ValueType::kTypeWideColumnEntity), + MakeWideColumn(i)); + } } } ASSERT_OK(tb->Finish()); @@ -164,7 +187,7 @@ TEST_F(SSTDumpToolTest, EmptyFilter) { Options opts; opts.env = env(); std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); + createSST(opts, file_path, 10); char* usage[3]; PopulateCommandArgs(file_path, "--command=raw", usage); @@ -212,7 +235,7 @@ TEST_F(SSTDumpToolTest, SstDumpComparatorWithU64Ts) { opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); std::string file_path = MakeFilePath("rocksdb_sst_comparator_with_u64_ts.sst"); - createSST(opts, file_path); + createSST(opts, file_path, 10); char* usage[3]; PopulateCommandArgs(file_path, "--command=raw", usage); @@ -234,7 +257,7 @@ TEST_F(SSTDumpToolTest, FilterBlock) { ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, true)); opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); + createSST(opts, file_path, 10); char* usage[3]; PopulateCommandArgs(file_path, "--command=raw", usage); @@ -300,7 +323,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) { ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false)); opts.table_factory.reset(new BlockBasedTableFactory(table_opts)); std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); + createSST(opts, file_path, 10); char* usage[3]; PopulateCommandArgs(file_path, "--command=recompress", usage); @@ -426,7 +449,7 @@ TEST_F(SSTDumpToolTest, RawOutput) { Options opts; opts.env = env(); std::string file_path = MakeFilePath("rocksdb_sst_test.sst"); - createSST(opts, file_path); + createSST(opts, file_path, 10); char* usage[3]; PopulateCommandArgs(file_path, "--command=raw", usage); From fc58c7c62a10f590ab3d6b73b8a3109d85ff28fe Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 30 Aug 2023 13:42:04 -0700 Subject: [PATCH 079/386] Add UDT support in SstFileDumper (#11757) Summary: For a SST file that uses user-defined timestamp aware comparators, if a lower or upper bound is set, sst_dump tool doesn't handle it well. This PR adds support for that. While working on this `MaybeAddTimestampsToRange` is moved to the udt_util.h file to be shared. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11757 Test Plan: make all check for changes in db_impl.cc and db_impl_compaction_flush.cc for changes in sst_file_dumper.cc, I manually tested this change handles specifying bounds for UDT use cases. It probably should have a unit test file eventually. 
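As background on the `MaybeAddTimestampsToRange` behavior relied on here: keys in a column family with user-defined timestamps sort ascending by user key and descending by timestamp, so a lower bound and an exclusive upper bound both get the maximum timestamp appended, while an inclusive upper bound gets the minimum timestamp. The sketch below is illustrative only (the bound values are made up) and assumes the common encoding where the maximum timestamp is all 0xff bytes and the minimum is all zero bytes.

```
#include <cstddef>
#include <iostream>
#include <string>

constexpr std::size_t kTsSize = 8;  // assumes the 8-byte (uint64_t) timestamp format

// Append the maximum timestamp: for a lower bound this admits every version
// of the key; for an exclusive upper bound it stops before any version of it.
std::string WithMaxTimestamp(const std::string& user_key) {
  return user_key + std::string(kTsSize, '\xff');
}

// Append the minimum timestamp: for an inclusive upper bound this keeps every
// version of the key inside the range.
std::string WithMinTimestamp(const std::string& user_key) {
  return user_key + std::string(kTsSize, '\x00');
}

int main() {
  const std::string from = "k_0010";  // hypothetical --from bound
  const std::string to = "k_0042";    // hypothetical --to bound

  const std::string dump_from = WithMaxTimestamp(from);  // sst_dump: [from, to)
  const std::string dump_to = WithMaxTimestamp(to);      // exclusive end
  const std::string compact_to = WithMinTimestamp(to);   // CompactRange: [begin, end]

  std::cout << dump_from.size() << ' ' << dump_to.size() << ' '
            << compact_to.size() << '\n';  // each is user key length + 8
  return 0;
}
```

With that padding in place, the dumper can seek with the padded lower bound and compare against the padded upper bound using the file's own user comparator rather than a plain bytewise comparison.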
Reviewed By: ltamasi Differential Revision: D48668048 Pulled By: jowlyzhang fbshipit-source-id: 1560465f40e44668d6d82a7439fe9012be0e74a8 --- db/db_impl/db_impl.cc | 38 +++++++------------ db/db_impl/db_impl_compaction_flush.cc | 27 ++++--------- table/sst_file_dumper.cc | 17 +++++++-- .../bug_fixes/sst_dump_for_udt.md | 1 + util/udt_util.cc | 34 +++++++++++++++++ util/udt_util.h | 11 ++++++ 6 files changed, 81 insertions(+), 47 deletions(-) create mode 100644 unreleased_history/bug_fixes/sst_dump_for_udt.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 8bda80bf4..228a3da4f 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -106,6 +106,7 @@ #include "util/mutexlock.h" #include "util/stop_watch.h" #include "util/string_util.h" +#include "util/udt_util.h" #include "utilities/trace/replayer_impl.h" namespace ROCKSDB_NAMESPACE { @@ -149,24 +150,6 @@ void DumpSupportInfo(Logger* logger) { ROCKS_LOG_HEADER(logger, "DMutex implementation: %s", DMutex::kName()); } - -// `start` is the inclusive lower user key bound without user-defined timestamp -// `limit` is the exclusive upper user key bound without user-defined timestamp -std::tuple MaybeAddTimestampsToRange(const Slice& start, - const Slice& limit, - size_t ts_sz, - std::string* start_with_ts, - std::string* limit_with_ts) { - if (ts_sz == 0) { - return std::make_tuple(start, limit); - } - // Maximum timestamp means including all key with any timestamp - AppendKeyWithMaxTimestamp(start_with_ts, start, ts_sz); - // Append a maximum timestamp as the range limit is exclusive: - // [start, limit) - AppendKeyWithMaxTimestamp(limit_with_ts, limit, ts_sz); - return std::make_tuple(Slice(*start_with_ts), Slice(*limit_with_ts)); -} } // namespace DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, @@ -4281,10 +4264,12 @@ void DBImpl::GetApproximateMemTableStats(ColumnFamilyHandle* column_family, // Add timestamp if needed std::string start_with_ts, limit_with_ts; auto [start, limit] = MaybeAddTimestampsToRange( - range.start, range.limit, ts_sz, &start_with_ts, &limit_with_ts); + &range.start, &range.limit, ts_sz, &start_with_ts, &limit_with_ts); + assert(start.has_value()); + assert(limit.has_value()); // Convert user_key into a corresponding internal key. - InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k1(start.value(), kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(limit.value(), kMaxSequenceNumber, kValueTypeForSeek); MemTable::MemTableStats memStats = sv->mem->ApproximateStats(k1.Encode(), k2.Encode()); MemTable::MemTableStats immStats = @@ -4317,11 +4302,14 @@ Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, for (int i = 0; i < n; i++) { // Add timestamp if needed std::string start_with_ts, limit_with_ts; - auto [start, limit] = MaybeAddTimestampsToRange( - range[i].start, range[i].limit, ts_sz, &start_with_ts, &limit_with_ts); + auto [start, limit] = + MaybeAddTimestampsToRange(&range[i].start, &range[i].limit, ts_sz, + &start_with_ts, &limit_with_ts); + assert(start.has_value()); + assert(limit.has_value()); // Convert user_key into a corresponding internal key. 
- InternalKey k1(start, kMaxSequenceNumber, kValueTypeForSeek); - InternalKey k2(limit, kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k1(start.value(), kMaxSequenceNumber, kValueTypeForSeek); + InternalKey k2(limit.value(), kMaxSequenceNumber, kValueTypeForSeek); sizes[i] = 0; if (options.include_files) { sizes[i] += versions_->ApproximateSize( diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 732b0667d..a1a544eef 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -23,6 +23,7 @@ #include "util/cast_util.h" #include "util/coding.h" #include "util/concurrent_task_limiter_impl.h" +#include "util/udt_util.h" namespace ROCKSDB_NAMESPACE { @@ -947,26 +948,14 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options, end_without_ts, "" /*trim_ts*/); } - std::string begin_str; - std::string end_str; + std::string begin_str, end_str; + auto [begin, end] = + MaybeAddTimestampsToRange(begin_without_ts, end_without_ts, ts_sz, + &begin_str, &end_str, false /*exclusive_end*/); - // CompactRange compact all keys: [begin, end] inclusively. Add maximum - // timestamp to include all `begin` keys, and add minimal timestamp to include - // all `end` keys. - if (begin_without_ts != nullptr) { - AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz); - } - if (end_without_ts != nullptr) { - AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz); - } - Slice begin(begin_str); - Slice end(end_str); - - Slice* begin_with_ts = begin_without_ts ? &begin : nullptr; - Slice* end_with_ts = end_without_ts ? &end : nullptr; - - return CompactRangeInternal(options, column_family, begin_with_ts, - end_with_ts, "" /*trim_ts*/); + return CompactRangeInternal( + options, column_family, begin.has_value() ? &begin.value() : nullptr, + end.has_value() ? &end.value() : nullptr, "" /*trim_ts*/); } Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family, diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 2334b3ee2..150776de1 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -38,6 +38,7 @@ #include "table/table_reader.h" #include "util/compression.h" #include "util/random.h" +#include "util/udt_util.h" namespace ROCKSDB_NAMESPACE { @@ -458,10 +459,20 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num, read_options_, moptions_.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kSSTDumpTool); + + const Comparator* ucmp = internal_comparator_.user_comparator(); + size_t ts_sz = ucmp->timestamp_size(); + + Slice from_slice = from_key; + Slice to_slice = to_key; + std::string from_key_buf, to_key_buf; + auto [from, to] = MaybeAddTimestampsToRange( + has_from ? &from_slice : nullptr, has_to ? 
&to_slice : nullptr, ts_sz, + &from_key_buf, &to_key_buf); uint64_t i = 0; - if (has_from) { + if (from.has_value()) { InternalKey ikey; - ikey.SetMinPossibleForUserKey(from_key); + ikey.SetMinPossibleForUserKey(from.value()); iter->Seek(ikey.Encode()); } else { iter->SeekToFirst(); @@ -485,7 +496,7 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num, } // If end marker was specified, we stop before it - if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) { + if (to.has_value() && ucmp->Compare(ikey.user_key, to.value()) >= 0) { break; } diff --git a/unreleased_history/bug_fixes/sst_dump_for_udt.md b/unreleased_history/bug_fixes/sst_dump_for_udt.md new file mode 100644 index 000000000..e8b483ebc --- /dev/null +++ b/unreleased_history/bug_fixes/sst_dump_for_udt.md @@ -0,0 +1 @@ +Fix an issue in sst dump tool to handle bounds specified for data with user-defined timestamps. \ No newline at end of file diff --git a/util/udt_util.cc b/util/udt_util.cc index 7d549acab..40cf1e496 100644 --- a/util/udt_util.cc +++ b/util/udt_util.cc @@ -346,6 +346,40 @@ void GetFullHistoryTsLowFromU64CutoffTs(Slice* cutoff_ts, std::string* full_history_ts_low) { uint64_t cutoff_udt_ts = 0; [[maybe_unused]] bool format_res = GetFixed64(cutoff_ts, &cutoff_udt_ts); + assert(format_res); PutFixed64(full_history_ts_low, cutoff_udt_ts + 1); } + +std::tuple, std::optional> +MaybeAddTimestampsToRange(const Slice* start, const Slice* end, size_t ts_sz, + std::string* start_with_ts, std::string* end_with_ts, + bool exclusive_end) { + std::optional ret_start, ret_end; + if (start) { + if (ts_sz == 0) { + ret_start = *start; + } else { + // Maximum timestamp means including all keys with any timestamp for start + AppendKeyWithMaxTimestamp(start_with_ts, *start, ts_sz); + ret_start = Slice(*start_with_ts); + } + } + if (end) { + if (ts_sz == 0) { + ret_end = *end; + } else { + if (exclusive_end) { + // Append a maximum timestamp as the range limit is exclusive: + // [start, end) + AppendKeyWithMaxTimestamp(end_with_ts, *end, ts_sz); + } else { + // Append a minimum timestamp to end so the range limit is inclusive: + // [start, end] + AppendKeyWithMinTimestamp(end_with_ts, *end, ts_sz); + } + ret_end = Slice(*end_with_ts); + } + } + return std::make_tuple(ret_start, ret_end); +} } // namespace ROCKSDB_NAMESPACE diff --git a/util/udt_util.h b/util/udt_util.h index 706b02e90..b524fceab 100644 --- a/util/udt_util.h +++ b/util/udt_util.h @@ -254,4 +254,15 @@ Status ValidateUserDefinedTimestampsOptions( // the effective `full_history_ts_low`. void GetFullHistoryTsLowFromU64CutoffTs(Slice* cutoff_ts, std::string* full_history_ts_low); + +// `start` is the inclusive lower user key bound without user-defined timestamp. +// `end` is the upper user key bound without user-defined timestamp. +// By default, `end` is treated as being exclusive. If `exclusive_end` is set to +// false, it's treated as an inclusive upper bound. +// If any of these two bounds is nullptr, an empty std::optional is +// returned for that bound. 
+std::tuple, std::optional> +MaybeAddTimestampsToRange(const Slice* start, const Slice* end, size_t ts_sz, + std::string* start_with_ts, std::string* end_with_ts, + bool exclusive_end = true); } // namespace ROCKSDB_NAMESPACE From 05daa123323b1471bde4723dc441763d687fd825 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 30 Aug 2023 14:57:08 -0700 Subject: [PATCH 080/386] Change compaction_readahead_size default value to 2MB (#11762) Summary: **Context/Summary:** After https://github.com/facebook/rocksdb/pull/11631, we rely on `compaction_readahead_size` for how much to read ahead for compaction read under non-direct IO case. https://github.com/facebook/rocksdb/pull/11658 therefore also sanitized 0 `compaction_readahead_size` to 2MB under non-direct IO, which is consistent with the existing sanitization with direct IO. However, this makes disabling compaction readahead impossible as well as add one more scenario to the inconsistent effects between `Options.compaction_readahead_size=0` during DB open and `SetDBOptions("compaction_readahead_size", "0")` . - `SetDBOptions("compaction_readahead_size", "0")` will disable compaction readahead as its logic never goes through sanitization above while `Options.compaction_readahead_size=0` will go through sanitization. Therefore we decided to do this PR. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11762 Test Plan: Modified existing UTs to cover this PR Reviewed By: ajkr Differential Revision: D48759560 Pulled By: hx235 fbshipit-source-id: b3f85e58bda362a6fa1dc26bd8a87aa0e171af79 --- db/db_compaction_test.cc | 7 +-- db/db_impl/db_impl_open.cc | 7 --- db/db_options_test.cc | 51 +++++++++++-------- include/rocksdb/options.h | 4 +- ...fered_io_compaction_readahead_size_zero.md | 1 + ...compaction_readahead_size_option_change.md | 1 + 6 files changed, 34 insertions(+), 37 deletions(-) create mode 100644 unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md create mode 100644 unreleased_history/public_api_changes/compaction_readahead_size_option_change.md diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 24445ecdb..8713b9b80 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -6025,23 +6025,18 @@ TEST_P(DBCompactionDirectIOTest, DirectIO) { options.use_direct_io_for_flush_and_compaction = GetParam(); options.env = MockEnv::Create(Env::Default()); Reopen(options); - bool readahead = false; SyncPoint::GetInstance()->SetCallBack( "CompactionJob::OpenCompactionOutputFile", [&](void* arg) { bool* use_direct_writes = static_cast(arg); ASSERT_EQ(*use_direct_writes, options.use_direct_io_for_flush_and_compaction); }); - if (options.use_direct_io_for_flush_and_compaction) { - SyncPoint::GetInstance()->SetCallBack( - "SanitizeOptions:direct_io", [&](void* /*arg*/) { readahead = true; }); - } SyncPoint::GetInstance()->EnableProcessing(); CreateAndReopenWithCF({"pikachu"}, options); MakeTables(3, "p", "q", 1); ASSERT_EQ("1,1,1", FilesPerLevel(1)); Compact(1, "p", "q"); - ASSERT_EQ(readahead, options.use_direct_reads); + ASSERT_EQ(false, options.use_direct_reads); ASSERT_EQ("0,0,1", FilesPerLevel(1)); Destroy(options); delete options.env; diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 8db53dac9..d9d1f932a 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -143,13 +143,6 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src, result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1); } - if 
(result.compaction_readahead_size == 0) { - if (result.use_direct_reads) { - TEST_SYNC_POINT_CALLBACK("SanitizeOptions:direct_io", nullptr); - } - result.compaction_readahead_size = 1024 * 1024 * 2; - } - // Force flush on DB open if 2PC is enabled, since with 2PC we have no // guarantee that consecutive log files have consecutive sequence id, which // make recovery complicated. diff --git a/db/db_options_test.cc b/db/db_options_test.cc index b7c132aee..c3910a978 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1034,30 +1034,37 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { } TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { - SpecialEnv env(env_); - Options options; - options.env = &env; - - options.compaction_readahead_size = 0; - options.level0_file_num_compaction_trigger = 2; - const std::string kValue(1024, 'v'); - Reopen(options); + for (bool use_direct_reads : {true, false}) { + SpecialEnv env(env_); + Options options; + options.env = &env; + + options.use_direct_reads = use_direct_reads; + options.level0_file_num_compaction_trigger = 2; + const std::string kValue(1024, 'v'); + Status s = TryReopen(options); + if (use_direct_reads && (s.IsNotSupported() || s.IsInvalidArgument())) { + continue; + } else { + ASSERT_OK(s); + } - ASSERT_EQ(1024 * 1024 * 2, - dbfull()->GetDBOptions().compaction_readahead_size); - ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); - ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); - for (int i = 0; i < 1024; i++) { - ASSERT_OK(Put(Key(i), kValue)); - } - ASSERT_OK(Flush()); - for (int i = 0; i < 1024 * 2; i++) { - ASSERT_OK(Put(Key(i), kValue)); + ASSERT_EQ(1024 * 1024 * 2, + dbfull()->GetDBOptions().compaction_readahead_size); + ASSERT_OK(dbfull()->SetDBOptions({{"compaction_readahead_size", "256"}})); + ASSERT_EQ(256, dbfull()->GetDBOptions().compaction_readahead_size); + for (int i = 0; i < 1024; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + for (int i = 0; i < 1024 * 2; i++) { + ASSERT_OK(Put(Key(i), kValue)); + } + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(256, env_->compaction_readahead_size_); + Close(); } - ASSERT_OK(Flush()); - ASSERT_OK(dbfull()->TEST_WaitForCompact()); - ASSERT_EQ(256, env_->compaction_readahead_size_); - Close(); } TEST_F(DBOptionsTest, FIFOTtlBackwardCompatible) { diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 8f62c2dfb..d11ccc62f 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -955,10 +955,10 @@ struct DBOptions { // running RocksDB on spinning disks, you should set this to at least 2MB. // That way RocksDB's compaction is doing sequential instead of random reads. // - // Default: 0 + // Default: 2MB // // Dynamically changeable through SetDBOptions() API. - size_t compaction_readahead_size = 0; + size_t compaction_readahead_size = 2 * 1024 * 1024; // This is a maximum buffer size that is used by WinMmapReadableFile in // unbuffered disk I/O mode. 
We need to maintain an aligned buffer for diff --git a/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md new file mode 100644 index 000000000..430101766 --- /dev/null +++ b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md @@ -0,0 +1 @@ +Compaction read performance will regress when `Options::compaction_readahead_size` is explicitly set to 0 diff --git a/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md new file mode 100644 index 000000000..f86fd82ea --- /dev/null +++ b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md @@ -0,0 +1 @@ +`Options::compaction_readahead_size` 's default value is changed from 0 to 2MB. From 83eb7b8c2cf13ad8c8326a2e7a869837a535f7e2 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 31 Aug 2023 08:39:09 -0700 Subject: [PATCH 081/386] Log host name (#11776) Summary: ... in info_log. Becoming more important with disaggregated storage. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11776 Test Plan: manual Reviewed By: jaykorean Differential Revision: D48849471 Pulled By: pdillinger fbshipit-source-id: 9a8fd8b2564a4f133526ecd7c1414cb667e4ba54 --- db/db_info_dumper.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db/db_info_dumper.cc b/db/db_info_dumper.cc index be8d5bee1..7dd647955 100644 --- a/db/db_info_dumper.cc +++ b/db/db_info_dumper.cc @@ -34,6 +34,12 @@ void DumpDBFileSummary(const ImmutableDBOptions& options, std::string file_info, wal_info; Header(options.info_log, "DB SUMMARY\n"); + { + std::string hostname; + if (env->GetHostNameString(&hostname).ok()) { + Header(options.info_log, "Host name (Env): %s\n", hostname.c_str()); + } + } Header(options.info_log, "DB Session ID: %s\n", session_id.c_str()); Status s; From 47be3ffffbcde1095cf50f6563fe5df5627af986 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Thu, 31 Aug 2023 16:17:03 -0700 Subject: [PATCH 082/386] Minor refactor on LDB command for wide column support and release note (#11777) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: As mentioned in https://github.com/facebook/rocksdb/issues/11754 , refactor to clean up some nearly identical logic. This PR changes the existing debugging string format of Scan command as the following. 
``` ❯ ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/ scan --hex ``` Before ``` 0x6669727374 : :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172 0x7365636F6E64 : 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572 0x7468697264 : 0x62617A ``` After ``` 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172 0x7365636F6E64 ==> 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572 0x7468697264 ==> 0x62617A ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11777 Test Plan: ``` ❯ ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/ dump first ==> :hello attr_name1:foo attr_name2:bar second ==> attr_one:two attr_three:four third ==> baz Keys in range: 3 ❯ ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/ scan first ==> :hello attr_name1:foo attr_name2:bar second ==> attr_one:two attr_three:four third ==> baz ❯ ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/ dump --hex 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172 0x7365636F6E64 ==> 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572 0x7468697264 ==> 0x62617A Keys in range: 3 ❯ ./ldb --db=/tmp/rocksdbtest-226125/db_wide_basic_test_2675429_2308393776696827948/ scan --hex 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F 0x617474725F6E616D6532:0x626172 0x7365636F6E64 ==> 0x617474725F6F6E65:0x74776F 0x617474725F7468726565:0x666F7572 0x7468697264 ==> 0x62617A ``` Reviewed By: jowlyzhang Differential Revision: D48876755 Pulled By: jaykorean fbshipit-source-id: b1c608a810fe038999ac528b690d398abf5f21d7 --- db/wide/wide_columns_helper.cc | 4 +- include/rocksdb/utilities/ldb_cmd.h | 7 +- tools/ldb_cmd.cc | 106 ++++++++---------- tools/ldb_test.py | 103 +++++++++-------- .../ldb_scan_command_output_change.md | 1 + .../wide_column_support_in_ldb.md | 1 + 6 files changed, 108 insertions(+), 114 deletions(-) create mode 100644 unreleased_history/behavior_changes/ldb_scan_command_output_change.md create mode 100644 unreleased_history/new_features/wide_column_support_in_ldb.md diff --git a/db/wide/wide_columns_helper.cc b/db/wide/wide_columns_helper.cc index 80a6b5c43..4ef0c148e 100644 --- a/db/wide/wide_columns_helper.cc +++ b/db/wide/wide_columns_helper.cc @@ -23,12 +23,12 @@ void WideColumnsHelper::DumpWideColumns(const WideColumns& columns, } } Status WideColumnsHelper::DumpSliceAsWideColumns(const Slice& value, - std::ostream& oss, bool hex) { + std::ostream& os, bool hex) { WideColumns columns; Slice value_copy = value; const Status s = WideColumnSerialization::Deserialize(value_copy, columns); if (s.ok()) { - DumpWideColumns(columns, oss, hex); + DumpWideColumns(columns, os, hex); } return s; } diff --git a/include/rocksdb/utilities/ldb_cmd.h b/include/rocksdb/utilities/ldb_cmd.h index af5ee4ba9..ed4f5de7e 100644 --- a/include/rocksdb/utilities/ldb_cmd.h +++ b/include/rocksdb/utilities/ldb_cmd.h @@ -226,6 +226,12 @@ class LDBCommand { static std::string PrintKeyValue(const std::string& key, const std::string& value, bool is_hex); + static std::string PrintKeyValueOrWideColumns(const Slice& key, + const Slice& value, + const WideColumns& wide_columns, + bool is_key_hex, + bool is_value_hex); + /** * Return true if the specified flag is present in the specified flags vector */ @@ -313,4 +319,3 @@ class LDBCommandRunner { }; } // namespace 
ROCKSDB_NAMESPACE - diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index be48e0204..77096b113 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1082,6 +1082,29 @@ std::string LDBCommand::PrintKeyValue(const std::string& key, return PrintKeyValue(key, value, is_hex, is_hex); } +std::string LDBCommand::PrintKeyValueOrWideColumns( + const Slice& key, const Slice& value, const WideColumns& wide_columns, + bool is_key_hex, bool is_value_hex) { + if (wide_columns.empty() || + (wide_columns.size() == 1 && + wide_columns.front().name() == kDefaultWideColumnName)) { + return PrintKeyValue(key.ToString(), value.ToString(), is_key_hex, + is_value_hex); + } + /* + // Sample plaintext output (first column is kDefaultWideColumnName) + key_1 ==> :foo attr_name1:bar attr_name2:baz + + // Sample hex output (first column is kDefaultWideColumnName) + 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F + */ + std::ostringstream oss; + WideColumnsHelper::DumpWideColumns(wide_columns, oss, is_value_hex); + return PrintKeyValue(key.ToString(), oss.str().c_str(), is_key_hex, + false); // is_value_hex_ is already honored in oss. + // avoid double-hexing it. +} + std::string LDBCommand::HelpRangeCmdArgs() { std::ostringstream str_stream; str_stream << " "; @@ -2200,30 +2223,14 @@ void DBDumperCommand::DoDumpCommand() { fprintf(stdout, "%s ", TimeToHumanString(rawtime).c_str()); } // (TODO) TTL Iterator does not support wide columns yet. - if (is_db_ttl_ || iter->columns().empty() || - (iter->columns().size() == 1 && - iter->columns().front().name() == kDefaultWideColumnName)) { - std::string str = - PrintKeyValue(iter->key().ToString(), iter->value().ToString(), - is_key_hex_, is_value_hex_); - fprintf(stdout, "%s\n", str.c_str()); - } else { - /* - // Sample plaintext output (first column is kDefaultWideColumnName) - key_1 ==> :foo attr_name1:bar attr_name2:baz - - // Sample hex output (first column is kDefaultWideColumnName) - 0x6669727374 ==> :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F - */ - - std::ostringstream oss; - WideColumnsHelper::DumpWideColumns(iter->columns(), oss, is_value_hex_); - std::string str = PrintKeyValue( - iter->key().ToString(), oss.str().c_str(), is_key_hex_, - false); // is_value_hex_ is already honored in oss. avoid - // double-hexing it. - fprintf(stdout, "%s\n", str.c_str()); - } + std::string str = + is_db_ttl_ + ? PrintKeyValue(iter->key().ToString(), iter->value().ToString(), + is_key_hex_, is_value_hex_) + : PrintKeyValueOrWideColumns(iter->key(), iter->value(), + iter->columns(), is_key_hex_, + is_value_hex_); + fprintf(stdout, "%s\n", str.c_str()); } } @@ -3073,47 +3080,22 @@ void ScanCommand::DoCommand() { } } - Slice key_slice = it->key(); - - std::string formatted_key; - if (is_key_hex_) { - formatted_key = "0x" + key_slice.ToString(true /* hex */); - key_slice = formatted_key; - } else if (ldb_options_.key_formatter) { - formatted_key = ldb_options_.key_formatter->Format(key_slice); - key_slice = formatted_key; - } - if (no_value_) { - fprintf(stdout, "%.*s\n", static_cast(key_slice.size()), - key_slice.data()); - // (TODO) TTL Iterator does not support wide columns yet. 
- } else if (is_db_ttl_ || it->columns().empty() || - (it->columns().size() == 1 && - it->columns().front().name() == kDefaultWideColumnName)) { - Slice val_slice = it->value(); - std::string formatted_value; - if (is_value_hex_) { - formatted_value = "0x" + val_slice.ToString(true /* hex */); - val_slice = formatted_value; + std::string key_str = it->key().ToString(); + if (is_key_hex_) { + key_str = StringToHex(key_str); + } else if (ldb_options_.key_formatter) { + key_str = ldb_options_.key_formatter->Format(key_str); } - fprintf(stdout, "%.*s : %.*s\n", static_cast(key_slice.size()), - key_slice.data(), static_cast(val_slice.size()), - val_slice.data()); + fprintf(stdout, "%s\n", key_str.c_str()); } else { - /* - // Sample plaintext output (first column is kDefaultWideColumnName) - key_1 : :foo attr_name1:bar attr_name2:baz - - // Sample hex output (first column is kDefaultWideColumnName) - 0x6669727374 : :0x68656C6C6F 0x617474725F6E616D6531:0x666F6F - */ - - std::ostringstream oss; - WideColumnsHelper::DumpWideColumns(it->columns(), oss, is_value_hex_); - fprintf(stdout, "%.*s : %.*s\n", static_cast(key_slice.size()), - key_slice.data(), static_cast(oss.str().length()), - oss.str().c_str()); + std::string str = is_db_ttl_ ? PrintKeyValue(it->key().ToString(), + it->value().ToString(), + is_key_hex_, is_value_hex_) + : PrintKeyValueOrWideColumns( + it->key(), it->value(), it->columns(), + is_key_hex_, is_value_hex_); + fprintf(stdout, "%s\n", str.c_str()); } num_keys_scanned++; diff --git a/tools/ldb_test.py b/tools/ldb_test.py index e243d69c0..ff39aff74 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -121,32 +121,32 @@ def testSimpleStringPutGet(self): self.assertRunOK("get x2", "y2") self.assertRunFAIL("get x3") - self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2") + self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2") self.assertRunOK("put x3 y3", "OK") - self.assertRunOK("scan --from=x1 --to=z", "x1 : y1\nx2 : y2\nx3 : y3") - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") - self.assertRunOK("scan --from=x", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") + self.assertRunOK("scan --from=x", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") - self.assertRunOK("scan --to=x2", "x1 : y1") - self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 : y1") - self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 : y1\nx2 : y2") + self.assertRunOK("scan --to=x2", "x1 ==> y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 ==> y1") + self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 ==> y1\nx2 ==> y2") self.assertRunOK( - "scan --from=x1 --to=z --max_keys=3", "x1 : y1\nx2 : y2\nx3 : y3" + "scan --from=x1 --to=z --max_keys=3", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3" ) self.assertRunOK( - "scan --from=x1 --to=z --max_keys=4", "x1 : y1\nx2 : y2\nx3 : y3" + "scan --from=x1 --to=z --max_keys=4", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3" ) - self.assertRunOK("scan --from=x1 --to=x2", "x1 : y1") - self.assertRunOK("scan --from=x2 --to=x4", "x2 : y2\nx3 : y3") + self.assertRunOK("scan --from=x1 --to=x2", "x1 ==> y1") + self.assertRunOK("scan --from=x2 --to=x4", "x2 ==> y2\nx3 ==> y3") self.assertRunFAIL("scan --from=x4 --to=z") # No results => FAIL self.assertRunFAIL("scan --from=x1 --to=z --max_keys=foo") - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") 
self.assertRunOK("delete x1", "OK") - self.assertRunOK("scan", "x2 : y2\nx3 : y3") + self.assertRunOK("scan", "x2 ==> y2\nx3 ==> y3") self.assertRunOK("delete NonExistentKey", "OK") # It is weird that GET and SCAN raise exception for @@ -171,9 +171,9 @@ def ingestExternSst(self, params, inputSst): def testStringBatchPut(self): print("Running testStringBatchPut...") self.assertRunOK("batchput x1 y1 --create_if_missing", "OK") - self.assertRunOK("scan", "x1 : y1") + self.assertRunOK("scan", "x1 ==> y1") self.assertRunOK('batchput x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK") - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 abc ==> y4 xyz") self.assertRunFAIL("batchput") self.assertRunFAIL("batchput k1") self.assertRunFAIL("batchput k1 v1 k2") @@ -183,11 +183,11 @@ def testBlobBatchPut(self): dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertRunOK("batchput x1 y1 --create_if_missing --enable_blob_files", "OK") - self.assertRunOK("scan", "x1 : y1") + self.assertRunOK("scan", "x1 ==> y1") self.assertRunOK( 'batchput --enable_blob_files x2 y2 x3 y3 "x4 abc" "y4 xyz"', "OK" ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 abc : y4 xyz") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 abc ==> y4 xyz") blob_files = self.getBlobFiles(dbPath) self.assertTrue(len(blob_files) >= 1) @@ -278,12 +278,12 @@ def testInvalidCmdLines(self): def testHexPutGet(self): print("Running testHexPutGet...") self.assertRunOK("put a1 b1 --create_if_missing", "OK") - self.assertRunOK("scan", "a1 : b1") - self.assertRunOK("scan --hex", "0x6131 : 0x6231") + self.assertRunOK("scan", "a1 ==> b1") + self.assertRunOK("scan --hex", "0x6131 ==> 0x6231") self.assertRunFAIL("put --hex 6132 6232") self.assertRunOK("put --hex 0x6132 0x6232", "OK") - self.assertRunOK("scan --hex", "0x6131 : 0x6231\n0x6132 : 0x6232") - self.assertRunOK("scan", "a1 : b1\na2 : b2") + self.assertRunOK("scan --hex", "0x6131 ==> 0x6231\n0x6132 ==> 0x6232") + self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2") self.assertRunOK("get a1", "b1") self.assertRunOK("get --hex 0x6131", "0x6231") self.assertRunOK("get a2", "b2") @@ -292,27 +292,28 @@ def testHexPutGet(self): self.assertRunOK("get --key_hex --value_hex 0x6132", "0x6232") self.assertRunOK("get --value_hex a2", "0x6232") self.assertRunOK( - "scan --key_hex --value_hex", "0x6131 : 0x6231\n0x6132 : 0x6232" + "scan --key_hex --value_hex", "0x6131 ==> 0x6231\n0x6132 ==> 0x6232" ) self.assertRunOK( - "scan --hex --from=0x6131 --to=0x6133", "0x6131 : 0x6231\n0x6132 : 0x6232" + "scan --hex --from=0x6131 --to=0x6133", + "0x6131 ==> 0x6231\n0x6132 ==> 0x6232", ) - self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 : 0x6231") - self.assertRunOK("scan --key_hex", "0x6131 : b1\n0x6132 : b2") - self.assertRunOK("scan --value_hex", "a1 : 0x6231\na2 : 0x6232") + self.assertRunOK("scan --hex --from=0x6131 --to=0x6132", "0x6131 ==> 0x6231") + self.assertRunOK("scan --key_hex", "0x6131 ==> b1\n0x6132 ==> b2") + self.assertRunOK("scan --value_hex", "a1 ==> 0x6231\na2 ==> 0x6232") self.assertRunOK("batchput --hex 0x6133 0x6233 0x6134 0x6234", "OK") - self.assertRunOK("scan", "a1 : b1\na2 : b2\na3 : b3\na4 : b4") + self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2\na3 ==> b3\na4 ==> b4") self.assertRunOK("delete --hex 0x6133", "OK") - self.assertRunOK("scan", "a1 : b1\na2 : b2\na4 : b4") + self.assertRunOK("scan", "a1 ==> b1\na2 ==> b2\na4 ==> b4") self.assertRunOK("checkconsistency", "OK") def 
testTtlPutGet(self): print("Running testTtlPutGet...") self.assertRunOK("put a1 b1 --ttl --create_if_missing", "OK") - self.assertRunOK("scan --hex", "0x6131 : 0x6231", True) + self.assertRunOK("scan --hex", "0x6131 ==> 0x6231", True) self.assertRunOK("dump --ttl ", "a1 ==> b1", True) self.assertRunOK("dump --hex --ttl ", "0x6131 ==> 0x6231\nKeys in range: 1") - self.assertRunOK("scan --hex --ttl", "0x6131 : 0x6231") + self.assertRunOK("scan --hex --ttl", "0x6131 ==> 0x6231") self.assertRunOK("get --value_hex a1", "0x6231", True) self.assertRunOK("get --ttl a1", "b1") self.assertRunOK("put a3 b3 --create_if_missing", "OK") @@ -334,7 +335,7 @@ def testInvalidCmdLines(self): # noqa: F811 T25377293 Grandfathered in def testDumpLoad(self): print("Running testDumpLoad...") self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) # Dump and load without any additional params specified @@ -345,7 +346,7 @@ def testDumpLoad(self): self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) ) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) # Dump and load in hex @@ -358,7 +359,7 @@ def testDumpLoad(self): ) ) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) # Dump only a portion of the key range @@ -370,7 +371,7 @@ def testDumpLoad(self): self.assertTrue( self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) ) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2") + self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2") # Dump upto max_keys rows dumpFilePath = os.path.join(self.TMP_DIR, "dump4") @@ -379,13 +380,15 @@ def testDumpLoad(self): self.assertTrue( self.loadDb("--db=%s --create_if_missing" % loadedDbPath, dumpFilePath) ) - self.assertRunOKFull("scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3") + self.assertRunOKFull( + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3" + ) # Load into an existing db, create_if_missing is not specified self.assertTrue(self.dumpDb("--db=%s" % origDbPath, dumpFilePath)) self.assertTrue(self.loadDb("--db=%s" % loadedDbPath, dumpFilePath)) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) # Dump and load with WAL disabled @@ -398,7 +401,7 @@ def testDumpLoad(self): ) ) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) # Dump and load with lots of extra params specified @@ -423,7 +426,7 @@ def testDumpLoad(self): ) ) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) # Dump with count_only @@ -435,7 +438,7 @@ def testDumpLoad(self): ) # DB should have atleast one value for scan to work self.assertRunOKFull("put --db=%s k1 v1" % loadedDbPath, "OK") - self.assertRunOKFull("scan --db=%s" % loadedDbPath, "k1 : v1") + self.assertRunOKFull("scan --db=%s" % 
loadedDbPath, "k1 ==> v1") # Dump command fails because of typo in params dumpFilePath = os.path.join(self.TMP_DIR, "dump8") @@ -458,7 +461,7 @@ def testDumpLoad(self): ) ) self.assertRunOKFull( - "scan --db=%s" % loadedDbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4" + "scan --db=%s" % loadedDbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" ) blob_files = self.getBlobFiles(loadedDbPath) self.assertTrue(len(blob_files) >= 1) @@ -498,26 +501,26 @@ def testMiscAdminTask(self): # These tests need to be improved; for example with asserts about # whether compaction or level reduction actually took place. self.assertRunOK("batchput --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4", "OK") - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") origDbPath = os.path.join(self.TMP_DIR, self.DB_NAME) self.assertTrue(0 == run_err_null("./ldb compact --db=%s" % origDbPath)) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") self.assertTrue( 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=2" % origDbPath) ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") self.assertTrue( 0 == run_err_null("./ldb reduce_levels --db=%s --new_levels=3" % origDbPath) ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") self.assertTrue( 0 == run_err_null("./ldb compact --db=%s --from=x1 --to=x3" % origDbPath) ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") self.assertTrue( 0 @@ -525,7 +528,7 @@ def testMiscAdminTask(self): "./ldb compact --db=%s --hex --from=0x6131 --to=0x6134" % origDbPath ) ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") # TODO(dilip): Not sure what should be passed to WAL.Currently corrupted. self.assertTrue( @@ -535,7 +538,7 @@ def testMiscAdminTask(self): % (origDbPath, os.path.join(origDbPath, "LOG")) ) ) - self.assertRunOK("scan", "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4") def testCheckConsistency(self): print("Running testCheckConsistency...") @@ -923,7 +926,9 @@ def testIngestExternalSst(self): "batchput --db=%s --create_if_missing x1 y1 x2 y2 x3 y3 x4 y4" % dbPath, "OK", ) - self.assertRunOK("scan --db=%s" % dbPath, "x1 : y1\nx2 : y2\nx3 : y3\nx4 : y4") + self.assertRunOK( + "scan --db=%s" % dbPath, "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> y4" + ) dumpFilePath = os.path.join(self.TMP_DIR, "dump1") with open(dumpFilePath, "w") as f: f.write("x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40") @@ -947,7 +952,7 @@ def testIngestExternalSst(self): ) ) self.assertRunOKFull( - "scan --db=%s" % dbPath, "x1 : y10\nx2 : y20\nx3 : y30\nx4 : y40" + "scan --db=%s" % dbPath, "x1 ==> y10\nx2 ==> y20\nx3 ==> y30\nx4 ==> y40" ) diff --git a/unreleased_history/behavior_changes/ldb_scan_command_output_change.md b/unreleased_history/behavior_changes/ldb_scan_command_output_change.md new file mode 100644 index 000000000..806abd4c0 --- /dev/null +++ b/unreleased_history/behavior_changes/ldb_scan_command_output_change.md @@ -0,0 +1 @@ +Change ldb scan command delimiter from ':' to '==>'. 
diff --git a/unreleased_history/new_features/wide_column_support_in_ldb.md b/unreleased_history/new_features/wide_column_support_in_ldb.md new file mode 100644 index 000000000..24e7621f6 --- /dev/null +++ b/unreleased_history/new_features/wide_column_support_in_ldb.md @@ -0,0 +1 @@ +Add wide column support to ldb commands (scan, dump, idump, dump_wal) and sst_dump tool's scan command From bd6a8340c3a2db764620e90b3ac5be173fc68a0c Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 1 Sep 2023 09:34:08 -0700 Subject: [PATCH 083/386] Fix a bug where iterator status is not checked (#11782) Summary: This happens in (Compaction)MergingIterator layer, and can cause data loss during compaction or read/scan return incorrect result Pull Request resolved: https://github.com/facebook/rocksdb/pull/11782 Reviewed By: ajkr Differential Revision: D48880575 Pulled By: cbi42 fbshipit-source-id: 2294ad284a6d653d3674bebe55380f12ee4b645b --- table/compaction_merging_iterator.cc | 1 + table/merging_iterator.cc | 8 ++++++++ .../bug_fixes/001_check_iter_status_data_loss.md | 1 + 3 files changed, 10 insertions(+) create mode 100644 unreleased_history/bug_fixes/001_check_iter_status_data_loss.md diff --git a/table/compaction_merging_iterator.cc b/table/compaction_merging_iterator.cc index 8a5c45240..98581b16d 100644 --- a/table/compaction_merging_iterator.cc +++ b/table/compaction_merging_iterator.cc @@ -329,6 +329,7 @@ void CompactionMergingIterator::FindNextVisibleKey() { assert(current->iter.status().ok()); minHeap_.replace_top(current); } else { + considerStatus(current->iter.status()); minHeap_.pop(); } if (range_tombstone_iters_[current->level]) { diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 0fa3fcd3e..ae92aa198 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -931,6 +931,7 @@ bool MergingIterator::SkipNextDeleted() { InsertRangeTombstoneToMinHeap(current->level, true /* start_key */, true /* replace_top */); } else { + // TruncatedRangeDelIterator does not have status minHeap_.pop(); } return true /* current key deleted */; @@ -988,6 +989,9 @@ bool MergingIterator::SkipNextDeleted() { if (current->iter.Valid()) { assert(current->iter.status().ok()); minHeap_.push(current); + } else { + // TODO(cbi): check status and early return if non-ok. 
+ considerStatus(current->iter.status()); } // Invariants (rti) and (phi) if (range_tombstone_iters_[current->level] && @@ -1027,6 +1031,7 @@ bool MergingIterator::SkipNextDeleted() { if (current->iter.Valid()) { minHeap_.replace_top(current); } else { + considerStatus(current->iter.status()); minHeap_.pop(); } return true /* current key deleted */; @@ -1199,6 +1204,8 @@ bool MergingIterator::SkipPrevDeleted() { if (current->iter.Valid()) { assert(current->iter.status().ok()); maxHeap_->push(current); + } else { + considerStatus(current->iter.status()); } if (range_tombstone_iters_[current->level] && @@ -1241,6 +1248,7 @@ bool MergingIterator::SkipPrevDeleted() { if (current->iter.Valid()) { maxHeap_->replace_top(current); } else { + considerStatus(current->iter.status()); maxHeap_->pop(); } return true /* current key deleted */; diff --git a/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md b/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md new file mode 100644 index 000000000..1cedc7215 --- /dev/null +++ b/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md @@ -0,0 +1 @@ +* Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. \ No newline at end of file From 9bd1a6fa29651e9b9d1e64b2711726981ae8e566 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 1 Sep 2023 11:33:15 -0700 Subject: [PATCH 084/386] Fix a bug where iterator can return incorrect data for DeleteRange() users (#11786) Summary: This should only affect iterator when - user uses DeleteRange(), - An iterator from level L has a non-ok status (such non-ok status may not be caught before the bug fix in https://github.com/facebook/rocksdb/pull/11783), and - A range tombstone covers a key from level > L and triggers a reseek sets the status_ to OK in SeekImpl()/SeekPrevImpl() e.g. https://github.com/facebook/rocksdb/blob/bd6a8340c3a2db764620e90b3ac5be173fc68a0c/table/merging_iterator.cc#L801 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11786 Differential Revision: D48908830 Pulled By: cbi42 fbshipit-source-id: eb564be375af4e33dc27542eff753260186e6d5d --- table/merging_iterator.cc | 4 ++-- .../bug_fixes/010_check_more_iter_status_for_delete_range.md | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index ae92aa198..505cd76d3 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -308,6 +308,7 @@ class MergingIterator : public InternalIterator { // holds after this call, and minHeap_.top().iter points to the // first key >= target among children_ that is not covered by any range // tombstone. 
+ status_ = Status::OK(); SeekImpl(target); FindNextVisibleKey(); @@ -321,6 +322,7 @@ class MergingIterator : public InternalIterator { void SeekForPrev(const Slice& target) override { assert(range_tombstone_iters_.empty() || range_tombstone_iters_.size() == children_.size()); + status_ = Status::OK(); SeekForPrevImpl(target); FindPrevVisibleKey(); @@ -798,7 +800,6 @@ void MergingIterator::SeekImpl(const Slice& target, size_t starting_level, active_.erase(active_.lower_bound(starting_level), active_.end()); } - status_ = Status::OK(); IterKey current_search_key; current_search_key.SetInternalKey(target, false /* copy */); // Seek target might change to some range tombstone end key, so @@ -1083,7 +1084,6 @@ void MergingIterator::SeekForPrevImpl(const Slice& target, active_.erase(active_.lower_bound(starting_level), active_.end()); } - status_ = Status::OK(); IterKey current_search_key; current_search_key.SetInternalKey(target, false /* copy */); // Seek target might change to some range tombstone end key, so diff --git a/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md b/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md new file mode 100644 index 000000000..3e060b658 --- /dev/null +++ b/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md @@ -0,0 +1 @@ +* Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file. \ No newline at end of file From fe3405e80f9f8a35975077e58cef899e682510ef Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 1 Sep 2023 15:44:38 -0700 Subject: [PATCH 085/386] Automatic table sizing for HyperClockCache (AutoHCC) (#11738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: This change add an experimental next-generation HyperClockCache (HCC) with automatic sizing of the underlying hash table. Both the existing version (stable) and the new version (experimental for now) of HCC are available depending on whether an estimated average entry charge is provided in HyperClockCacheOptions. Internally, we call the two implementations AutoHyperClockCache (new) and FixedHyperClockCache (existing). The performance characteristics and much of the underlying logic are similar enough that AutoHCC is likely to make FixedHCC obsolete, and so it's best considered an evolution of the same technology or solution rather than an alternative. More specifically, both implementations share essentially the same logic for managing the state of individual entries in the cache, including metadata for reference counting and counting clocks for eviction. This metadata, which I like to call the "low-level HCC protocol," includes a read-write lock on entries, but relaxed consistency requirements on the cache (e.g. allowing rare duplication) means high-level cache operations never wait for these low-level per-entry locks. FixedHCC is fully wait-free. AutoHCC is different in how entries are indexed into an efficient hash table. AutoHCC is "essentially wait-free" as there is no pattern of typical high-level operations on a large cache that can lead to one thread waiting on another to complete some work, though it can happen in some unusual/unlucky cases, or atypical uses such as erasing specific cache keys. Table growth and entry reclamation is more complex in AutoHCC compared to FixedHCC, so uses some localized locking to manage that. 
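Before going further into the internals, here is a minimal usage sketch of how a caller opts into one variant or the other, per the HyperClockCacheOptions description above. This is an illustration only: the MakeHcc helper is hypothetical, and the convention that an estimated entry charge of 0 requests automatic sizing, as well as the MakeSharedCache() call, should be confirmed against the updated include/rocksdb/cache.h in this PR.

```
#include <cstddef>
#include <memory>

#include "rocksdb/cache.h"

// Hypothetical helper for illustration. Per the summary above, providing an
// estimated average entry charge selects the existing fixed-size table
// (FixedHCC); leaving it unset (0 assumed here) requests automatic table
// sizing (AutoHCC).
std::shared_ptr<rocksdb::Cache> MakeHcc(bool automatic_sizing) {
  rocksdb::HyperClockCacheOptions opts(
      /*_capacity=*/size_t{1} << 30,
      /*_estimated_entry_charge=*/automatic_sizing ? 0 : 8 * 1024);
  return opts.MakeSharedCache();
}
```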
AutoHCC uses linear hashing to grow the table as needed, with low latency and to a precise size. AutoHCC depends on anonymous mmap support from the OS (currently verified working on Linux, MacOS, and Windows) to allow the array underlying a hash table to grow in place without wasting resident memory on space reserved but unused. AutoHCC uses a form of chaining while FixedHCC uses open addressing and double hashing. More specifics: * In developing this PR, a rare availability bug (minor) was noticed in the existing HCC implementation of Release()+erase_if_last_ref, which is now inherited into AutoHCC. Fixing this without a performance regression will not be simple, so is left for follow-up work. * Some existing unit tests required adjustment of operational parameters or conditions to work with the new behaviors of AutoHCC. A number of bugs were found and fixed in the validation process, including getting unit tests in good working order. * Added an option to cache_bench, `-degenerate_hash_bits` for correctness stress testing described below. For this, the tool uses the reverse-engineered hash function for HCC to generate keys in which the specified number of hash bits, in critical positions, have a fixed value. Essentially each degenerate hash bit will half the number of chain heads utilized and double the average chain length. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11738 Test Plan: unit tests updated, and already added to db crash test. Also ## Correctness The code includes generous assertions to check for unexpected states, especially at destruction time, so should be able to detect critical concurrency bugs. Less serious "availability bugs" in which cache data is hidden or cleanly lost are more difficult to detect, but also less scary for data correctness (as long as performance is good and the design is sound). In average operation, the structure is extremely low stress and low contention (see next section) so stressing the corner case logic requires artificially stressing the operating conditions. First, we keep the structure small to increase the number of threads hitting the same chain or entry, and just one cache shard. Second, we artificially degrade the hashing so that chains are much longer than typical, using the new `-degenerate_hash_bits` option to cache_bench. Third, we re-create the structure from scratch frequently in order to exercise the Grow logic repeatedly and to get the benefit of the consistency checks in the structure's destructor in debug builds. For cache_bench this also means disabling the single-threaded "populate cache" step (normally used for steady state performance testing). And of course use many more threads than cores to have many preemptions. An effective test for working out bugs was this (using debug build of course): ``` while ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 -cache_size=8000000 -threads=100 -populate_cache=0 -ops_per_thread=10000 -degenerate_hash_bits=6 -num_shard_bits=0; do :; done ``` Or even smaller cases. This setup has around 27 utilized chains, with around 35 entries each, and yield-waits more than 1 million times per second (very high contention; see next section). I have let this run for hours searching for any lingering issues. I've also run cache_bench under ASAN, UBSAN, and TSAN. ## Essentially wait free There is a counter for number of yield() calls when one thread is waiting on another. 
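The wait being counted is the simple yield-and-retry loop used in the few contended paths (it appears in ChainRewriteLock::Acquire() and in the grow-ordering check in Grow() in the diff below). A minimal sketch of the pattern, with WaitUntilUnlocked and the `locked` flag standing in for the real conditions (chain-head lock bit, not-yet-published shift amount):

```
#include <atomic>
#include <cstdint>
#include <thread>

// Illustrative stand-in for the counted yield-wait pattern; yield_count
// corresponds to the per-table counter that cache_bench reports below.
std::atomic<uint64_t> yield_count{0};

void WaitUntilUnlocked(std::atomic<bool>& locked) {
  while (locked.load(std::memory_order_acquire)) {
    // Rare in practice (see measurements below), so the yield cost is
    // insignificant.
    yield_count.fetch_add(1, std::memory_order_relaxed);
    std::this_thread::yield();
  }
}
```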
When we pre-populate the structure in a single thread, ``` ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 -populate_cache=1 -ops_per_thread=200000 2>&1 | grep Yield ``` We see something on the order of 1 yield call per second across 16 threads, even when we load the system other other jobs (parallel compilation). With -populate_cache=0, there are more yield opportunities with parallel table growth. On an otherwise unloaded system, we still see very small (single digit) yield counts, with a chance of getting into the thousands, and getting into 10s of thousands per second during table growth phase if the system is loaded with other jobs. However, I am not worried about this if performance is still good (see next section). ## Overall performance Although cache_bench initially suggested performance very close to FixedHCC, there was a very noticeable performance hit under a db_bench setup like used in validating https://github.com/facebook/rocksdb/issues/10626. Much of the difference has been reduced by optimizing Lookup with a "naive" pass that will almost always find entries quickly, and only falling back to the careful Lookup algorithm when not found in the first pass. Setups (chosen to be sensitive to block cache performance), and compiled with USE_CLANG=1 JEMALLOC=1 PORTABLE=0 DEBUG_LEVEL=0: ``` TEST_TMPDIR=/dev/shm base/db_bench -benchmarks=fillrandom -num=30000000 -disable_wal=1 -bloom_bits=16 ``` ### No regression on FixedHCC Running before & after builds at the same time on a 48 core machine. ``` TEST_TMPDIR=/dev/shm /usr/bin/time ./db_bench -benchmarks=readrandom[-X10],block_cache_entry_stats,cache_report_problems -readonly -num=30000000 -bloom_bits=16 -cache_index_and_filter_blocks=1 -cache_size=610000000 -duration 20 -threads=24 -cache_type=fixed_hyper_clock_cache -seed=1234 ``` Before: readrandom [AVG 10 runs] : 847234 (± 8150) ops/sec; 59.2 (± 0.6) MB/sec 703MB max RSS After: readrandom [AVG 10 runs] : 851021 (± 7929) ops/sec; 59.5 (± 0.6) MB/sec 706MB max RSS Probably no material difference. ### Single-threaded performance Using `[-X2]` and `-threads=1` and `-duration=30`, running all three at the same time: lru_cache: 55100 ops/sec, then 55862 ops/sec (627MB max RSS) fixed_hyper_clock_cache: 60496 ops/sec, then 61231 ops/sec (626MB max RSS) auto_hyper_clock_cache: 47560 ops/sec, then 56081 ops/sec (626MB max RSS) So AutoHCC has more ramp-up cost in the first pass as the cache grows to the appropriate size. (In single-threaded operation, the parallelizability and per-op low latency of table growth is overall slower.) However, once up to size, its performance is comparable to LRUCache. FixedHCC's lean operations still win overall when a good estimate is available. If we look at HCC table stats, we can see that this configuration is not favorable to AutoHCC (and I have verified that other memory sizes do not yield substantially different results, until shards are under-sized for the full filters): FixedHCC: Slot occupancy stats: Overall 47% (124991/262144), Min/Max/Window = 28%/64%/500, MaxRun{Pos/Neg} = 17/22 AutoHCC: Slot occupancy stats: Overall 59% (125781/209682), Min/Max/Window = 43%/82%/500, MaxRun{Pos/Neg} = 76/16 Head occupancy stats: Overall 43% (92259/209682), Min/Max/Window = 24%/74%/500, MaxRun{Pos/Neg} = 19/26 Entries at home count: 53350 FixedHCC configuration is relatively good for speed, and not ideal for space utilization. 
As is typical, AutoHCC has tighter control on metadata usage (209682 x 64 bytes rather than 262144 x 64 bytes), and the higher load factor is slightly worse for speed. LRUCache also has more metadata usage, at 199680 x 96 bytes of tracked metadata (plus roughly another 10% of that untracked in the head pointers), and that metadata is subject to fragmentation. ### Parallel performance, high hit rate Now using `[-X10]` and `-threads=10`, all three at the same time lru_cache: [AVG 10 runs] : 263629 (± 1425) ops/sec; 18.4 (± 0.1) MB/sec 655MB max RSS, 97.1% cache hit rate fixed_hyper_clock_cache: [AVG 10 runs] : 479590 (± 8114) ops/sec; 33.5 (± 0.6) MB/sec 651MB max RSS, 97.1% cache hit rate auto_hyper_clock_cache: [AVG 10 runs] : 418687 (± 5915) ops/sec; 29.3 (± 0.4) MB/sec 657MB max RSS, 97.1% cache hit rate Even with just 10-way parallelism for each cache (though 30+/48 cores busy overall), LRUCache is already showing performance degradation, while AutoHCC is in the neighborhood of FixedHCC. And that brings us to the question of how AutoHCC holds up under extreme parallelism, so now independent runs with `-threads=100` (overloading 48 cores). lru_cache: 438613 ops/sec, 827MB max RSS fixed_hyper_clock_cache: 1651310 ops/sec, 812MB max RSS auto_hyper_clock_cache: 1505875 ops/sec, 821MB max RSS (Yield count: 1089 over 30s) Clearly, AutoHCC holds up extremely well under extreme parallelism, even closing some of the modest performance gap with FixedHCC. ### Parallel performance, low hit rate To get down to roughly 50% cache hit rate, we use `-cache_index_and_filter_blocks=0 -cache_size=1650000000` with `-threads=10`. Here the extra cost of running counting clock eviction, especially on the chains of AutoHCC, are evident, especially with the lower contention of cache_index_and_filter_blocks=0: lru_cache: 725231 ops/sec, 1770MB max RSS, 51.3% hit rate fixed_hyper_clock_cache: 638620 ops/sec, 1765MB max RSS, 50.2% hit rate auto_hyper_clock_cache: 541018 ops/sec, 1777MB max RSS, 50.8% hit rate Reviewed By: jowlyzhang Differential Revision: D48784755 Pulled By: pdillinger fbshipit-source-id: e79813dc087474ac427637dd282a14fa3011a6e4 --- cache/cache_bench_tool.cc | 17 + cache/cache_test.cc | 20 +- cache/clock_cache.cc | 1984 ++++++++++++++++++++++++++++++++++++- cache/clock_cache.h | 371 ++++++- cache/lru_cache_test.cc | 4 +- include/rocksdb/cache.h | 33 +- tools/db_bench_tool.cc | 9 +- 7 files changed, 2389 insertions(+), 49 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 4c0f366a9..e33e36162 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -57,6 +57,9 @@ DEFINE_uint32(value_bytes_estimate, 0, "If > 0, overrides estimated_entry_charge or " "min_avg_entry_charge depending on cache_type."); +DEFINE_int32( + degenerate_hash_bits, 0, + "With HCC, fix this many hash bits to increase table hash collisions"); DEFINE_uint32(skew, 5, "Degree of skew in key selection. 0 = no skew"); DEFINE_bool(populate_cache, true, "Populate cache before operations"); @@ -242,6 +245,20 @@ struct KeyGen { raw = std::min(raw, rnd.Next()); } uint64_t key = FastRange64(raw, max_key); + if (FLAGS_degenerate_hash_bits) { + uint64_t key_hash = + Hash64(reinterpret_cast(&key), sizeof(key)); + // HCC uses the high 64 bits and a lower bit mask for starting probe + // location, so we fix hash bits starting at the bottom of that word. 
+ auto hi_hash = uint64_t{0x9e3779b97f4a7c13U} ^ + (key_hash << 1 << (FLAGS_degenerate_hash_bits - 1)); + uint64_t un_hi, un_lo; + BijectiveUnhash2x64(hi_hash, key_hash, &un_hi, &un_lo); + un_lo ^= BitwiseAnd(FLAGS_seed, INT32_MAX); + EncodeFixed64(key_data, un_lo); + EncodeFixed64(key_data + 8, un_hi); + return Slice(key_data, kCacheKeySize); + } // Variable size and alignment size_t off = key % 8; key_data[0] = char{42}; diff --git a/cache/cache_test.cc b/cache/cache_test.cc index 969ab597b..f21efc47a 100644 --- a/cache/cache_test.cc +++ b/cache/cache_test.cc @@ -175,7 +175,7 @@ class LRUCacheTest : public CacheTest {}; TEST_P(CacheTest, UsageTest) { // cache is std::shared_ptr and will be automatically cleaned up. const size_t kCapacity = 100000; - auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata); + auto cache = NewCache(kCapacity, 6, false, kDontChargeCacheMetadata); auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata); ASSERT_EQ(0, cache->GetUsage()); size_t baseline_meta_usage = precise_cache->GetUsage(); @@ -193,9 +193,13 @@ TEST_P(CacheTest, UsageTest) { ASSERT_OK(precise_cache->Insert(key, value, &kDumbHelper, kv_size)); usage += kv_size; ASSERT_EQ(usage, cache->GetUsage()); - if (IsHyperClock()) { + if (GetParam() == kFixedHyperClock) { ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage()); } else { + // AutoHyperClockCache meta usage grows in proportion to lifetime + // max number of entries. LRUCache in proportion to resident number of + // entries, though there is an untracked component proportional to + // lifetime max number of entries. ASSERT_LT(usage, precise_cache->GetUsage()); } } @@ -203,7 +207,11 @@ TEST_P(CacheTest, UsageTest) { cache->EraseUnRefEntries(); precise_cache->EraseUnRefEntries(); ASSERT_EQ(0, cache->GetUsage()); - ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); + if (GetParam() != kAutoHyperClock) { + // NOTE: AutoHyperClockCache meta usage grows in proportion to lifetime + // max number of entries. + ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); + } // make sure the cache will be overloaded for (size_t i = 1; i < kCapacity; ++i) { @@ -318,7 +326,11 @@ TEST_P(CacheTest, PinnedUsageTest) { cache->EraseUnRefEntries(); precise_cache->EraseUnRefEntries(); ASSERT_EQ(0, cache->GetUsage()); - ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); + if (GetParam() != kAutoHyperClock) { + // NOTE: AutoHyperClockCache meta usage grows in proportion to lifetime + // max number of entries. + ASSERT_EQ(baseline_meta_usage, precise_cache->GetUsage()); + } } TEST_P(CacheTest, HitAndMiss) { diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index cdb97e296..c6a35d59f 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -13,8 +13,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -94,9 +94,32 @@ inline void Unref(const ClockHandle& h, uint64_t count = 1) { (void)old_meta; } -inline bool ClockUpdate(ClockHandle& h) { - uint64_t meta = h.meta.load(std::memory_order_relaxed); +inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { + uint64_t meta; + if (purgeable) { + assert(*purgeable == false); + // In AutoHCC, our eviction process follows the chain structure, so we + // should ensure that we see the latest state of each entry, at least for + // assertion checking. 
+ meta = h.meta.load(std::memory_order_acquire); + } else { + // In FixedHCC, our eviction process is a simple iteration without regard + // to probing order, displacements, etc., so it doesn't matter if we see + // somewhat stale data. + meta = h.meta.load(std::memory_order_relaxed); + } + if (((meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit) == + 0) { + // Only clock update Shareable entries + if (purgeable) { + *purgeable = true; + // AutoHCC only: make sure we only attempt to update non-empty slots + assert((meta >> ClockHandle::kStateShift) & + ClockHandle::kStateOccupiedBit); + } + return false; + } uint64_t acquire_count = (meta >> ClockHandle::kAcquireCounterShift) & ClockHandle::kCounterMask; uint64_t release_count = @@ -105,10 +128,6 @@ inline bool ClockUpdate(ClockHandle& h) { // Only clock update entries with no outstanding refs return false; } - if (!((meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit)) { - // Only clock update Shareable entries - return false; - } if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) && acquire_count > 0) { // Decrement clock @@ -886,6 +905,9 @@ bool FixedHyperClockTable::Release(HandleImpl* h, bool useful, if (erase_if_last_ref || UNLIKELY(old_meta >> ClockHandle::kStateShift == ClockHandle::kStateInvisible)) { + // FIXME: There's a chance here that another thread could replace this + // entry and we end up erasing the wrong one. + // Update for last fetch_add op if (useful) { old_meta += ClockHandle::kReleaseIncrement; @@ -1463,7 +1485,7 @@ class LoadVarianceStats { "), Min/Max/Window = " + PercentStr(min_, N) + "/" + PercentStr(max_, N) + "/" + std::to_string(N) + ", MaxRun{Pos/Neg} = " + std::to_string(max_pos_run_) + "/" + - std::to_string(max_neg_run_) + "\n"; + std::to_string(max_neg_run_); } void Add(bool positive) { @@ -1498,7 +1520,11 @@ class LoadVarianceStats { std::bitset recent_; static std::string PercentStr(size_t a, size_t b) { - return std::to_string(uint64_t{100} * a / b) + "%"; + if (b == 0) { + return "??%"; + } else { + return std::to_string(uint64_t{100} * a / b) + "%"; + } } }; @@ -1613,6 +1639,1941 @@ void FixedHyperClockCache::ReportProblems( } } +// ======================================================================= +// AutoHyperClockCache +// ======================================================================= + +// See AutoHyperClockTable::length_info_ etc. for how the linear hashing +// metadata is encoded. Here are some example values: +// +// Used length | min shift | threshold | max shift +// 2 | 1 | 0 | 1 +// 3 | 1 | 1 | 2 +// 4 | 2 | 0 | 2 +// 5 | 2 | 1 | 3 +// 6 | 2 | 2 | 3 +// 7 | 2 | 3 | 3 +// 8 | 3 | 0 | 3 +// 9 | 3 | 1 | 4 +// ... 
+// Note: +// * min shift = floor(log2(used length)) +// * max shift = ceil(log2(used length)) +// * used length == (1 << shift) + threshold +// Also, shift=0 is never used in practice, so is reserved for "unset" + +namespace { + +inline int LengthInfoToMinShift(uint64_t length_info) { + int mask_shift = BitwiseAnd(length_info, int{255}); + assert(mask_shift <= 63); + assert(mask_shift > 0); + return mask_shift; +} + +inline size_t LengthInfoToThreshold(uint64_t length_info) { + return static_cast(length_info >> 8); +} + +inline size_t LengthInfoToUsedLength(uint64_t length_info) { + size_t threshold = LengthInfoToThreshold(length_info); + int shift = LengthInfoToMinShift(length_info); + assert(threshold < (size_t{1} << shift)); + size_t used_length = (size_t{1} << shift) + threshold; + assert(used_length >= 2); + return used_length; +} + +inline uint64_t UsedLengthToLengthInfo(size_t used_length) { + assert(used_length >= 2); + int shift = FloorLog2(used_length); + uint64_t threshold = BottomNBits(used_length, shift); + uint64_t length_info = + (uint64_t{threshold} << 8) + static_cast(shift); + assert(LengthInfoToUsedLength(length_info) == used_length); + assert(LengthInfoToMinShift(length_info) == shift); + assert(LengthInfoToThreshold(length_info) == threshold); + return length_info; +} + +inline size_t GetStartingLength(size_t capacity) { + if (capacity > port::kPageSize) { + // Start with one memory page + return port::kPageSize / sizeof(AutoHyperClockTable::HandleImpl); + } else { + // Mostly to make unit tests happy + return 4; + } +} + +inline size_t GetHomeIndex(uint64_t hash, int shift) { + return static_cast(BottomNBits(hash, shift)); +} + +inline void GetHomeIndexAndShift(uint64_t length_info, uint64_t hash, + size_t* home, int* shift) { + int min_shift = LengthInfoToMinShift(length_info); + size_t threshold = LengthInfoToThreshold(length_info); + bool extra_shift = GetHomeIndex(hash, min_shift) < threshold; + *home = GetHomeIndex(hash, min_shift + extra_shift); + *shift = min_shift + extra_shift; + assert(*home < LengthInfoToUsedLength(length_info)); +} + +inline int GetShiftFromNextWithShift(uint64_t next_with_shift) { + return BitwiseAnd(next_with_shift, + AutoHyperClockTable::HandleImpl::kShiftMask); +} + +inline size_t GetNextFromNextWithShift(uint64_t next_with_shift) { + return static_cast(next_with_shift >> + AutoHyperClockTable::HandleImpl::kNextShift); +} + +inline uint64_t MakeNextWithShift(size_t next, int shift) { + return (uint64_t{next} << AutoHyperClockTable::HandleImpl::kNextShift) | + static_cast(shift); +} + +inline uint64_t MakeNextWithShiftEnd(size_t head, int shift) { + return AutoHyperClockTable::HandleImpl::kNextEndFlags | + MakeNextWithShift(head, shift); +} + +// Helper function for Lookup +inline bool MatchAndRef(const UniqueId64x2* hashed_key, const ClockHandle& h, + int shift = 0, size_t home = 0, + bool* full_match_or_unknown = nullptr) { + // Must be at least something to match + assert(hashed_key || shift > 0); + + uint64_t old_meta; + // (Optimistically) increment acquire counter. + old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, + std::memory_order_acquire); + // Check if it's a referencable (sharable) entry + if ((old_meta & (uint64_t{ClockHandle::kStateShareableBit} + << ClockHandle::kStateShift)) == 0) { + // For non-sharable states, incrementing the acquire counter has no effect + // so we don't need to undo it. 
Furthermore, we cannot safely undo + // it because we did not acquire a read reference to lock the + // entry in a Shareable state. + if (full_match_or_unknown) { + *full_match_or_unknown = true; + } + return false; + } + // Else acquired a read reference + assert(GetRefcount(old_meta + ClockHandle::kAcquireIncrement) > 0); + if (hashed_key && h.hashed_key == *hashed_key && + LIKELY(old_meta & (uint64_t{ClockHandle::kStateVisibleBit} + << ClockHandle::kStateShift))) { + // Match on full key, visible + if (full_match_or_unknown) { + *full_match_or_unknown = true; + } + return true; + } else if (shift > 0 && home == BottomNBits(h.hashed_key[1], shift)) { + // NOTE: upper 32 bits of hashed_key[0] is used for sharding + // Match on home address, possibly invisible + if (full_match_or_unknown) { + *full_match_or_unknown = false; + } + return true; + } else { + // Mismatch. Pretend we never took the reference + Unref(h); + if (full_match_or_unknown) { + *full_match_or_unknown = false; + } + return false; + } +} + +void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, + size_t& frontier, uint64_t stop_before_or_new_tail, + int old_shift, int new_shift) { + assert(frontier != SIZE_MAX); + assert(new_shift == old_shift + 1); + (void)old_shift; + (void)new_shift; + using HandleImpl = AutoHyperClockTable::HandleImpl; + for (;;) { + uint64_t next_with_shift = + arr[frontier].chain_next_with_shift.load(std::memory_order_acquire); + assert(GetShiftFromNextWithShift(next_with_shift) == old_shift); + if (next_with_shift == stop_before_or_new_tail) { + // Stopping at entry with pointer matching "stop before" + assert(!HandleImpl::IsEnd(next_with_shift)); + // We need to keep a reference to it also to keep it stable. + return; + } + if (HandleImpl::IsEnd(next_with_shift)) { + // Also update tail to new tail + assert(HandleImpl::IsEnd(stop_before_or_new_tail)); + arr[frontier].chain_next_with_shift.store(stop_before_or_new_tail, + std::memory_order_release); + // Mark nothing left to upgrade + frontier = SIZE_MAX; + return; + } + // Next is another entry to process, so upgrade and unref and advance + // frontier + arr[frontier].chain_next_with_shift.fetch_add(1U, + std::memory_order_acq_rel); + assert(GetShiftFromNextWithShift(next_with_shift + 1) == new_shift); + frontier = GetNextFromNextWithShift(next_with_shift); + } +} + +size_t CalcOccupancyLimit(size_t used_length) { + return static_cast(used_length * AutoHyperClockTable::kMaxLoadFactor + + 0.999); +} + +} // namespace + +// An RAII wrapper for locking a chain of entries (flag bit on the head) +// so that there is only one thread allowed to remove entries from the +// chain, or to rewrite it by splitting for Grow. Without the lock, +// all lookups and insertions at the head can proceed wait-free. +// The class also provides functions for safely manipulating the head pointer +// while holding the lock--or wanting to should it become non-empty. +// +// The flag bits on the head are such that the head cannot be locked if it +// is an empty chain, so that a "blind" fetch_or will try to lock a non-empty +// chain but have no effect on an empty chain. When a potential rewrite +// operation see an empty head pointer, there is no need to lock as the +// operation is a no-op. However, there are some cases such as CAS-update +// where locking might be required after initially not being needed, if the +// operation is forced to revisit the head pointer. 
+class AutoHyperClockTable::ChainRewriteLock { + public: + using HandleImpl = AutoHyperClockTable::HandleImpl; + explicit ChainRewriteLock(HandleImpl* h, std::atomic& yield_count, + bool already_locked_or_end = false) + : head_ptr_(&h->head_next_with_shift) { + if (already_locked_or_end) { + new_head_ = head_ptr_->load(std::memory_order_acquire); + // already locked or end + assert(new_head_ & HandleImpl::kHeadLocked); + return; + } + Acquire(yield_count); + } + + ~ChainRewriteLock() { + if (!IsEnd()) { + // Release lock + uint64_t old = head_ptr_->fetch_and(~HandleImpl::kHeadLocked, + std::memory_order_release); + (void)old; + assert((old & HandleImpl::kNextEndFlags) == HandleImpl::kHeadLocked); + } + } + + void Reset(HandleImpl* h, std::atomic& yield_count) { + this->~ChainRewriteLock(); + new (this) ChainRewriteLock(h, yield_count); + } + + // Expected current state, assuming no parallel updates. + uint64_t GetNewHead() const { return new_head_; } + + // Only safe if we know that the value hasn't changed from other threads + void SimpleUpdate(uint64_t next_with_shift) { + assert(head_ptr_->load(std::memory_order_acquire) == new_head_); + new_head_ = next_with_shift | HandleImpl::kHeadLocked; + head_ptr_->store(new_head_, std::memory_order_release); + } + + bool CasUpdate(uint64_t next_with_shift, std::atomic& yield_count) { + uint64_t new_head = next_with_shift | HandleImpl::kHeadLocked; + uint64_t expected = GetNewHead(); + bool success = head_ptr_->compare_exchange_strong( + expected, new_head, std::memory_order_acq_rel); + if (success) { + // Ensure IsEnd() is kept up-to-date, including for dtor + new_head_ = new_head; + } else { + // Parallel update to head, such as Insert() + if (IsEnd()) { + // Didn't previously hold a lock + if (HandleImpl::IsEnd(expected)) { + // Still don't need to + new_head_ = expected; + } else { + // Need to acquire lock before proceeding + Acquire(yield_count); + } + } else { + // Parallel update must preserve our lock + assert((expected & HandleImpl::kNextEndFlags) == + HandleImpl::kHeadLocked); + new_head_ = expected; + } + } + return success; + } + + bool IsEnd() const { return HandleImpl::IsEnd(new_head_); } + + private: + void Acquire(std::atomic& yield_count) { + for (;;) { + // Acquire removal lock on the chain + uint64_t old_head = head_ptr_->fetch_or(HandleImpl::kHeadLocked, + std::memory_order_acq_rel); + if ((old_head & HandleImpl::kNextEndFlags) != HandleImpl::kHeadLocked) { + // Either acquired the lock or lock not needed (end) + assert((old_head & HandleImpl::kNextEndFlags) == 0 || + (old_head & HandleImpl::kNextEndFlags) == + HandleImpl::kNextEndFlags); + + new_head_ = old_head | HandleImpl::kHeadLocked; + break; + } + // NOTE: one of the few yield-wait loops, which is rare enough in practice + // for its performance to be insignificant. (E.g. using C++20 atomic + // wait/notify would likely be worse because of wasted notify costs.) 
+ yield_count.fetch_add(1, std::memory_order_relaxed); + std::this_thread::yield(); + } + } + + std::atomic* head_ptr_; + uint64_t new_head_; +}; + +AutoHyperClockTable::AutoHyperClockTable( + size_t capacity, bool /*strict_capacity_limit*/, + CacheMetadataChargePolicy metadata_charge_policy, + MemoryAllocator* allocator, + const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed, + const Opts& opts) + : BaseClockTable(metadata_charge_policy, allocator, eviction_callback, + hash_seed), + array_(MemMapping::AllocateLazyZeroed( + sizeof(HandleImpl) * CalcMaxUsableLength(capacity, + opts.min_avg_value_size, + metadata_charge_policy))), + length_info_(UsedLengthToLengthInfo(GetStartingLength(capacity))), + occupancy_limit_( + CalcOccupancyLimit(LengthInfoToUsedLength(length_info_.load()))), + clock_pointer_mask_( + BottomNBits(UINT64_MAX, LengthInfoToMinShift(length_info_.load()))) { + if (metadata_charge_policy == + CacheMetadataChargePolicy::kFullChargeCacheMetadata) { + // NOTE: ignoring page boundaries for simplicity + usage_ += size_t{GetTableSize()} * sizeof(HandleImpl); + } + + static_assert(sizeof(HandleImpl) == 64U, + "Expecting size / alignment with common cache line size"); + + // Populate head pointers + uint64_t length_info = length_info_.load(); + int min_shift = LengthInfoToMinShift(length_info); + int max_shift = min_shift + 1; + size_t major = uint64_t{1} << min_shift; + size_t used_length = GetTableSize(); + + assert(major <= used_length); + assert(used_length <= major * 2); + + // Initialize the initial usable set of slots. This slightly odd iteration + // order makes it easier to get the correct shift amount on each head. + for (size_t i = 0; i < major; ++i) { +#ifndef NDEBUG + int shift; + size_t home; +#endif + if (major + i < used_length) { + array_[i].head_next_with_shift = MakeNextWithShiftEnd(i, max_shift); + array_[major + i].head_next_with_shift = + MakeNextWithShiftEnd(major + i, max_shift); +#ifndef NDEBUG // Extra invariant checking + GetHomeIndexAndShift(length_info, i, &home, &shift); + assert(home == i); + assert(shift == max_shift); + GetHomeIndexAndShift(length_info, major + i, &home, &shift); + assert(home == major + i); + assert(shift == max_shift); +#endif + } else { + array_[i].head_next_with_shift = MakeNextWithShiftEnd(i, min_shift); +#ifndef NDEBUG // Extra invariant checking + GetHomeIndexAndShift(length_info, i, &home, &shift); + assert(home == i); + assert(shift == min_shift); + GetHomeIndexAndShift(length_info, major + i, &home, &shift); + assert(home == i); + assert(shift == min_shift); +#endif + } + } +} + +AutoHyperClockTable::~AutoHyperClockTable() { + // Assumes there are no references or active operations on any slot/element + // in the table. 
+ size_t end = GetTableSize(); +#ifndef NDEBUG + std::vector was_populated(end); + std::vector was_pointed_to(end); +#endif + for (size_t i = 0; i < end; i++) { + HandleImpl& h = array_[i]; + switch (h.meta >> ClockHandle::kStateShift) { + case ClockHandle::kStateEmpty: + // noop + break; + case ClockHandle::kStateInvisible: // rare but possible + case ClockHandle::kStateVisible: + assert(GetRefcount(h.meta) == 0); + h.FreeData(allocator_); +#ifndef NDEBUG // Extra invariant checking + usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); + occupancy_.fetch_sub(1U, std::memory_order_relaxed); + was_populated[i] = true; + if (!HandleImpl::IsEnd(h.chain_next_with_shift)) { + assert((h.chain_next_with_shift & HandleImpl::kHeadLocked) == 0); + size_t next = GetNextFromNextWithShift(h.chain_next_with_shift); + assert(!was_pointed_to[next]); + was_pointed_to[next] = true; + } +#endif + break; + // otherwise + default: + assert(false); + break; + } +#ifndef NDEBUG // Extra invariant checking + if (!HandleImpl::IsEnd(h.head_next_with_shift)) { + size_t next = GetNextFromNextWithShift(h.head_next_with_shift); + assert(!was_pointed_to[next]); + was_pointed_to[next] = true; + } +#endif + } +#ifndef NDEBUG // Extra invariant checking + // This check is not perfect, but should detect most reasonable cases + // of abandonned or floating entries, etc. (A floating cycle would not + // be reported as bad.) + for (size_t i = 0; i < end; i++) { + if (was_populated[i]) { + assert(was_pointed_to[i]); + } else { + assert(!was_pointed_to[i]); + } + } +#endif + + assert(usage_.load() == 0 || + usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl)); + assert(occupancy_ == 0); +} + +size_t AutoHyperClockTable::GetTableSize() const { + return LengthInfoToUsedLength(length_info_.load(std::memory_order_acquire)); +} + +size_t AutoHyperClockTable::GetOccupancyLimit() const { + return occupancy_limit_.load(std::memory_order_acquire); +} + +void AutoHyperClockTable::StartInsert(InsertState& state) { + state.saved_length_info = length_info_.load(std::memory_order_acquire); +} + +// Because we have linked lists, bugs or even hardware errors can make it +// possible to create a cycle, which would lead to infinite loop. +// Furthermore, when we have retry cases in the code, we want to be sure +// these are not (and do not become) spin-wait loops. Given the assumption +// of quality hashing and the infeasibility of consistently recurring +// concurrent modifications to an entry or chain, we can safely bound the +// number of loop iterations in feasible operation, whether following chain +// pointers or retrying with some backtracking. A smaller limit is used for +// stress testing, to detect potential issues such as cycles or spin-waits, +// and a larger limit is used to break cycles should they occur in production. +#define CHECK_TOO_MANY_ITERATIONS(i) \ + { \ + assert(i < 0x2000); \ + if (UNLIKELY(i >= 0x8000)) { \ + std::terminate(); \ + } \ + } + +bool AutoHyperClockTable::GrowIfNeeded(size_t new_occupancy, + InsertState& state) { + // new_occupancy has taken into account other threads that are also trying + // to insert, so as soon as we see sufficient *published* usable size, we + // can declare success even if we aren't the one that grows the table. + // However, there's an awkward state where other threads own growing the + // table to sufficient usable size, but the udpated size is not yet + // published. If we wait, then that likely slows the ramp-up cache + // performance. 
If we unblock ourselves by ensuring we grow by at least one
+  // slot, we could technically overshoot the required size by the number of
+  // parallel threads accessing the block cache. On balance, considering
+  // typical cases and the modest consequences of the table being slightly
+  // too large, the latter seems preferable.
+  //
+  // So if the published occupancy limit is too small, we unblock ourselves
+  // by committing to growing the table by at least one slot. Also note that
+  // we might need to grow more than once to actually increase the occupancy
+  // limit (due to max load factor < 1.0).
+
+  while (UNLIKELY(new_occupancy >
+                  occupancy_limit_.load(std::memory_order_relaxed))) {
+    // At this point we commit the thread to growing unless we've reached the
+    // limit (returns false).
+    if (!Grow(state)) {
+      return false;
+    }
+  }
+  // Success (didn't need to grow, or did successfully)
+  return true;
+}
+
+bool AutoHyperClockTable::Grow(InsertState& state) {
+  size_t used_length = LengthInfoToUsedLength(state.saved_length_info);
+
+  // Try to take ownership of a grow slot as the first thread to set its
+  // head_next_with_shift to non-zero, specifically a valid empty chain
+  // in case that is to be the final value.
+  // (We don't need to be super efficient here.)
+  size_t grow_home = used_length;
+  int old_shift;
+  for (;; ++grow_home) {
+    if (grow_home >= array_.Count()) {
+      // Can't grow any more.
+      // (Tested by unit test ClockCacheTest/Limits)
+      return false;
+    }
+
+    old_shift = FloorLog2(grow_home);
+    assert(old_shift >= 1);
+
+    uint64_t empty_head = MakeNextWithShiftEnd(grow_home, old_shift + 1);
+    uint64_t expected_zero = HandleImpl::kUnusedMarker;
+    bool own = array_[grow_home].head_next_with_shift.compare_exchange_strong(
+        expected_zero, empty_head, std::memory_order_acq_rel);
+    if (own) {
+      break;
+    } else {
+      // Taken by another thread. Try next slot.
+      assert(expected_zero != 0);
+    }
+  }
+  // Basically, to implement https://en.wikipedia.org/wiki/Linear_hashing
+  // entries that belong in a new chain starting at grow_home will be
+  // split off from the chain starting at old_home, which is computed here.
+  size_t old_home = BottomNBits(grow_home, old_shift);
+  assert(old_home + (size_t{1} << old_shift) == grow_home);
+
+  // Wait here to ensure any Grow operations that would directly feed into
+  // this one are finished, though the full waiting actually completes in
+  // acquiring the rewrite lock for old_home in SplitForGrow.
+  size_t old_old_home = BottomNBits(grow_home, old_shift - 1);
+  for (;;) {
+    uint64_t old_old_head = array_[old_old_home].head_next_with_shift.load(
+        std::memory_order_acquire);
+    if (GetShiftFromNextWithShift(old_old_head) >= old_shift) {
+      if ((old_old_head & HandleImpl::kNextEndFlags) !=
+          HandleImpl::kHeadLocked) {
+        break;
+      }
+    }
+    // NOTE: one of the few yield-wait loops, which is rare enough in practice
+    // for its performance to be insignificant.
+    yield_count_.fetch_add(1, std::memory_order_relaxed);
+    std::this_thread::yield();
+  }
+
+  // Do the dirty work of splitting the chain, including updating heads and
+  // chain nexts for new shift amounts.
+  SplitForGrow(grow_home, old_home, old_shift);
+
+  // length_info_ can be updated any time after the new shift amount is
+  // published to both heads, potentially before the end of SplitForGrow.
+  // But we also can't update length_info_ until the previous Grow operation
+  // (with grow_home := this grow_home - 1) has published the new shift amount
+  // to both of its heads.
However, we don't want to artificially wait here + // on that Grow that is otherwise irrelevant. + // + // We could have each Grow operation advance length_info_ here as far as it + // can without waiting, by checking for updated shift on the corresponding + // old home and also stopping at an empty head value for possible grow_home. + // However, this could increase CPU cache line sharing and in 1/64 cases + // bring in an extra page from our mmap. + // + // Instead, part of the strategy is delegated to DoInsert(): + // * Here we try to bring length_info_ up to date with this grow_home as + // much as we can without waiting. It will fall short if a previous Grow + // is still between reserving the grow slot and making the first big step + // to publish the new shift amount. + // * To avoid length_info_ being perpetually out-of-date (for a small number + // of heads) after our last Grow, we do the same when Insert has to "fall + // forward" due to length_info_ being out-of-date. + CatchUpLengthInfoNoWait(grow_home); + + // Success + return true; +} + +// See call in Grow() +void AutoHyperClockTable::CatchUpLengthInfoNoWait( + size_t known_usable_grow_home) { + uint64_t current_length_info = length_info_.load(std::memory_order_acquire); + size_t published_usable_size = LengthInfoToUsedLength(current_length_info); + while (published_usable_size <= known_usable_grow_home) { + // For when published_usable_size was grow_home + size_t next_usable_size = published_usable_size + 1; + uint64_t next_length_info = UsedLengthToLengthInfo(next_usable_size); + + // known_usable_grow_home is known to be ready for Lookup/Insert with + // the new shift amount, but between that and published usable size, we + // need to check. + if (published_usable_size < known_usable_grow_home) { + int old_shift = FloorLog2(next_usable_size - 1); + size_t old_home = BottomNBits(published_usable_size, old_shift); + int shift = + GetShiftFromNextWithShift(array_[old_home].head_next_with_shift.load( + std::memory_order_acquire)); + if (shift <= old_shift) { + // Not ready + break; + } + } + // CAS update length_info_. This only moves in one direction, so if CAS + // fails, someone else made progress like we are trying, and we can just + // pick up the new value and keep going as appropriate. + if (length_info_.compare_exchange_strong( + current_length_info, next_length_info, std::memory_order_acq_rel)) { + current_length_info = next_length_info; + // Update usage_ if metadata charge policy calls for it + if (metadata_charge_policy_ == + CacheMetadataChargePolicy::kFullChargeCacheMetadata) { + // NOTE: ignoring page boundaries for simplicity + usage_.fetch_add(sizeof(HandleImpl), std::memory_order_relaxed); + } + } + published_usable_size = LengthInfoToUsedLength(current_length_info); + } + + // After updating lengh_info_ we can update occupancy_limit_, + // allowing for later operations to update it before us. 
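The occupancy_limit_ update just below is an instance of the general compare-exchange maximum idiom; in generic form the pattern looks roughly like this (a sketch, not part of the patch, assuming <atomic> is available):

    // Raise `target` to at least `value`; never lowers it.
    inline void AtomicFetchMax(std::atomic<size_t>& target, size_t value) {
      size_t cur = target.load(std::memory_order_acquire);
      while (cur < value &&
             !target.compare_exchange_weak(cur, value,
                                           std::memory_order_acq_rel)) {
        // On failure, compare_exchange_weak reloads `cur`, so the loop ends
        // once the stored value is already >= value or our store succeeds.
      }
    }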
+  // Note: there is no std::atomic max operation, so we have to use a CAS loop
+  size_t old_occupancy_limit = occupancy_limit_.load(std::memory_order_acquire);
+  size_t new_occupancy_limit = CalcOccupancyLimit(published_usable_size);
+  while (old_occupancy_limit < new_occupancy_limit) {
+    if (occupancy_limit_.compare_exchange_weak(old_occupancy_limit,
+                                               new_occupancy_limit,
+                                               std::memory_order_acq_rel)) {
+      break;
+    }
+  }
+}
+
+void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home,
+                                       int old_shift) {
+  int new_shift = old_shift + 1;
+  HandleImpl* const arr = array_.Get();
+
+  // We implement a somewhat complicated splitting algorithm to ensure that
+  // entries are always wait-free visible to Lookup, without Lookup needing
+  // to double-check length_info_ to ensure every potentially relevant
+  // existing entry is seen. This works step-by-step, carefully sharing
+  // unmigrated parts of the chain between the source chain and the new
+  // destination chain. This means that Lookup might see a partially migrated
+  // chain so has to take that into consideration when checking that it hasn't
+  // "jumped off" its intended chain (due to a parallel modification to an
+  // "under (de)construction" entry that was found on the chain but has
+  // been reassigned).
+  //
+  // We use a "rewrite lock" on the source and destination chains to exclude
+  // removals from those, and we have a prior waiting step that ensures any
+  // Grow operations feeding into this one have completed. But this process
+  // does have to gracefully handle concurrent insertions to the head of the
+  // source chain, and once marked ready, the destination chain.
+  //
+  // With those considerations, the migration starts with one "big step,"
+  // potentially with retries to deal with insertions in parallel. Part of the
+  // big step is to mark the two chain heads as updated with the new shift
+  // amount, which redirects Lookups to the appropriate new chain.
+  //
+  // After that big step that updates the heads, the rewrite lock makes it
+  // relatively easy to deal with the rest of the migration. Big
+  // simplifications come from being able to read the hashed_key of each
+  // entry on the chain without needing to hold a read reference, and
+  // from never "jumping out to another chain." Concurrent insertions only
+  // happen at the chain head, which is outside of what is left to migrate.
+  //
+  // A series of smaller steps finishes splitting apart the existing chain into
+  // two distinct chains, followed by some steps to fully commit the result.
+  //
+  // Except for trivial cases in which all entries (or remaining entries)
+  // on the input chain go to one output chain, there is an important invariant
+  // after each step of migration, including after the initial "big step":
+  // For each output chain, the "zero chain" (new hash bit is zero) and the
+  // "one chain" (new hash bit is one), we have a "frontier" entry marking the
+  // boundary between what has been migrated and what has not. One of the
+  // frontiers is along the old chain after the other, and all entries between
+  // them are for the same target chain as the earlier frontier. Thus, the
+  // chains share linked list tails starting at the latter frontier. All
+  // pointers from the new head locations to the frontier entries are marked
+  // with the new shift amount, while all pointers after the frontiers use the
+  // old shift amount.
+  //
+  // And after each step there is a strengthening step to reach a stronger
+  // invariant: the frontier earlier in the original chain is advanced to be
+  // immediately before the other frontier.
+  //
+  // Consider this original input chain,
+  //
+  // OldHome -Old-> A0 -Old-> B0 -Old-> A1 -Old-> C0 -Old-> OldHome(End)
+  // GrowHome (empty)
+  //
+  // == BIG STEP ==
+  // The initial big step finds the first entry that will be on each
+  // output chain (in this case A0 and A1). We use brackets ([]) to mark them
+  // as our prospective frontiers.
+  //
+  // OldHome -Old-> [A0] -Old-> B0 -Old-> [A1] -Old-> C0 -Old-> OldHome(End)
+  // GrowHome (empty)
+  //
+  // Next we speculatively update grow_home head to point to the first entry for
+  // the one chain. This will not be used by Lookup until the head at old_home
+  // uses the new shift amount.
+  //
+  // OldHome -Old-> [A0] -Old-> B0 -Old-> [A1] -Old-> C0 -Old-> OldHome(End)
+  // GrowHome --------------New------------/
+  //
+  // Observe that if Lookup were to use the new head at GrowHome, it would be
+  // able to find all relevant entries. Finishing the initial big step
+  // requires a CAS (compare_exchange) of the OldHome head because there
+  // might have been parallel insertions there, in which case we roll back
+  // and try again. (We might need to point GrowHome head differently.)
+  //
+  // OldHome -New-> [A0] -Old-> B0 -Old-> [A1] -Old-> C0 -Old-> OldHome(End)
+  // GrowHome --------------New------------/
+  //
+  // Upgrading the OldHome head pointer with the new shift amount, with a
+  // compare_exchange, completes the initial big step, with [A0] as zero
+  // chain frontier and [A1] as one chain frontier. Links before the frontiers
+  // use the new shift amount and links after use the old shift amount.
+  // == END BIG STEP ==
+  // == STRENGTHENING ==
+  // Zero chain frontier is advanced to [B0] (immediately before other
+  // frontier) by updating pointers with new shift amounts.
+  //
+  // OldHome -New-> A0 -New-> [B0] -Old-> [A1] -Old-> C0 -Old-> OldHome(End)
+  // GrowHome -------------New-----------/
+  //
+  // == END STRENGTHENING ==
+  // == SMALL STEP #1 ==
+  // From the strong invariant state, we need to find the next entry for
+  // the new chain with the earlier frontier. In this case, we need to find
+  // the next entry for the zero chain that comes after [B0], which in this
+  // case is C0. This will be our next zero chain frontier, at least under
+  // the weak invariant. To get there, we simply update the link between
+  // the current two frontiers to skip over the entries irrelevant to the
+  // earlier frontier chain. In this case, the zero chain skips over A1. As a
+  // result, the other chain is now the "earlier."
+  //
+  // OldHome -New-> A0 -New-> B0 -New-> [C0] -Old-> OldHome(End)
+  // GrowHome -New-> [A1] ------Old-----/
+  //
+  // == END SMALL STEP #1 ==
+  //
+  // Repeating the cycle and end handling is not as interesting.
+
+  // Acquire rewrite lock on zero chain (if it's non-empty)
+  ChainRewriteLock zero_head_lock(&arr[old_home], yield_count_);
+  // Create an RAII wrapper for one chain rewrite lock, for once it becomes
+  // non-empty. This head is unused by Lookup and DoInsert until the zero
+  // head is updated with new shift amount.
+  ChainRewriteLock one_head_lock(&arr[grow_home], yield_count_,
+                                 /*already_locked_or_end=*/true);
+  assert(one_head_lock.IsEnd());
+
+  // old_home will also be the head of the new "zero chain" -- all entries in
+  // the "from" chain whose next hash bit is 0.
grow_home will be head of the new + // "one chain". + + // For these, SIZE_MAX is like nullptr (unknown) + size_t zero_chain_frontier = SIZE_MAX; + size_t one_chain_frontier = SIZE_MAX; + size_t cur = SIZE_MAX; + + // Set to 0 (zero chain frontier earlier), 1 (one chain), or -1 (unknown) + int chain_frontier_first = -1; + + // Might need to retry initial update of heads + for (int i = 0;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + assert(zero_chain_frontier == SIZE_MAX); + assert(one_chain_frontier == SIZE_MAX); + assert(cur == SIZE_MAX); + assert(chain_frontier_first == -1); + + uint64_t next_with_shift = zero_head_lock.GetNewHead(); + + // Find a single representative for each target chain, or scan the whole + // chain if some target chain has no representative. + for (;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + + // Loop invariants + assert((chain_frontier_first < 0) == (zero_chain_frontier == SIZE_MAX && + one_chain_frontier == SIZE_MAX)); + assert((cur == SIZE_MAX) == (zero_chain_frontier == SIZE_MAX && + one_chain_frontier == SIZE_MAX)); + + assert(GetShiftFromNextWithShift(next_with_shift) == old_shift); + + // Check for end of original chain + if (HandleImpl::IsEnd(next_with_shift)) { + cur = SIZE_MAX; + break; + } + + // next_with_shift is not End + cur = GetNextFromNextWithShift(next_with_shift); + + if (BottomNBits(arr[cur].hashed_key[1], new_shift) == old_home) { + // Entry for zero chain + if (zero_chain_frontier == SIZE_MAX) { + zero_chain_frontier = cur; + if (one_chain_frontier != SIZE_MAX) { + // Ready to update heads + break; + } + // Nothing yet for one chain + chain_frontier_first = 0; + } + } else { + assert(BottomNBits(arr[cur].hashed_key[1], new_shift) == grow_home); + // Entry for one chain + if (one_chain_frontier == SIZE_MAX) { + one_chain_frontier = cur; + if (zero_chain_frontier != SIZE_MAX) { + // Ready to update heads + break; + } + // Nothing yet for zero chain + chain_frontier_first = 1; + } + } + + next_with_shift = + arr[cur].chain_next_with_shift.load(std::memory_order_acquire); + } + + // Try to update heads for initial migration info + // We only reached the end of the migrate-from chain already if one of the + // target chains will be empty. + assert((cur == SIZE_MAX) == + (zero_chain_frontier == SIZE_MAX || one_chain_frontier == SIZE_MAX)); + assert((chain_frontier_first < 0) == + (zero_chain_frontier == SIZE_MAX && one_chain_frontier == SIZE_MAX)); + + // Always update one chain's head first (safe). + one_head_lock.SimpleUpdate( + one_chain_frontier != SIZE_MAX + ? MakeNextWithShift(one_chain_frontier, new_shift) + : MakeNextWithShiftEnd(grow_home, new_shift)); + + // Make sure length_info_ hasn't been updated too early, as we're about + // to make the change that makes it safe to update (e.g. in DoInsert()) + assert(LengthInfoToUsedLength( + length_info_.load(std::memory_order_acquire)) <= grow_home); + + // Try to set zero's head. + if (zero_head_lock.CasUpdate( + zero_chain_frontier != SIZE_MAX + ? MakeNextWithShift(zero_chain_frontier, new_shift) + : MakeNextWithShiftEnd(old_home, new_shift), + yield_count_)) { + // Both heads successfully updated to new shift + break; + } else { + // Concurrent insertion. This should not happen too many times. + CHECK_TOO_MANY_ITERATIONS(i); + // The easiest solution is to restart. 
+ zero_chain_frontier = SIZE_MAX; + one_chain_frontier = SIZE_MAX; + cur = SIZE_MAX; + chain_frontier_first = -1; + continue; + } + } + + // Except for trivial cases, we have something like + // AHome -New-> [A0] -Old-> [B0] -Old-> [C0] \ | + // BHome --------------------New------------> [A1] -Old-> ... + // And we need to upgrade as much as we can on the "first" chain + // (the one eventually pointing to the other's frontier). This will + // also finish off any case in which one of the targer chains will be empty. + if (chain_frontier_first >= 0) { + size_t& first_frontier = chain_frontier_first == 0 + ? /*&*/ zero_chain_frontier + : /*&*/ one_chain_frontier; + size_t& other_frontier = chain_frontier_first != 0 + ? /*&*/ zero_chain_frontier + : /*&*/ one_chain_frontier; + uint64_t stop_before_or_new_tail = + other_frontier != SIZE_MAX + ? /*stop before*/ MakeNextWithShift(other_frontier, old_shift) + : /*new tail*/ MakeNextWithShiftEnd( + chain_frontier_first == 0 ? old_home : grow_home, new_shift); + UpgradeShiftsOnRange(arr, first_frontier, stop_before_or_new_tail, + old_shift, new_shift); + } + + if (zero_chain_frontier == SIZE_MAX) { + // Already finished migrating + assert(one_chain_frontier == SIZE_MAX); + assert(cur == SIZE_MAX); + } else { + // Still need to migrate between two target chains + for (int i = 0;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + // Overall loop invariants + assert(zero_chain_frontier != SIZE_MAX); + assert(one_chain_frontier != SIZE_MAX); + assert(cur != SIZE_MAX); + assert(chain_frontier_first >= 0); + size_t& first_frontier = chain_frontier_first == 0 + ? /*&*/ zero_chain_frontier + : /*&*/ one_chain_frontier; + size_t& other_frontier = chain_frontier_first != 0 + ? /*&*/ zero_chain_frontier + : /*&*/ one_chain_frontier; + assert(cur != first_frontier); + assert(GetNextFromNextWithShift( + arr[first_frontier].chain_next_with_shift.load( + std::memory_order_acquire)) == other_frontier); + + uint64_t next_with_shift = + arr[cur].chain_next_with_shift.load(std::memory_order_acquire); + + // Check for end of original chain + if (HandleImpl::IsEnd(next_with_shift)) { + // Can set upgraded tail on first chain + uint64_t first_new_tail = MakeNextWithShiftEnd( + chain_frontier_first == 0 ? old_home : grow_home, new_shift); + arr[first_frontier].chain_next_with_shift.store( + first_new_tail, std::memory_order_release); + // And upgrade remainder of other chain + uint64_t other_new_tail = MakeNextWithShiftEnd( + chain_frontier_first != 0 ? 
old_home : grow_home, new_shift); + UpgradeShiftsOnRange(arr, other_frontier, other_new_tail, old_shift, + new_shift); + assert(other_frontier == SIZE_MAX); // Finished + break; + } + + // next_with_shift is not End + cur = GetNextFromNextWithShift(next_with_shift); + + int target_chain; + if (BottomNBits(arr[cur].hashed_key[1], new_shift) == old_home) { + // Entry for zero chain + target_chain = 0; + } else { + assert(BottomNBits(arr[cur].hashed_key[1], new_shift) == grow_home); + // Entry for one chain + target_chain = 1; + } + if (target_chain == chain_frontier_first) { + // Found next entry to skip to on the first chain + uint64_t skip_to = MakeNextWithShift(cur, new_shift); + arr[first_frontier].chain_next_with_shift.store( + skip_to, std::memory_order_release); + first_frontier = cur; + // Upgrade other chain up to entry before that one + UpgradeShiftsOnRange(arr, other_frontier, next_with_shift, old_shift, + new_shift); + // Swap which is marked as first + chain_frontier_first = 1 - chain_frontier_first; + } else { + // Nothing to do yet, as we need to keep old generation pointers in + // place for lookups + } + } + } +} + +// Variant of PurgeImplLocked: Removes all "under (de) construction" entries +// from a chain where already holding a rewrite lock +using PurgeLockedOpData = void; +// Variant of PurgeImplLocked: Clock-updates all entries in a chain, in +// addition to functionality of PurgeLocked, where already holding a rewrite +// lock. (Caller finalizes eviction on entries added to the autovector, in part +// so that we don't hold the rewrite lock while doing potentially expensive +// callback and allocator free.) +using ClockUpdateChainLockedOpData = + autovector; + +template +void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, + ChainRewriteLock& rewrite_lock, + size_t home) { + constexpr bool kIsPurge = std::is_same_v; + constexpr bool kIsClockUpdateChain = + std::is_same_v; + + // Exactly one op specified + static_assert(kIsPurge + kIsClockUpdateChain == 1); + + HandleImpl* const arr = array_.Get(); + + uint64_t next_with_shift = rewrite_lock.GetNewHead(); + assert(!HandleImpl::IsEnd(next_with_shift)); + int home_shift = GetShiftFromNextWithShift(next_with_shift); + (void)home; + (void)home_shift; + HandleImpl* h = &arr[GetNextFromNextWithShift(next_with_shift)]; + HandleImpl* prev_to_keep = nullptr; +#ifndef NDEBUG + uint64_t prev_to_keep_next_with_shift = 0; +#endif + // Whether there are entries between h and prev_to_keep that should be + // purged from the chain. + bool pending_purge = false; + + // Walk the chain, and stitch together any entries that are still + // "shareable," possibly after clock update. prev_to_keep tells us where + // the last "stitch back to" location is (nullptr => head). + for (size_t i = 0;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + + bool purgeable = false; + // In last iteration, h will be nullptr, to stitch together the tail of + // the chain. + if (h) { + // NOTE: holding a rewrite lock on the chain prevents any "under + // (de)construction" entries in the chain from being marked empty, which + // allows us to access the hashed_keys without holding a read ref. 
+ assert(home == BottomNBits(h->hashed_key[1], home_shift)); + if constexpr (kIsClockUpdateChain) { + // Clock update and/or check for purgeable (under (de)construction) + if (ClockUpdate(*h, &purgeable)) { + // Remember for finishing eviction + op_data->push_back(h); + // Entries for eviction become purgeable + purgeable = true; + assert((h->meta.load(std::memory_order_acquire) >> + ClockHandle::kStateShift) & + ClockHandle::kStateOccupiedBit); + } + } else { + (void)op_data; + purgeable = ((h->meta.load(std::memory_order_acquire) >> + ClockHandle::kStateShift) & + ClockHandle::kStateShareableBit) == 0; + } + } + + if (purgeable) { + assert((h->meta.load(std::memory_order_acquire) >> + ClockHandle::kStateShift) & + ClockHandle::kStateOccupiedBit); + pending_purge = true; + } else if (pending_purge) { + if (prev_to_keep) { + // Update chain next to skip purgeable entries + assert(prev_to_keep->chain_next_with_shift.load( + std::memory_order_acquire) == prev_to_keep_next_with_shift); + prev_to_keep->chain_next_with_shift.store(next_with_shift, + std::memory_order_release); + } else if (rewrite_lock.CasUpdate(next_with_shift, yield_count_)) { + // Managed to update head without any parallel insertions + } else { + // Parallel insertion must have interfered. Need to do a purge + // from updated head to here. Since we have no prev_to_keep, there's + // no risk of duplicate clock updates to entries. Any entries already + // updated must have been evicted (purgeable) and it's OK to clock + // update any new entries just inserted in parallel. + // Can simply restart (GetNewHead() already updated from CAS failure). + next_with_shift = rewrite_lock.GetNewHead(); + assert(!HandleImpl::IsEnd(next_with_shift)); + h = &arr[GetNextFromNextWithShift(next_with_shift)]; + pending_purge = false; + assert(prev_to_keep == nullptr); + continue; + } + pending_purge = false; + prev_to_keep = h; + } else { + prev_to_keep = h; + } + + if (h == nullptr) { + // Reached end of the chain + return; + } + + // Read chain pointer + next_with_shift = h->chain_next_with_shift.load(std::memory_order_acquire); +#ifndef NDEBUG + if (prev_to_keep == h) { + prev_to_keep_next_with_shift = next_with_shift; + } +#endif + + assert(GetShiftFromNextWithShift(next_with_shift) == home_shift); + + // Check for end marker + if (HandleImpl::IsEnd(next_with_shift)) { + h = nullptr; + } else { + h = &arr[GetNextFromNextWithShift(next_with_shift)]; + assert(h != prev_to_keep); + } + } +} + +// Variant of PurgeImpl: Removes all "under (de) construction" entries in a +// chain, such that any entry with the given key must have been purged. +using PurgeOpData = const UniqueId64x2; +// Variant of PurgeImpl: Clock-updates all entries in a chain, in addition to +// purging as appropriate. (Caller finalizes eviction on entries added to the +// autovector, in part so that we don't hold the rewrite lock while doing +// potentially expensive callback and allocator free.) +using ClockUpdateChainOpData = ClockUpdateChainLockedOpData; + +template +void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { + // Early efforts to make AutoHCC fully wait-free ran into too many problems + // that needed obscure and potentially inefficient work-arounds to have a + // chance at working. + // + // The implementation settled on "essentially wait-free" which can be + // achieved by locking at the level of each probing chain and only for + // operations that might remove entries from the chain. 
Because parallel + // clock updates and Grow operations are ordered, contention is very rare. + // However, parallel insertions at any chain head have to be accommodated + // to keep them wait-free. + // + // This function implements Purge and ClockUpdateChain functions (see above + // OpData type definitions) as part of higher-level operations. This function + // ensures the correct chain is (eventually) covered and handles rewrite + // locking the chain. PurgeImplLocked has lower level details. + // + // In general, these operations and Grow are kept simpler by allowing eager + // purging of under (de-)construction entries. For example, an Erase + // operation might find that another thread has purged the entry from the + // chain by the time its own purge operation acquires the rewrite lock and + // proceeds. This is OK, and potentially reduces the number of lock/unlock + // cycles because empty chains are not rewrite-lockable. + + constexpr bool kIsPurge = std::is_same_v; + constexpr bool kIsClockUpdateChain = + std::is_same_v; + + // Exactly one op specified + static_assert(kIsPurge + kIsClockUpdateChain == 1); + + int home_shift = 0; + if constexpr (kIsPurge) { + // Purge callers leave home unspecified, to be determined from key + assert(home == SIZE_MAX); + GetHomeIndexAndShift(length_info_.load(std::memory_order_acquire), + (*op_data)[1], &home, &home_shift); + assert(home_shift > 0); + } else { + // Evict callers must specify home + assert(home < SIZE_MAX); + } + + HandleImpl* const arr = array_.Get(); + + // Acquire the RAII rewrite lock (if not an empty chain) + ChainRewriteLock rewrite_lock(&arr[home], yield_count_); + + int shift; + for (;;) { + shift = GetShiftFromNextWithShift(rewrite_lock.GetNewHead()); + + if constexpr (kIsPurge) { + if (shift > home_shift) { + // At head. Thus, we know the newer shift applies to us. + // Newer shift might not yet be reflected in length_info_ (an atomicity + // gap in Grow), so operate as if it is. Note that other insertions + // could happen using this shift before length_info_ is updated, and + // it's possible (though unlikely) that multiple generations of Grow + // have occurred. If shift is more than one generation ahead of + // home_shift, it's possible that not all descendent homes have + // reached the `shift` generation. Thus, we need to advance only one + // shift at a time looking for a home+head with a matching shift + // amount. + home_shift++; + home = GetHomeIndex((*op_data)[1], home_shift); + rewrite_lock.Reset(&arr[home], yield_count_); + continue; + } else { + assert(shift == home_shift); + } + } else { + assert(home_shift == 0); + home_shift = shift; + } + break; + } + + // If the chain is empty, nothing to do + if (!rewrite_lock.IsEnd()) { + if constexpr (kIsPurge) { + PurgeLockedOpData* locked_op_data{}; + PurgeImplLocked(locked_op_data, rewrite_lock, home); + } else { + PurgeImplLocked(op_data, rewrite_lock, home); + } + } +} + +AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( + const ClockHandleBasicData& proto, uint64_t initial_countdown, + bool take_ref, InsertState& state) { + size_t home; + int orig_home_shift; + GetHomeIndexAndShift(state.saved_length_info, proto.hashed_key[1], &home, + &orig_home_shift); + HandleImpl* const arr = array_.Get(); + + // We could go searching through the chain for any duplicate, but that's + // not typically helpful, except for the REDUNDANT block cache stats. + // (Inferior duplicates will age out with eviction.) 
However, we do skip
+  // insertion if the home slot already has a match (already_matches below),
+  // so that we keep better CPU cache locality when we can.
+  //
+  // And we can do that as part of searching for an available slot to
+  // insert the new entry, because our preferred location and first slot
+  // checked will be the home slot.
+  //
+  // As the table initially grows to size, few entries will be in the same
+  // cache line as the chain head. However, churn in the cache relatively
+  // quickly improves the proportion of entries sharing that cache line with
+  // the chain head. Data:
+  //
+  // Initial population only: (cache_bench with -ops_per_thread=1)
+  // Entries at home count: 29,202 (out of 129,170 entries in 94,411 chains)
+  // Approximate average cache lines read to find an existing entry:
+  //   129.2 / 94.4 [without the heads]
+  //   + (94.4 - 29.2) / 94.4 [the heads not included with entries]
+  //   = 2.06 cache lines
+  //
+  // After 10 million ops: (-threads=10 -ops_per_thread=100000)
+  // Entries at home count: 67,556 (out of 129,359 entries in 94,756 chains)
+  // That's a majority of entries and more than 2/3rds of chains.
+  // Approximate average cache lines read to find an existing entry:
+  //   = 1.65 cache lines
+
+  size_t used_length = LengthInfoToUsedLength(state.saved_length_info);
+  assert(home < used_length);
+
+  size_t idx = home;
+  bool already_matches = false;
+  if (!TryInsert(proto, arr[idx], initial_countdown, take_ref,
+                 &already_matches)) {
+    if (already_matches) {
+      return nullptr;
+    }
+
+    // We need to search for an available slot outside of the home.
+    // Linear hashing provides nice resizing but does typically mean
+    // that some heads (home locations) have (in expectation) twice as
+    // many entries mapped to them as other heads. For example, if the
+    // usable length is 80, then heads 16-63 are (in expectation) twice
+    // as loaded as heads 0-15 and 64-79, which are using another hash bit.
+    //
+    // This means that if we just use linear probing (by a small constant)
+    // to find an available slot, part of the structure could easily fill up
+    // and resort to linear time operations even when the overall load factor
+    // is only modestly high, like 70%. Even though each slot has its own CPU
+    // cache line, there is likely a small locality benefit (e.g. TLB and
+    // paging) to iterating one by one, but obviously not with the linear
+    // hashing imbalance.
+    //
+    // In a traditional non-concurrent structure, we could keep a "free list"
+    // to ensure immediate access to an available slot, but maintaining such
+    // a structure could require more cross-thread coordination to ensure
+    // all entries are eventually available to all threads.
+    //
+    // The way we solve this problem is to use linear probing but try to
+    // correct for the linear hashing imbalance (when probing beyond the
+    // home slot). If the home is high load (minimum shift), we choose an
+    // alternate location, uniformly among all slots, to linear probe from.
+    //
+    // Supporting data: we can use FixedHyperClockCache to get a baseline
+    // of near-ideal distribution of occupied slots, with its uniform
+    // distribution and double hashing.
+    // $ ./cache_bench -cache_type=fixed_hyper_clock_cache -histograms=0
+    //   -cache_size=1300000000
+    // ...
+    // Slot occupancy stats: Overall 59% (156629/262144),
+    // Min/Max/Window = 47%/70%/500, MaxRun{Pos/Neg} = 22/15
+    //
+    // Now we can try various sizes between powers of two with AutoHCC to see
+    // how bad the MaxRun can be.
+ // $ for I in `seq 8 15`; do + // ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 + // -cache_size=${I}00000000 2>&1 | grep clock_cache.cc; done + // where the worst case MaxRun was with I=11: + // Slot occupancy stats: Overall 59% (132528/221094), + // Min/Max/Window = 44%/73%/500, MaxRun{Pos/Neg} = 64/19 + // + // The large table size offers a large sample size to be confident that + // this is an acceptable level of clustering (max ~3x probe length) + // compared to no clustering. Increasing the max load factor to 0.7 + // increases the MaxRun above 100, potentially much closer to a tipping + // point. + + // TODO? remember a freed entry from eviction, possibly in thread local + + size_t start = home; + if (orig_home_shift == LengthInfoToMinShift(state.saved_length_info)) { + start = FastRange64(proto.hashed_key[0], used_length); + } + idx = start; + for (int cycles = 0;;) { + if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + &already_matches)) { + break; + } + if (already_matches) { + return nullptr; + } + ++idx; + if (idx >= used_length) { + // In case the structure has grown, double-check + StartInsert(state); + used_length = LengthInfoToUsedLength(state.saved_length_info); + if (idx >= used_length) { + idx = 0; + } + } + if (idx == start) { + // Cycling back should not happen unless there is enough random churn + // in parallel that we happen to hit each slot at a time that it's + // occupied, which is really only feasible for small structures, though + // with linear probing to find empty slots, "small" here might be + // larger than for double hashing. + assert(used_length <= 256); + ++cycles; + if (cycles > 2) { + // Fall back on standalone insert in case something goes awry to + // cause this + return nullptr; + } + } + } + } + + // Now insert into chain using head pointer + uint64_t next_with_shift; + int home_shift = orig_home_shift; + + // Might need to retry + for (int i = 0;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + next_with_shift = + arr[home].head_next_with_shift.load(std::memory_order_acquire); + int shift = GetShiftFromNextWithShift(next_with_shift); + + if (UNLIKELY(shift != home_shift)) { + // NOTE: shift increases with table growth + if (shift > home_shift) { + // Must be grow in progress or completed since reading length_info. + // Pull out one more hash bit. (See Lookup() for why we can't + // safely jump to the shift that was read.) 
+ home_shift++; + uint64_t hash_bit_mask = uint64_t{1} << (home_shift - 1); + assert((home & hash_bit_mask) == 0); + // BEGIN leftover updates to length_info_ for Grow() + size_t grow_home = home + hash_bit_mask; + assert(arr[grow_home].head_next_with_shift.load( + std::memory_order_acquire) != HandleImpl::kUnusedMarker); + CatchUpLengthInfoNoWait(grow_home); + // END leftover updates to length_info_ for Grow() + home += proto.hashed_key[1] & hash_bit_mask; + continue; + } else { + // Should not happen because length_info_ is only updated after both + // old and new home heads are marked with new shift + assert(false); + } + } + + // Values to update to + uint64_t head_next_with_shift = MakeNextWithShift(idx, home_shift); + uint64_t chain_next_with_shift = next_with_shift; + + // Preserve the locked state in head, without propagating to chain next + // where it is meaningless (and not allowed) + if (UNLIKELY((next_with_shift & HandleImpl::kNextEndFlags) == + HandleImpl::kHeadLocked)) { + head_next_with_shift |= HandleImpl::kHeadLocked; + chain_next_with_shift &= ~HandleImpl::kHeadLocked; + } + + arr[idx].chain_next_with_shift.store(chain_next_with_shift, + std::memory_order_release); + if (arr[home].head_next_with_shift.compare_exchange_weak( + next_with_shift, head_next_with_shift, std::memory_order_acq_rel)) { + // Success + return arr + idx; + } + } +} + +AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( + const UniqueId64x2& hashed_key) { + // Lookups are wait-free with low occurrence of retries, back-tracking, + // and fallback. We do not have the benefit of holding a rewrite lock on + // the chain so must be prepared for many kinds of mayhem, most notably + // "falling off our chain" where a slot that Lookup has identified but + // has not read-referenced is removed from one chain and inserted into + // another. The full algorithm uses the following mitigation strategies to + // ensure every relevant entry inserted before this Lookup, and not yet + // evicted, is seen by Lookup, without excessive backtracking etc.: + // * Keep a known good read ref in the chain for "island hopping." When + // we observe that a concurrent write takes us off to another chain, we + // only need to fall back to our last known good read ref (most recent + // entry on the chain that is not "under construction," which is a transient + // state). We don't want to compound the CPU toil of a long chain with + // operations that might need to retry from scratch, with probability + // in proportion to chain length. + // * Only detect a chain is potentially incomplete because of a Grow in + // progress by looking at shift in the next pointer tags (rather than + // re-checking length_info_). + // * SplitForGrow, Insert, and PurgeImplLocked ensure that there are no + // transient states that might cause this full Lookup algorithm to skip over + // live entries. + + // Reading length_info_ is not strictly required for Lookup, if we were + // to increment shift sizes until we see a shift size match on the + // relevant head pointer. Thus, reading with relaxed memory order gives + // us a safe and almost always up-to-date jump into finding the correct + // home and head. + size_t home; + int home_shift; + GetHomeIndexAndShift(length_info_.load(std::memory_order_relaxed), + hashed_key[1], &home, &home_shift); + assert(home_shift > 0); + + // The full Lookup algorithm however is not great for hot path efficiency, + // because of the extra careful tracking described above. 
Overwhelmingly, + // we can find what we're looking for with a naive linked list traversal + // of the chain. Even if we "fall off our chain" to another, we don't + // violate memory safety. We just won't match the key we're looking for. + // And we would eventually reach an end state, possibly even experiencing a + // cycle as an entry is freed and reused during our traversal (though at + // any point in time the structure doesn't have cycles). + // + // So for hot path efficiency, we start with a naive Lookup attempt, and + // then fall back on full Lookup if we don't find the correct entry. To + // cap how much we invest into the naive Lookup, we simply cap the traversal + // length before falling back. Also, when we do fall back on full Lookup, + // we aren't paying much penalty by starting over. Much or most of the cost + // of Lookup is memory latency in following the chain pointers, and the + // naive Lookup has warmed the CPU cache for these entries, using as tight + // of a loop as possible. + + HandleImpl* const arr = array_.Get(); + uint64_t next_with_shift = arr[home].head_next_with_shift; + for (size_t i = 0; !HandleImpl::IsEnd(next_with_shift) && i < 10; ++i) { + HandleImpl* h = &arr[GetNextFromNextWithShift(next_with_shift)]; + // Attempt cheap key match without acquiring a read ref. This could give a + // false positive, which is re-checked after acquiring read ref, or false + // negative, which is re-checked in the full Lookup. + + // We need to make the reads relaxed atomic to avoid TSAN reporting + // race conditions. And we can skip the cheap key match optimization + // altogether if 64-bit atomics not supported lock-free. Also, using + // & rather than && to give more flexibility to the compiler and CPU. + if (!std::atomic::is_always_lock_free || + sizeof(std::atomic) != sizeof(uint64_t) || + (int{reinterpret_cast&>(h->hashed_key[0]) + .load(std::memory_order_relaxed) == hashed_key[0]} & + int{reinterpret_cast&>(h->hashed_key[1]) + .load(std::memory_order_relaxed) == hashed_key[1]})) { + // Increment acquire counter for definitive check + uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, + std::memory_order_acquire); + // Check if it's a referencable (sharable) entry + if (LIKELY(old_meta & (uint64_t{ClockHandle::kStateShareableBit} + << ClockHandle::kStateShift))) { + assert(GetRefcount(old_meta + ClockHandle::kAcquireIncrement) > 0); + if (LIKELY(h->hashed_key == hashed_key) && + LIKELY(old_meta & (uint64_t{ClockHandle::kStateVisibleBit} + << ClockHandle::kStateShift))) { + return h; + } else { + Unref(*h); + } + } else { + // For non-sharable states, incrementing the acquire counter has no + // effect so we don't need to undo it. Furthermore, we cannot safely + // undo it because we did not acquire a read reference to lock the entry + // in a Shareable state. + } + } + + next_with_shift = h->chain_next_with_shift.load(std::memory_order_relaxed); + } + + // If we get here, falling back on full Lookup algorithm. + HandleImpl* h = nullptr; + HandleImpl* read_ref_on_chain = nullptr; + + for (size_t i = 0;; ++i) { + CHECK_TOO_MANY_ITERATIONS(i); + // Read head or chain pointer + next_with_shift = + h ? h->chain_next_with_shift : arr[home].head_next_with_shift; + int shift = GetShiftFromNextWithShift(next_with_shift); + + // Make sure it's usable + size_t effective_home = home; + if (UNLIKELY(shift != home_shift)) { + // We have potentially gone awry somehow, but it's possible we're just + // hitting old data that is not yet completed Grow. 
+ // NOTE: shift bits goes up with table growth. + if (shift < home_shift) { + // To avoid waiting on Grow in progress, an old shift amount needs + // to be processed as if we were still using it and (potentially + // different or the same) the old home. + // We can assert it's not too old, because each generation of Grow + // waits on its ancestor in the previous generation. + assert(shift + 1 == home_shift); + effective_home = GetHomeIndex(home, shift); + } else if (h == read_ref_on_chain) { + assert(shift > home_shift); + // At head or coming from an entry on our chain where we're holding + // a read reference. Thus, we know the newer shift applies to us. + // Newer shift might not yet be reflected in length_info_ (an atomicity + // gap in Grow), so operate as if it is. Note that other insertions + // could happen using this shift before length_info_ is updated, and + // it's possible (though unlikely) that multiple generations of Grow + // have occurred. If shift is more than one generation ahead of + // home_shift, it's possible that not all descendent homes have + // reached the `shift` generation. Thus, we need to advance only one + // shift at a time looking for a home+head with a matching shift + // amount. + home_shift++; + // Update home in case it has changed + home = GetHomeIndex(hashed_key[1], home_shift); + // This should be rare enough occurrence that it's simplest just + // to restart (TODO: improve in some cases?) + h = nullptr; + if (read_ref_on_chain) { + Unref(*read_ref_on_chain); + read_ref_on_chain = nullptr; + } + // Didn't make progress & retry + continue; + } else { + assert(shift > home_shift); + assert(h != nullptr); + // An "under (de)construction" entry has a new shift amount, which + // means we have either gotten off our chain or our home shift is out + // of date. If we revert back to saved ref, we will get updated info. + h = read_ref_on_chain; + // Didn't make progress & retry + continue; + } + } + + // Check for end marker + if (HandleImpl::IsEnd(next_with_shift)) { + // To ensure we didn't miss anything in the chain, the end marker must + // point back to the correct home. + if (LIKELY(GetNextFromNextWithShift(next_with_shift) == effective_home)) { + // Complete, clean iteration of the chain, not found. + // Clean up. + if (read_ref_on_chain) { + Unref(*read_ref_on_chain); + } + return nullptr; + } else { + // Something went awry. Revert back to a safe point (if we have it) + h = read_ref_on_chain; + // Didn't make progress & retry + continue; + } + } + + // Follow the next and check for full key match, home match, or neither + h = &arr[GetNextFromNextWithShift(next_with_shift)]; + bool full_match_or_unknown = false; + if (MatchAndRef(&hashed_key, *h, home_shift, home, + &full_match_or_unknown)) { + // Got a read ref on next (h). + // + // There is a very small chance that between getting the next pointer + // (now h) and doing MatchAndRef on it, another thread erased/evicted it + // reinserted it into the same chain, causing us to cycle back in the + // same chain and potentially see some entries again if we keep walking. + // Newly-inserted entries are inserted before older ones, so we are at + // least guaranteed not to miss anything. + // * For kIsLookup, this is ok, as it's just a transient, slight hiccup + // in performance. + // * For kIsRemove, we are careful in overwriting the next pointer. The + // replacement value comes from the next pointer on an entry that we + // exclusively own. 
If that entry is still connected to the chain, its + // next must be valid for the chain. If it's not still connected to the + // chain (e.g. to unblock another thread Grow op), we will either not + // find the entry to remove on the chain or the CAS attempt to replace + // the appropriate next will fail, in which case we'll try again to find + // the removal target on the chain. + // * For kIsClockUpdateChain, we essentially have a special case of + // kIsRemove, as we only need to remove entries where we have taken + // ownership of one for eviction. In rare cases, we might + // double-clock-update some entries (ok as long as it's rare). + + // With new usable read ref, can release old one if applicable + if (read_ref_on_chain) { + // Pretend we never took the reference. + Unref(*read_ref_on_chain); + } + if (full_match_or_unknown) { + // Full match. + // Update the hit bit + if (eviction_callback_) { + h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift, + std::memory_order_relaxed); + } + // All done. + return h; + } else { + // Correct home location, so we are on the right chain + read_ref_on_chain = h; + } + } else { + if (full_match_or_unknown) { + // Must have been an "under construction" entry. Can safely skip it, + // but there's a chance we'll have to backtrack later + } else { + // Home mismatch! Revert back to a safe point (if we have it) + h = read_ref_on_chain; + // Didn't make progress & retry + } + } + } +} + +void AutoHyperClockTable::Remove(HandleImpl* h) { + assert((h->meta.load() >> ClockHandle::kStateShift) == + ClockHandle::kStateConstruction); + + const HandleImpl& c_h = *h; + PurgeImpl(&c_h.hashed_key); +} + +bool AutoHyperClockTable::TryEraseHandle(HandleImpl* h, bool holding_ref, + bool mark_invisible) { + uint64_t meta; + if (mark_invisible) { + // Set invisible + meta = h->meta.fetch_and( + ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift), + std::memory_order_acq_rel); + // To local variable also + meta &= + ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift); + } else { + meta = h->meta.load(std::memory_order_acquire); + } + + // Take ownership if no other refs + do { + if (GetRefcount(meta) != uint64_t{holding_ref}) { + // Not last ref at some point in time during this call + return false; + } + if ((meta & (uint64_t{ClockHandle::kStateShareableBit} + << ClockHandle::kStateShift)) == 0) { + // Someone else took ownership + return false; + } + // Note that if !holding_ref, there's a small chance that we release, + // another thread replaces this entry with another, reaches zero refs, and + // then we end up erasing that other entry. That's an acceptable risk / + // imprecision. + } while (!h->meta.compare_exchange_weak( + meta, + uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, + std::memory_order_acquire)); + // Took ownership + // TODO? Delay freeing? 
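The ownership check above relies on GetRefcount(), which, per the counter layout asserted elsewhere in this file, is essentially the difference between the packed acquire and release counters; a hedged sketch using the ClockHandle constants from clock_cache.h:

    // Sketch only; the real GetRefcount() is defined earlier in this file.
    inline uint64_t GetRefcountSketch(uint64_t meta) {
      uint64_t acquires = (meta >> ClockHandle::kAcquireCounterShift) &
                          ClockHandle::kCounterMask;
      uint64_t releases = (meta >> ClockHandle::kReleaseCounterShift) &
                          ClockHandle::kCounterMask;
      // Outstanding references, modulo counter width
      return (acquires - releases) & ClockHandle::kCounterMask;
    }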
+ h->FreeData(allocator_); + size_t total_charge = h->total_charge; + if (UNLIKELY(h->IsStandalone())) { + // Delete detached handle + delete h; + standalone_usage_.fetch_sub(total_charge, std::memory_order_relaxed); + } else { + Remove(h); + MarkEmpty(*h); + occupancy_.fetch_sub(1U, std::memory_order_release); + } + usage_.fetch_sub(total_charge, std::memory_order_relaxed); + assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); + return true; +} + +bool AutoHyperClockTable::Release(HandleImpl* h, bool useful, + bool erase_if_last_ref) { + // In contrast with LRUCache's Release, this function won't delete the handle + // when the cache is above capacity and the reference is the last one. Space + // is only freed up by Evict/PurgeImpl (called by Insert when space + // is needed) and Erase. We do this to avoid an extra atomic read of the + // variable usage_. + + uint64_t old_meta; + if (useful) { + // Increment release counter to indicate was used + old_meta = h->meta.fetch_add(ClockHandle::kReleaseIncrement, + std::memory_order_release); + // Correct for possible (but rare) overflow + CorrectNearOverflow(old_meta, h->meta); + } else { + // Decrement acquire counter to pretend it never happened + old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement, + std::memory_order_release); + } + + assert((old_meta >> ClockHandle::kStateShift) & + ClockHandle::kStateShareableBit); + // No underflow + assert(((old_meta >> ClockHandle::kAcquireCounterShift) & + ClockHandle::kCounterMask) != + ((old_meta >> ClockHandle::kReleaseCounterShift) & + ClockHandle::kCounterMask)); + + if ((erase_if_last_ref || UNLIKELY(old_meta >> ClockHandle::kStateShift == + ClockHandle::kStateInvisible))) { + // FIXME: There's a chance here that another thread could replace this + // entry and we end up erasing the wrong one. + return TryEraseHandle(h, /*holding_ref=*/false, /*mark_invisible=*/false); + } else { + return false; + } +} + +#ifndef NDEBUG +void AutoHyperClockTable::TEST_ReleaseN(HandleImpl* h, size_t n) { + if (n > 0) { + // Do n-1 simple releases first + TEST_ReleaseNMinus1(h, n); + + // Then the last release might be more involved + Release(h, /*useful*/ true, /*erase_if_last_ref*/ false); + } +} +#endif + +void AutoHyperClockTable::Erase(const UniqueId64x2& hashed_key) { + // Don't need to be efficient. + // Might be one match masking another, so loop. + while (HandleImpl* h = Lookup(hashed_key)) { + bool gone = + TryEraseHandle(h, /*holding_ref=*/true, /*mark_invisible=*/true); + if (!gone) { + // Only marked invisible, which is ok. + // Pretend we never took the reference from Lookup. 
+ Unref(*h); + } + } +} + +void AutoHyperClockTable::EraseUnRefEntries() { + size_t usable_size = GetTableSize(); + for (size_t i = 0; i < usable_size; i++) { + HandleImpl& h = array_[i]; + + uint64_t old_meta = h.meta.load(std::memory_order_relaxed); + if (old_meta & (uint64_t{ClockHandle::kStateShareableBit} + << ClockHandle::kStateShift) && + GetRefcount(old_meta) == 0 && + h.meta.compare_exchange_strong(old_meta, + uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift, + std::memory_order_acquire)) { + // Took ownership + h.FreeData(allocator_); + usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); + // NOTE: could be more efficient with a dedicated variant of + // PurgeImpl, but this is not a common operation + Remove(&h); + MarkEmpty(h); + occupancy_.fetch_sub(1U, std::memory_order_release); + } + } +} + +void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, + EvictionData* data) { + // precondition + assert(requested_charge > 0); + + // We need the clock pointer to seemlessly "wrap around" at the end of the + // table, and to be reasonably stable under Grow operations. This is + // challenging when the linear hashing progressively opens additional + // most-significant-hash-bits in determining home locations. + + // TODO: make a tuning parameter? + // Up to 2x this number of homes will be evicted per step. In very rare + // cases, possibly more, as homes of an out-of-date generation will be + // resolved to multiple in a newer generation. + constexpr size_t step_size = 4; + + // A clock_pointer_mask_ field separate from length_info_ enables us to use + // the same mask (way of dividing up the space among evicting threads) for + // iterating over the whole structure before considering changing the mask + // at the beginning of each pass. This ensures we do not have a large portion + // of the space that receives redundant or missed clock updates. However, + // with two variables, for each update to clock_pointer_mask (< 64 ever in + // the life of the cache), there will be a brief period where concurrent + // eviction threads could use the old mask value, possibly causing redundant + // or missed clock updates for a *small* portion of the table. + size_t clock_pointer_mask = + clock_pointer_mask_.load(std::memory_order_relaxed); + + uint64_t max_clock_pointer = 0; // unset + + // TODO: consider updating during a long eviction + size_t used_length = LengthInfoToUsedLength(state.saved_length_info); + + autovector to_finish_eviction; + + // Loop until enough freed, or limit reached (see bottom of loop) + for (;;) { + // First (concurrent) increment clock pointer + uint64_t old_clock_pointer = + clock_pointer_.fetch_add(step_size, std::memory_order_relaxed); + + if (UNLIKELY((old_clock_pointer & clock_pointer_mask) == 0)) { + // Back at the beginning. See if clock_pointer_mask should be updated. 
+ uint64_t mask = BottomNBits( + UINT64_MAX, LengthInfoToMinShift(state.saved_length_info)); + if (clock_pointer_mask != mask) { + clock_pointer_mask = static_cast(mask); + clock_pointer_mask_.store(clock_pointer_mask, + std::memory_order_relaxed); + } + } + + size_t major_step = clock_pointer_mask + 1; + assert((major_step & clock_pointer_mask) == 0); + + for (size_t base_home = old_clock_pointer & clock_pointer_mask; + base_home < used_length; base_home += major_step) { + for (size_t i = 0; i < step_size; i++) { + size_t home = base_home + i; + if (home >= used_length) { + break; + } + PurgeImpl(&to_finish_eviction, home); + } + } + + for (HandleImpl* h : to_finish_eviction) { + TrackAndReleaseEvictedEntry(h, data); + } + to_finish_eviction.clear(); + + // Loop exit conditions + if (data->freed_charge >= requested_charge) { + return; + } + + if (max_clock_pointer == 0) { + // Cap the eviction effort at this thread (along with those operating in + // parallel) circling through the whole structure kMaxCountdown times. + // In other words, this eviction run must find something/anything that is + // unreferenced at start of and during the eviction run that isn't + // reclaimed by a concurrent eviction run. + // TODO: Does HyperClockCache need kMaxCountdown + 1? + max_clock_pointer = + old_clock_pointer + + (uint64_t{ClockHandle::kMaxCountdown + 1} * major_step); + } + + if (old_clock_pointer + step_size >= max_clock_pointer) { + return; + } + } +} + +size_t AutoHyperClockTable::CalcMaxUsableLength( + size_t capacity, size_t min_avg_value_size, + CacheMetadataChargePolicy metadata_charge_policy) { + double min_avg_slot_charge = min_avg_value_size * kMaxLoadFactor; + if (metadata_charge_policy == kFullChargeCacheMetadata) { + min_avg_slot_charge += sizeof(HandleImpl); + } + assert(min_avg_slot_charge > 0.0); + size_t num_slots = + static_cast(capacity / min_avg_slot_charge + 0.999999); + + const size_t slots_per_page = port::kPageSize / sizeof(HandleImpl); + + // Round up to page size + return ((num_slots + slots_per_page - 1) / slots_per_page) * slots_per_page; +} + +namespace { +bool IsHeadNonempty(const AutoHyperClockTable::HandleImpl& h) { + return !AutoHyperClockTable::HandleImpl::IsEnd( + h.head_next_with_shift.load(std::memory_order_relaxed)); +} +bool IsEntryAtHome(const AutoHyperClockTable::HandleImpl& h, int shift, + size_t home) { + if (MatchAndRef(nullptr, h, shift, home)) { + Unref(h); + return true; + } else { + return false; + } +} +} // namespace + +void AutoHyperClockCache::ReportProblems( + const std::shared_ptr& info_log) const { + BaseHyperClockCache::ReportProblems(info_log); + + if (info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { + LoadVarianceStats head_stats; + size_t entry_at_home_count = 0; + uint64_t yield_count = 0; + this->ForEachShard([&](const Shard* shard) { + size_t count = shard->GetTableAddressCount(); + uint64_t length_info = UsedLengthToLengthInfo(count); + for (size_t i = 0; i < count; ++i) { + const auto& h = *shard->GetTable().HandlePtr(i); + head_stats.Add(IsHeadNonempty(h)); + int shift; + size_t home; + GetHomeIndexAndShift(length_info, i, &home, &shift); + assert(home == i); + entry_at_home_count += IsEntryAtHome(h, shift, home); + } + yield_count += shard->GetTable().GetYieldCount(); + }); + ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, + "Head occupancy stats: %s", head_stats.Report().c_str()); + ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, + "Entries at home count: %zu", entry_at_home_count); + 
ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, + "Yield count: %" PRIu64, yield_count); + } +} + } // namespace clock_cache // DEPRECATED (see public API) @@ -1640,11 +3601,6 @@ std::shared_ptr HyperClockCacheOptions::MakeSharedCache() const { } std::shared_ptr cache; if (opts.estimated_entry_charge == 0) { - // BEGIN placeholder logic to be removed - // This is sufficient to get the placeholder Auto working in unit tests - // much like the Fixed version. - opts.estimated_entry_charge = opts.min_avg_entry_charge; - // END placeholder logic to be removed cache = std::make_shared(opts); } else { cache = std::make_shared(opts); diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 65f8662a8..eb0253199 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -20,6 +20,7 @@ #include "cache/sharded_cache.h" #include "port/lang.h" #include "port/malloc.h" +#include "port/mmap.h" #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/secondary_cache.h" @@ -39,24 +40,31 @@ class ClockCacheTest; // // Benefits // -------- -// * Fully lock free (no waits or spins) for efficiency under high concurrency +// * Lock/wait free (no waits or spins) for efficiency under high concurrency +// * Fixed version (estimated_entry_charge > 0) is fully lock/wait free +// * Automatic version (estimated_entry_charge = 0) has rare waits among +// certain insertion or erase operations that involve the same very small +// set of entries. // * Optimized for hot path reads. For concurrency control, most Lookup() and // essentially all Release() are a single atomic add operation. -// * Eviction on insertion is fully parallel and lock-free. +// * Eviction on insertion is fully parallel. // * Uses a generalized + aging variant of CLOCK eviction that might outperform // LRU in some cases. (For background, see // https://en.wikipedia.org/wiki/Page_replacement_algorithm) // // Costs // ----- -// * Hash table is not resizable (for lock-free efficiency) so capacity is not -// dynamically changeable. Rely on an estimated average value (block) size for +// * FixedHyperClockCache (estimated_entry_charge > 0) - Hash table is not +// resizable (for lock-free efficiency) so capacity is not dynamically +// changeable. Rely on an estimated average value (block) size for // space+time efficiency. (See estimated_entry_charge option details.) +// EXPERIMENTAL - This limitation is fixed in AutoHyperClockCache, activated +// with estimated_entry_charge == 0. // * Insert usually does not (but might) overwrite a previous entry associated -// with a cache key. This is OK for RocksDB uses of Cache. +// with a cache key. This is OK for RocksDB uses of Cache, though it does mess +// up our REDUNDANT block cache insertion statistics. // * Only supports keys of exactly 16 bytes, which is what RocksDB uses for -// block cache (not row cache or table cache). -// * SecondaryCache is not supported. +// block cache (but not row cache or table cache). // * Cache priorities are less aggressively enforced. Unlike LRUCache, enough // transient LOW or BOTTOM priority items can evict HIGH priority entries that // are not referenced recently (or often) enough. @@ -139,7 +147,8 @@ class ClockCacheTest; // * Empty - slot is not in use and unowned. All other metadata and data is // in an undefined state. // * Construction - slot is exclusively owned by one thread, the thread -// successfully entering this state, for populating or freeing data. 
+// successfully entering this state, for populating or freeing data +// (de-construction, same state marker). // * Shareable (group) - slot holds an entry with counted references for // pinning and reading, including // * Visible - slot holds an entry that can be returned by Lookup @@ -187,15 +196,19 @@ class ClockCacheTest; // know from our "redundant" stats that overwrites are very rare for the block // cache, so we should not spend much to make them effective. // -// So instead we Insert as soon as we find an empty slot in the probing -// sequence without seeing an existing (visible) entry for the same key. This -// way we only insert if we can improve the probing performance, and we don't -// need to probe beyond our insert position, assuming we are willing to let -// the previous entry for the same key die of old age (eventual eviction from -// not being used). We can reach a similar state with concurrent insertions, -// where one will pass over the other while it is "under construction." -// This temporary duplication is acceptable for RocksDB block cache because -// we know redundant insertion is rare. +// FixedHyperClockCache: Instead we Insert as soon as we find an empty slot in +// the probing sequence without seeing an existing (visible) entry for the same +// key. This way we only insert if we can improve the probing performance, and +// we don't need to probe beyond our insert position, assuming we are willing +// to let the previous entry for the same key die of old age (eventual eviction +// from not being used). We can reach a similar state with concurrent +// insertions, where one will pass over the other while it is "under +// construction." This temporary duplication is acceptable for RocksDB block +// cache because we know redundant insertion is rare. +// AutoHyperClockCache: Similar, except we only notice and return an existing +// match if it is found in the search for a suitable empty slot (starting with +// the same slot as the head pointer), not by following the existing chain of +// entries. Insertions are always made to the head of the chain. // // Another problem to solve is what to return to the caller when we find an // existing entry whose probing position we cannot improve on, or when the @@ -322,7 +335,6 @@ struct ClockHandle : public ClockHandleBasicData { // For setting the hit bit static constexpr uint8_t kHitBitShift = 2U * kCounterNumBits; static constexpr uint64_t kHitBitMask = uint64_t{1} << kHitBitShift; - ; // For reading or updating the state marker in meta word static constexpr uint8_t kStateShift = kHitBitShift + 1; @@ -395,6 +407,8 @@ class BaseClockTable { uint32_t GetHashSeed() const { return hash_seed_; } + uint64_t GetYieldCount() const { return yield_count_.load(); } + struct EvictionData { size_t freed_charge = 0; size_t freed_count = 0; @@ -448,6 +462,9 @@ class BaseClockTable { // Clock algorithm sweep pointer. std::atomic clock_pointer_{}; + // Counter for number of times we yield to wait on another thread. + std::atomic yield_count_{}; + // TODO: is this separation needed if we don't do background evictions? ALIGN_AS(CACHE_LINE_SIZE) // Number of elements in the table. @@ -472,6 +489,10 @@ class BaseClockTable { const uint32_t& hash_seed_; }; +// Hash table for cache entries with size determined at creation time. +// Uses open addressing and double hashing. Since entries cannot be moved, +// the "displacements" count ensures probing sequences find entries even when +// entries earlier in the probing sequence have been removed. 
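+//
+// As a rough sketch of the probing scheme described above (illustrative only,
+// with hypothetical helper names, not the actual implementation):
+//
+//   size_t pos = Hash1(key) & (size - 1);  // first probe slot
+//   size_t step = Hash2(key) | 1;          // odd step visits every slot when
+//                                          // size is a power of two
+//   while (!UsableOrMatchingSlot(pos, key)) {
+//     pos = (pos + step) & (size - 1);
+//   }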
class FixedHyperClockTable : public BaseClockTable { public: // Target size to be exactly a common cache line size (see static_assert in @@ -626,11 +647,313 @@ class FixedHyperClockTable : public BaseClockTable { const std::unique_ptr array_; }; // class FixedHyperClockTable -// Placeholder for future automatic table variant -// For now, just use FixedHyperClockTable. -class AutoHyperClockTable : public FixedHyperClockTable { +// Hash table for cache entries that resizes automatically based on occupancy. +// However, it depends on a contiguous memory region to grow into +// incrementally, using linear hashing, so uses an anonymous mmap so that +// only the used portion of the memory region is mapped to physical memory +// (part of RSS). +// +// This table implementation uses the same "low-level protocol" for managing +// the contens of an entry slot as FixedHyperClockTable does, captured in the +// ClockHandle struct. The provides most of the essential data safety, but +// AutoHyperClockTable is another "high-level protocol" for organizing entries +// into a hash table, with automatic resizing. +// +// This implementation is not fully wait-free but we can call it "essentially +// wait-free," and here's why. First, like FixedHyperClockCache, there is no +// locking nor other forms of waiting at the cache or shard level. Also like +// FixedHCC there is essentially an entry-level read-write lock implemented +// with atomics, but our relaxed atomicity/consistency guarantees (e.g. +// duplicate inserts are possible) mean we do not need to wait for entry +// locking. Lookups, non-erasing Releases, and non-evicting non-growing Inserts +// are all fully wait-free. Of course, these waits are not dependent on any +// external factors such as I/O. +// +// For operations that remove entries from a chain or grow the table by +// splitting a chain, there is a chain-level locking mechanism that we call a +// "rewrite" lock, and the only waits are for these locks. On average, each +// chain lock is relevant to < 2 entries each. (The average would be less than +// one entry each, but we do not lock when there's no entry to remove or +// migrate.) And a given thread can only hold two such chain locks at a time, +// more typically just one. So in that sense alone, the waiting that does exist +// is very localized. +// +// If we look closer at the operations utilizing that locking mechanism, we +// can see why it's "essentially wait-free." +// * Grow operations to increase the size of the table: each operation splits +// an existing chain into two, and chains for splitting are chosen in table +// order. Grow operations are fully parallel except for the chain locking, but +// for one Grow operation to wait on another, it has to be feeding into the +// other, which means the table has doubled in size already from other Grow +// operations without the original one finishing. So Grow operations are very +// low latency (unlike LRUCache doubling the table size in one operation) and +// very parallelizeable. (We use some tricks to break up dependencies in +// updating metadata on the usable size of the table.) And obviously Grow +// operations are very rare after the initial population of the table. +// * Evict operations (part of many Inserts): clock updates and evictions +// sweep through the structure in table order, so like Grow operations, +// parallel Evict can only wait on each other if an Evict has lingered (slept) +// long enough that the clock pointer has wrapped around the entire structure. 
+// * Random erasures (Erase, Release with erase_if_last_ref, etc.): these
+// operations are rare and not really considered performance critical.
+// Currently they're mostly used for removing placeholder cache entries, e.g.
+// for memory tracking, though that could use standalone entries instead to
+// avoid potential contention in table operations. It's possible that future
+// enhancements could pro-actively remove cache entries from obsolete files,
+// but that's not yet implemented.
+class AutoHyperClockTable : public BaseClockTable {
 public:
-  using FixedHyperClockTable::FixedHyperClockTable;
+  // Target size to be exactly a common cache line size (see static_assert in
+  // clock_cache.cc)
+  struct ALIGN_AS(64U) HandleImpl : public ClockHandle {
+    // To organize AutoHyperClockTable entries into a hash table while
+    // allowing the table size to grow without existing entries being moved,
+    // a version of chaining is used. Rather than being heap allocated (and
+    // incurring overheads to ensure memory safety) entries must go into
+    // Handles ("slots") in the pre-allocated array. To improve CPU cache
+    // locality, the chain head pointers are interleaved with the entries;
+    // specifically, a Handle contains
+    // * A head pointer for a chain of entries with this "home" location.
+    // * A ClockHandle, for an entry that may or may not be in the chain
+    //   starting from that head (but for performance ideally is on that
+    //   chain).
+    // * A next pointer for the continuation of the chain containing this
+    //   entry.
+    //
+    // The pointers are not raw pointers, but are indices into the array,
+    // and are decorated in two ways to help detect and recover from
+    // relevant concurrent modifications during Lookup, so that Lookup is
+    // fully wait-free:
+    // * Each "with_shift" pointer contains a shift count that indicates
+    //   how many hash bits were used in choosing the home address for the
+    //   chain--specifically the next entry in the chain.
+    // * The end of a chain is given a special "end" marker and refers back
+    //   to the head of the chain.
+    //
+    // Why do we need shift on each pointer? To make Lookup wait-free, we need
+    // to be able to query a chain without missing anything, and preferably
+    // avoid synchronously double-checking the length_info. Without the shifts,
+    // there is a risk that we start down a chain and while paused on an entry
+    // that goes to a new home, we then follow the rest of the
+    // partially-migrated chain to see the shared ending with the old home, but
+    // for a time were following the chain for the new home, missing some
+    // entries for the old home.
+    //
+    // Why do we need the end of the chain to loop back? If Lookup pauses
+    // at an "under construction" entry, and sees that "next" is null after
+    // waking up, we need something to tell whether the "under construction"
+    // entry was freed and reused for another chain. Otherwise, we could
+    // miss entries still on the original chain in the presence of a
+    // concurrent modification. Until an entry is fully erased from a chain,
+    // it is normal to see "under construction" entries on the chain, and it
+    // is not safe to read their hashed key without either a read reference
+    // on the entry or a rewrite lock on the chain.
+
+    // Marker in a "with_shift" head pointer for some thread owning writes
+    // to the chain structure (except for inserts), but only if not an
+    // "end" pointer. Also called the "rewrite lock."
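+    // For orientation, the "with_shift" bit layout implied by the constants
+    // below: bits 0-5 hold the shift count (kShiftMask), bit 6 is the
+    // end-of-chain marker, bit 7 is this lock bit, and the bits at and above
+    // kNextShift hold the array index of the next entry.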
+ static constexpr uint64_t kHeadLocked = uint64_t{1} << 7; + + // Marker in a "with_shift" pointer for the end of a chain. Must also + // point back to the head of the chain (with end marker removed). + // Also includes the "locked" bit so that attempting to lock an empty + // chain has no effect (not needed, as the lock is only needed for + // removals). + static constexpr uint64_t kNextEndFlags = (uint64_t{1} << 6) | kHeadLocked; + + static inline bool IsEnd(uint64_t next_with_shift) { + // Assuming certain values never used, suffices to check this one bit + constexpr auto kCheckBit = kNextEndFlags ^ kHeadLocked; + return next_with_shift & kCheckBit; + } + + // Bottom bits to right shift away to get an array index from a + // "with_shift" pointer. + static constexpr int kNextShift = 8; + + // A bit mask for the "shift" associated with each "with_shift" pointer. + // Always bottommost bits. + static constexpr int kShiftMask = 63; + + // A marker for head_next_with_shift that indicates this HandleImpl is + // heap allocated (standalone) rather than in the table. + static constexpr uint64_t kStandaloneMarker = UINT64_MAX; + + // A marker for head_next_with_shift indicating the head is not yet part + // of the usable table, or for chain_next_with_shift indicating that the + // entry is not present or is not yet part of a chain (must not be + // "shareable" state). + static constexpr uint64_t kUnusedMarker = 0; + + // See above. The head pointer is logically independent of the rest of + // the entry, including the chain next pointer. + std::atomic head_next_with_shift{kUnusedMarker}; + std::atomic chain_next_with_shift{kUnusedMarker}; + + // For supporting CreateStandalone and some fallback cases. + inline bool IsStandalone() const { + return head_next_with_shift.load(std::memory_order_acquire) == + kStandaloneMarker; + } + + inline void SetStandalone() { + head_next_with_shift.store(kStandaloneMarker, std::memory_order_release); + } + }; // struct HandleImpl + + struct Opts { + explicit Opts(size_t _min_avg_value_size) + : min_avg_value_size(_min_avg_value_size) {} + + explicit Opts(const HyperClockCacheOptions& opts) { + assert(opts.estimated_entry_charge == 0); + min_avg_value_size = opts.min_avg_entry_charge; + } + size_t min_avg_value_size; + }; + + AutoHyperClockTable(size_t capacity, bool strict_capacity_limit, + CacheMetadataChargePolicy metadata_charge_policy, + MemoryAllocator* allocator, + const Cache::EvictionCallback* eviction_callback, + const uint32_t* hash_seed, const Opts& opts); + ~AutoHyperClockTable(); + + // For BaseClockTable::Insert + struct InsertState { + uint64_t saved_length_info = 0; + }; + + void StartInsert(InsertState& state); + + // Does initial check for whether there's hash table room for another + // inserted entry, possibly growing if needed. Returns true iff (after + // the call) there is room for the proposed number of entries. + bool GrowIfNeeded(size_t new_occupancy, InsertState& state); + + HandleImpl* DoInsert(const ClockHandleBasicData& proto, + uint64_t initial_countdown, bool take_ref, + InsertState& state); + + // Runs the clock eviction algorithm trying to reclaim at least + // requested_charge. Returns how much is evicted, which could be less + // if it appears impossible to evict the requested amount without blocking. 
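+  // The freed charge and count are reported via *data (see EvictionData).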
+ void Evict(size_t requested_charge, InsertState& state, EvictionData* data); + + HandleImpl* Lookup(const UniqueId64x2& hashed_key); + + bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref); + + void Erase(const UniqueId64x2& hashed_key); + + void EraseUnRefEntries(); + + size_t GetTableSize() const; + + size_t GetOccupancyLimit() const; + + const HandleImpl* HandlePtr(size_t idx) const { return &array_[idx]; } + +#ifndef NDEBUG + size_t& TEST_MutableOccupancyLimit() { + return *reinterpret_cast(&occupancy_limit_); + } + + // Release N references + void TEST_ReleaseN(HandleImpl* handle, size_t n); +#endif + + // Maximum ratio of number of occupied slots to number of usable slots. The + // actual load factor should float pretty close to this number, which should + // be a nice space/time trade-off, though large swings in WriteBufferManager + // memory could lead to low (but very much safe) load factors (only after + // seeing high load factors). Linear hashing along with (modified) linear + // probing to find an available slot increases potential risks of high + // load factors, so are disallowed. + static constexpr double kMaxLoadFactor = 0.60; + + private: // functions + // Returns true iff increased usable length. Due to load factor + // considerations, GrowIfNeeded might call this more than once to make room + // for one more entry. + bool Grow(InsertState& state); + + // Operational details of splitting a chain into two for Grow(). + void SplitForGrow(size_t grow_home, size_t old_home, int old_shift); + + // Takes an "under construction" entry and ensures it is no longer connected + // to its home chain (in preparaion for completing erasure and freeing the + // slot). Note that previous operations might have already noticed it being + // "under (de)construction" and removed it from its chain. + void Remove(HandleImpl* h); + + // Try to take ownership of an entry and erase+remove it from the table. + // Returns true if successful. Could fail if + // * There are other references to the entry + // * Some other thread has exclusive ownership or has freed it. + bool TryEraseHandle(HandleImpl* h, bool holding_ref, bool mark_invisible); + + // Calculates the appropriate maximum table size, for creating the memory + // mapping. + static size_t CalcMaxUsableLength( + size_t capacity, size_t min_avg_value_size, + CacheMetadataChargePolicy metadata_charge_policy); + + // Shared helper function that implements removing entries from a chain + // with proper handling to ensure all existing data is seen even in the + // presence of concurrent insertions, etc. (See implementation.) + template + void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX); + + // An RAII wrapper for locking a chain of entries for removals. See + // implementation. + class ChainRewriteLock; + + // Helper function for PurgeImpl while holding a ChainRewriteLock. See + // implementation. + template + void PurgeImplLocked(OpData* op_data, ChainRewriteLock& rewrite_lock, + size_t home); + + // Update length_info_ as much as possible without waiting, given a known + // usable (ready for inserts and lookups) grow_home. (Previous grow_homes + // might not be usable yet, but we can check if they are by looking at + // the corresponding old home.) + void CatchUpLengthInfoNoWait(size_t known_usable_grow_home); + + private: // data + // mmaped area holding handles + const TypedMemMapping array_; + + // Metadata for table size under linear hashing. 
+ // + // Lowest 8 bits are the minimum number of lowest hash bits to use + // ("min shift"). The upper 56 bits are a threshold. If that minumum number + // of bits taken from a hash value is < this threshold, then one more bit of + // hash value is taken and used. + // + // Other mechanisms (shift amounts on pointers) ensure complete availability + // of data already in the table even if a reader only sees a completely + // out-of-date version of this value. In the worst case, it could take + // log time to find the correct chain, but normally this value enables + // readers to find the correct chain on the first try. + // + // NOTES: length_info_ is only updated at the end of a Grow operation, + // so that waiting in Grow operations isn't done while entries are pinned + // for internal operation purposes. Thus, Lookup and Insert have to + // detect and support cases where length_info hasn't caught up to updated + // chains. Winning grow thread is the one that transitions + // head_next_with_shift from zeros. Grow threads can spin/yield wait for + // preconditions and postconditions to be met. + std::atomic length_info_; + + // An already-computed version of the usable length times the max load + // factor. Could be slightly out of date but GrowIfNeeded()/Grow() handle + // that internally. + std::atomic occupancy_limit_; + + // See explanation in AutoHyperClockTable::Evict + std::atomic clock_pointer_mask_; }; // class AutoHyperClockTable // A single shard of sharded cache. @@ -785,7 +1108,6 @@ class FixedHyperClockCache const std::shared_ptr& /*info_log*/) const override; }; // class FixedHyperClockCache -// Placeholder for future automatic HCC variant class AutoHyperClockCache #ifdef NDEBUG final @@ -795,6 +1117,9 @@ class AutoHyperClockCache using BaseHyperClockCache::BaseHyperClockCache; const char* Name() const override { return "AutoHyperClockCache"; } + + void ReportProblems( + const std::shared_ptr& /*info_log*/) const override; }; // class AutoHyperClockCache } // namespace clock_cache diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index d00e63055..047f5b80b 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -534,7 +534,7 @@ TYPED_TEST(ClockCacheTest, Limits) { // (Cleverly using mostly zero-charge entries, but some non-zero to // verify usage tracking on detached entries.) { - size_t n = shard.GetTableAddressCount() + 1; + size_t n = kCapacity * 5 + 1; std::unique_ptr ha { new HandleImpl* [n] {} }; Status s; for (size_t i = 0; i < n && s.ok(); ++i) { @@ -560,6 +560,8 @@ TYPED_TEST(ClockCacheTest, Limits) { EXPECT_OK(s); } + EXPECT_EQ(shard.GetOccupancyCount(), shard.GetOccupancyLimit()); + // Regardless, we didn't allow table to actually get full EXPECT_LT(shard.GetOccupancyCount(), shard.GetTableAddressCount()); diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 2891c9d47..a85595e4f 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -375,7 +375,8 @@ inline std::shared_ptr NewCompressedSecondaryCache( // compatible with HyperClockCache. // * Requires an extra tuning parameter: see estimated_entry_charge below. // Similarly, substantially changing the capacity with SetCapacity could -// harm efficiency. +// harm efficiency. -> EXPERIMENTAL: the tuning parameter can be set to 0 +// to find the appropriate balance automatically. // * Cache priorities are less aggressively enforced, which could cause // cache dilution from long range scans (unless they use fill_cache=false). 
// * Can be worse for small caches, because if almost all of a cache shard is @@ -384,10 +385,16 @@ inline std::shared_ptr NewCompressedSecondaryCache( // // See internal cache/clock_cache.h for full description. struct HyperClockCacheOptions : public ShardedCacheOptions { - // The estimated average `charge` associated with cache entries. This is a - // critical configuration parameter for good performance from the hyper - // cache, because having a table size that is fixed at creation time greatly - // reduces the required synchronization between threads. + // The estimated average `charge` associated with cache entries. + // + // EXPERIMENTAL: the field can be set to 0 to size the table dynamically + // and automatically. See also min_avg_entry_charge. This feature requires + // platform support for lazy anonymous memory mappings (incl Linux, Windows). + // Performance is very similar to choosing the best configuration parameter. + // + // PRODUCTION-TESTED: This is a critical configuration parameter for good + // performance, because having a table size that is fixed at creation time + // greatly reduces the required synchronization between threads. // * If the estimate is substantially too low (e.g. less than half the true // average) then metadata space overhead with be substantially higher (e.g. // 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this @@ -416,7 +423,21 @@ struct HyperClockCacheOptions : public ShardedCacheOptions { // to estimate toward the lower side than the higher side. size_t estimated_entry_charge; - // FOR A FUTURE FEATURE (NOT YET USED) + // EXPERIMENTAL: When estimated_entry_charge == 0, this parameter establishes + // a promised lower bound on the average charge of all entries in the table, + // which is roughly the average uncompressed SST block size of block cache + // entries, typically > 4KB. The default should generally suffice with almost + // no cost. (This option is ignored for estimated_entry_charge > 0.) + // + // More detail: The table for indexing cache entries will grow automatically + // as needed, but a hard upper bound on that size is needed at creation time. + // The reason is that a contiguous memory mapping for the maximum size is + // created, but memory pages are only mapped to physical (RSS) memory as + // needed. If the average charge of all entries in the table falls below + // this value, the table will operate below its full logical capacity (total + // memory usage) because it has reached its physical capacity for efficiently + // indexing entries. The hash table is never allowed to exceed a certain safe + // load factor for efficient Lookup, Insert, etc. size_t min_avg_entry_charge = 450; HyperClockCacheOptions( diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 996650f20..fe9ba9b11 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -3606,6 +3606,8 @@ class Benchmark { } else if (name == "block_cache_entry_stats") { // DB::Properties::kBlockCacheEntryStats PrintStats("rocksdb.block-cache-entry-stats"); + } else if (name == "cache_report_problems") { + CacheReportProblems(); } else if (name == "stats") { PrintStats("rocksdb.stats"); } else if (name == "resetstats") { @@ -4168,7 +4170,7 @@ class Benchmark { } } if (FLAGS_use_stderr_info_logger) { - options.info_log.reset(new StderrLogger()); + options.info_log = std::make_shared(); } options.memtable_huge_page_size = FLAGS_memtable_use_huge_page ? 
2048 : 0; options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_bloom_size_ratio; @@ -8302,6 +8304,11 @@ class Benchmark { } } + void CacheReportProblems() { + auto debug_logger = std::make_shared(DEBUG_LEVEL); + cache_->ReportProblems(debug_logger); + } + void PrintStats(const char* key) { if (db_.db != nullptr) { PrintStats(db_.db, key, false); From f66df58b9e16f6f39d4ebc46554db9b7103db2d1 Mon Sep 17 00:00:00 2001 From: hulk Date: Fri, 1 Sep 2023 23:39:41 -0700 Subject: [PATCH 086/386] Add Apache Kvrocks RocksDB use case in USERS.md (#11779) Summary: [Apache Kvrocks](https://github.com/apache/kvrocks) is an open-source distributed key-value NoSQL database built on top of RocksDB. It serves as a cost-saving and capacity-increasing alternative drop-in replacement for Redis Pull Request resolved: https://github.com/facebook/rocksdb/pull/11779 Reviewed By: ajkr Differential Revision: D48872257 Pulled By: cbi42 fbshipit-source-id: 507f67d69b826607a1464a22ec7c60abe11c5124 --- USERS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/USERS.md b/USERS.md index 43c9cac1e..ac91f7ede 100644 --- a/USERS.md +++ b/USERS.md @@ -161,5 +161,9 @@ LzLabs is using RocksDB as a storage engine in their multi-database distributed ## Solana Labs [Solana](https://github.com/solana-labs/solana) is a fast, secure, scalable, and decentralized blockchain. It uses RocksDB as the underlying storage for its ledger store. +## Apache Kvrocks + +[Apache Kvrocks](https://github.com/apache/kvrocks) is an open-source distributed key-value NoSQL database built on top of RocksDB. It serves as a cost-saving and capacity-increasing alternative drop-in replacement for Redis. + ## Others More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb). From d01b1215bd8b879b07bca33d5668cf1fc621c7f7 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 4 Sep 2023 10:59:16 -0700 Subject: [PATCH 087/386] Fix TSAN reports on AutoHCC (#11792) Summary: Forgot to run TSAN test on latest revision of https://github.com/facebook/rocksdb/issues/11738 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11792 Test Plan: Use cache_bench to reproduce TSAN errors and observe fix Reviewed By: ajkr Differential Revision: D48953196 Pulled By: pdillinger fbshipit-source-id: 9e358b4768d8ddde86f84b451863263f661d7b80 --- cache/clock_cache.cc | 15 +++++---------- port/lang.h | 7 +++++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index c6a35d59f..caa7db2f4 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -3091,16 +3091,11 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // false positive, which is re-checked after acquiring read ref, or false // negative, which is re-checked in the full Lookup. - // We need to make the reads relaxed atomic to avoid TSAN reporting - // race conditions. And we can skip the cheap key match optimization - // altogether if 64-bit atomics not supported lock-free. Also, using - // & rather than && to give more flexibility to the compiler and CPU. - if (!std::atomic::is_always_lock_free || - sizeof(std::atomic) != sizeof(uint64_t) || - (int{reinterpret_cast&>(h->hashed_key[0]) - .load(std::memory_order_relaxed) == hashed_key[0]} & - int{reinterpret_cast&>(h->hashed_key[1]) - .load(std::memory_order_relaxed) == hashed_key[1]})) { + // ReadAllowRace suppresses TSAN report on these reads. 
Also, using + // & rather than && to give more flexibility to the compiler and CPU, + // as it is safe to access [1] even if [0] doesn't match. + if ((int{ReadAllowRace(h->hashed_key[0]) == hashed_key[0]} & + int{ReadAllowRace(h->hashed_key[1]) == hashed_key[1]})) { // Increment acquire counter for definitive check uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, std::memory_order_acquire); diff --git a/port/lang.h b/port/lang.h index a4201ca3b..e0cb8da5d 100644 --- a/port/lang.h +++ b/port/lang.h @@ -69,6 +69,13 @@ constexpr bool kMustFreeHeapAllocations = false; #define TSAN_SUPPRESSION #endif // TSAN_SUPPRESSION +// Read memory while allowing data races. Only use where it is OK to read +// the wrong value, e.g. where reading the latest value improves performance. +template +TSAN_SUPPRESSION inline T ReadAllowRace(const T& v) { + return v; +} + // Compile-time CPU feature testing compatibility // // A way to be extra sure these defines have been included. From 392d6957cd2069a2874a2f049347e2a0de752ec4 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 5 Sep 2023 10:41:29 -0700 Subject: [PATCH 088/386] Added compaction read errors to `db_stress` (#11789) Summary: - Fixed misspellings of "inject" - Made user read errors retryable when `FLAGS_inject_error_severity == 1` - Added compaction read errors when `FLAGS_read_fault_one_in > 0`. These are always retryable so that the DB will keep accepting writes - Reenabled setting `compaction_readahead_size` in crash test. The reason for disabling it was to "keep the test clean", which is not a good enough reason to skip testing it Pull Request resolved: https://github.com/facebook/rocksdb/pull/11789 Test Plan: With https://github.com/facebook/rocksdb/issues/11782 reverted, reproduced the bug: - Build: `make -j56 db_stress` - Command: `TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --simple --write_buffer_size=524288 --target_file_size_base=524288 --max_bytes_for_level_base=2097152 --interval=10 --max_key=1000000` - Output: ``` stderr has error message: ***put or merge error: Corruption: Compaction number of input keys does not match number of keys processed.*** ``` Reviewed By: cbi42 Differential Revision: D48939994 Pulled By: ajkr fbshipit-source-id: a1efb799efecdfd5d9cfd185e4a6321db8fccfbb --- db_stress_tool/db_stress_gflags.cc | 4 +-- db_stress_tool/db_stress_listener.h | 15 +++++++++ db_stress_tool/db_stress_shared_state.h | 2 +- db_stress_tool/db_stress_test_base.cc | 43 +++++++++++++------------ db_stress_tool/no_batched_ops_stress.cc | 8 ++--- tools/db_crashtest.py | 5 ++- utilities/fault_injection_fs.cc | 10 ++++-- utilities/fault_injection_fs.h | 8 +++-- 8 files changed, 59 insertions(+), 36 deletions(-) diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 4c3a2b84f..776ef357a 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1027,8 +1027,8 @@ DEFINE_int32(open_write_fault_one_in, 0, DEFINE_int32(open_read_fault_one_in, 0, "On non-zero, enables fault injection on file reads " "during DB reopen."); -DEFINE_int32(injest_error_severity, 1, - "The severity of the injested IO Error. 1 is soft error (e.g. " +DEFINE_int32(inject_error_severity, 1, + "The severity of the injected IO Error. 1 is soft error (e.g. 
" "retryable error), 2 is fatal error, and the default is " "retryable error."); DEFINE_int32(prepopulate_block_cache, diff --git a/db_stress_tool/db_stress_listener.h b/db_stress_tool/db_stress_listener.h index 97bbdaefa..998a6cc74 100644 --- a/db_stress_tool/db_stress_listener.h +++ b/db_stress_tool/db_stress_listener.h @@ -9,6 +9,7 @@ #include #include +#include "db_stress_tool/db_stress_shared_state.h" #include "file/filename.h" #include "file/writable_file_writer.h" #include "rocksdb/db.h" @@ -19,9 +20,12 @@ #include "rocksdb/unique_id.h" #include "util/gflags_compat.h" #include "util/random.h" +#include "utilities/fault_injection_fs.h" DECLARE_int32(compact_files_one_in); +extern std::shared_ptr fault_fs_guard; + namespace ROCKSDB_NAMESPACE { // Verify across process executions that all seen IDs are unique @@ -95,6 +99,17 @@ class DbStressListener : public EventListener { RandomSleep(); } + void OnSubcompactionBegin(const SubcompactionJobInfo& /* si */) override { + if (FLAGS_read_fault_one_in) { + // Hardcoded to inject retryable error as a non-retryable error would put + // the DB in read-only mode and then it would crash on the next write. + fault_fs_guard->SetThreadLocalReadErrorContext( + static_cast(FLAGS_seed), FLAGS_read_fault_one_in, + true /* retryable */); + fault_fs_guard->EnableErrorInjection(); + } + } + void OnTableFileCreationStarted( const TableFileCreationBriefInfo& /*info*/) override { ++num_pending_file_creations_; diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h index 604e8c631..bad6a77e1 100644 --- a/db_stress_tool/db_stress_shared_state.h +++ b/db_stress_tool/db_stress_shared_state.h @@ -35,7 +35,7 @@ DECLARE_int32(open_metadata_write_fault_one_in); DECLARE_int32(open_write_fault_one_in); DECLARE_int32(open_read_fault_one_in); -DECLARE_int32(injest_error_severity); +DECLARE_int32(inject_error_severity); namespace ROCKSDB_NAMESPACE { class StressTest; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index aff559a63..0195971c0 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -773,17 +773,18 @@ void StressTest::OperateDb(ThreadState* thread) { #ifndef NDEBUG if (FLAGS_read_fault_one_in) { - fault_fs_guard->SetThreadLocalReadErrorContext(thread->shared->GetSeed(), - FLAGS_read_fault_one_in); + fault_fs_guard->SetThreadLocalReadErrorContext( + thread->shared->GetSeed(), FLAGS_read_fault_one_in, + FLAGS_inject_error_severity == 1 /* retryable */); } #endif // NDEBUG if (FLAGS_write_fault_one_in) { IOStatus error_msg; - if (FLAGS_injest_error_severity <= 1 || FLAGS_injest_error_severity > 2) { + if (FLAGS_inject_error_severity <= 1 || FLAGS_inject_error_severity > 2) { error_msg = IOStatus::IOError("Retryable IO Error"); error_msg.SetRetryable(true); - } else if (FLAGS_injest_error_severity == 2) { - // Ingest the fatal error + } else if (FLAGS_inject_error_severity == 2) { + // Inject a fatal error error_msg = IOStatus::IOError("Fatal IO Error"); error_msg.SetDataLoss(true); } @@ -2684,14 +2685,14 @@ void StressTest::Open(SharedState* shared, bool reopen) { RegisterAdditionalListeners(); if (!FLAGS_use_txn) { - // Determine whether we need to ingest file metadata write failures + // Determine whether we need to inject file metadata write failures // during DB reopen. If it does, enable it. 
- // Only ingest metadata error if it is reopening, as initial open + // Only inject metadata error if it is reopening, as initial open // failure doesn't need to be handled. // TODO cover transaction DB is not covered in this fault test too. - bool ingest_meta_error = false; - bool ingest_write_error = false; - bool ingest_read_error = false; + bool inject_meta_error = false; + bool inject_write_error = false; + bool inject_read_error = false; if ((FLAGS_open_metadata_write_fault_one_in || FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) && fault_fs_guard @@ -2704,15 +2705,15 @@ void StressTest::Open(SharedState* shared, bool reopen) { // solve it, skip WAL from failure injection. fault_fs_guard->SetSkipDirectWritableTypes({kWalFile}); } - ingest_meta_error = FLAGS_open_metadata_write_fault_one_in; - ingest_write_error = FLAGS_open_write_fault_one_in; - ingest_read_error = FLAGS_open_read_fault_one_in; - if (ingest_meta_error) { + inject_meta_error = FLAGS_open_metadata_write_fault_one_in; + inject_write_error = FLAGS_open_write_fault_one_in; + inject_read_error = FLAGS_open_read_fault_one_in; + if (inject_meta_error) { fault_fs_guard->EnableMetadataWriteErrorInjection(); fault_fs_guard->SetRandomMetadataWriteError( FLAGS_open_metadata_write_fault_one_in); } - if (ingest_write_error) { + if (inject_write_error) { fault_fs_guard->SetFilesystemDirectWritable(false); fault_fs_guard->EnableWriteErrorInjection(); fault_fs_guard->SetRandomWriteError( @@ -2720,7 +2721,7 @@ void StressTest::Open(SharedState* shared, bool reopen) { IOStatus::IOError("Injected Open Error"), /*inject_for_all_file_types=*/true, /*types=*/{}); } - if (ingest_read_error) { + if (inject_read_error) { fault_fs_guard->SetRandomReadError(FLAGS_open_read_fault_one_in); } } @@ -2752,14 +2753,14 @@ void StressTest::Open(SharedState* shared, bool reopen) { } } - if (ingest_meta_error || ingest_write_error || ingest_read_error) { + if (inject_meta_error || inject_write_error || inject_read_error) { fault_fs_guard->SetFilesystemDirectWritable(true); fault_fs_guard->DisableMetadataWriteErrorInjection(); fault_fs_guard->DisableWriteErrorInjection(); fault_fs_guard->SetSkipDirectWritableTypes({}); fault_fs_guard->SetRandomReadError(0); if (s.ok()) { - // Ingested errors might happen in background compactions. We + // Injected errors might happen in background compactions. We // wait for all compactions to finish to make sure DB is in // clean state before executing queries. s = db_->GetRootDB()->WaitForCompact(WaitForCompactOptions()); @@ -2776,9 +2777,9 @@ void StressTest::Open(SharedState* shared, bool reopen) { // After failure to opening a DB due to IO error, retry should // successfully open the DB with correct data if no IO error shows // up. - ingest_meta_error = false; - ingest_write_error = false; - ingest_read_error = false; + inject_meta_error = false; + inject_write_error = false; + inject_read_error = false; // TODO: Unsynced data loss during DB reopen is not supported yet in // stress test. 
Will need to recreate expected state if we decide diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index f822a7663..1d2e4a3b5 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1312,7 +1312,7 @@ class NonBatchedOpsStressTest : public StressTest { pending_expected_value.Commit(); if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { + if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { is_db_stopped_ = true; } else if (!is_db_stopped_ || @@ -1371,7 +1371,7 @@ class NonBatchedOpsStressTest : public StressTest { thread->stats.AddDeletes(1); if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { + if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { is_db_stopped_ = true; @@ -1402,7 +1402,7 @@ class NonBatchedOpsStressTest : public StressTest { pending_expected_value.Commit(); thread->stats.AddSingleDeletes(1); if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { + if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { is_db_stopped_ = true; @@ -1464,7 +1464,7 @@ class NonBatchedOpsStressTest : public StressTest { s = db_->DeleteRange(write_opts, cfh, key, end_key); } if (!s.ok()) { - if (FLAGS_injest_error_severity >= 2) { + if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { is_db_stopped_ = true; } else if (!is_db_stopped_ || diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 72ee14865..8f3219ee6 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -158,9 +158,8 @@ "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]), "bytes_per_sync": lambda: random.choice([0, 262144]), "wal_bytes_per_sync": lambda: random.choice([0, 524288]), - # Disable compaction_readahead_size because the test is not passing. 
- # "compaction_readahead_size" : lambda : random.choice( - # [0, 0, 1024 * 1024]), + "compaction_readahead_size" : lambda : random.choice( + [0, 0, 1024 * 1024]), "db_write_buffer_size": lambda: random.choice( [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024] ), diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index fa15fc4a5..8db8be45f 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -956,6 +956,7 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( return IOStatus::OK(); } + IOStatus ret; if (ctx->rand.OneIn(ctx->one_in)) { if (ctx->count == 0) { ctx->message = ""; @@ -972,7 +973,7 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( // Likely non-per read status code for MultiRead ctx->message += "error; "; ret_fault_injected = true; - return IOStatus::IOError(); + ret = IOStatus::IOError(); } else if (Random::GetTLSInstance()->OneIn(8)) { assert(result); // For a small chance, set the failure to status but turn the @@ -1000,10 +1001,13 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( } else { ctx->message += "error result multiget single; "; ret_fault_injected = true; - return IOStatus::IOError(); + ret = IOStatus::IOError(); } } - return IOStatus::OK(); + if (ctx->retryable) { + ret.SetRetryable(true); + } + return ret; } bool FaultInjectionTestFS::TryParseFileName(const std::string& file_name, diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index cab0051bd..a481d86af 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -402,7 +402,8 @@ class FaultInjectionTestFS : public FileSystemWrapper { // seed is the seed for the random number generator, and one_in determines // the probability of injecting error (i.e an error is injected with // 1/one_in probability) - void SetThreadLocalReadErrorContext(uint32_t seed, int one_in) { + void SetThreadLocalReadErrorContext(uint32_t seed, int one_in, + bool retryable) { struct ErrorContext* ctx = static_cast(thread_local_error_->Get()); if (ctx == nullptr) { @@ -411,6 +412,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { } ctx->one_in = one_in; ctx->count = 0; + ctx->retryable = retryable; } static void DeleteThreadLocalErrorContext(void* p) { @@ -556,12 +558,14 @@ class FaultInjectionTestFS : public FileSystemWrapper { std::string message; int frames; ErrorType type; + bool retryable; explicit ErrorContext(uint32_t seed) : rand(seed), enable_error_injection(false), callstack(nullptr), - frames(0) {} + frames(0), + retryable(false) {} ~ErrorContext() { if (callstack) { free(callstack); From 6a98471ae57a52784cf63c6a8a1d58edb340ec74 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 5 Sep 2023 14:44:17 -0700 Subject: [PATCH 089/386] Fix bad include (#11797) Summary: There was a `#include "port/lang.h"` situated inside an `extern "C" {` which just started causing the header to be unusuable in some contexts. This was a regression on the CircleCI job build-linux-unity-and-headers in https://github.com/facebook/rocksdb/issues/11792 The include, and another like it, now appears obsolete so removed. 
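For illustration only (a hypothetical snippet, not code from this tree), this
is the kind of failure a C++ header can trigger when it is pulled in under
`extern "C"`:

```
extern "C" {
// Suppose a transitively included C++ header declares a template:
template <typename T>
T Identity(T v) {
  return v;
}
}  // rejected by the compiler: a template cannot be given C linkage
```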
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11797 Test Plan: local `make check-headers` and `make`, CI Reviewed By: jaykorean Differential Revision: D48976826 Pulled By: pdillinger fbshipit-source-id: 131d66969e045f2ded0f8936924ee30c9ef2655a --- util/xxhash.h | 3 --- util/xxph3.h | 4 ---- 2 files changed, 7 deletions(-) diff --git a/util/xxhash.h b/util/xxhash.h index ad49bab81..2b9c22883 100644 --- a/util/xxhash.h +++ b/util/xxhash.h @@ -11,9 +11,6 @@ #ifndef XXH_NAMESPACE #define XXH_NAMESPACE ROCKSDB_ #endif // !defined(XXH_NAMESPACE) - -// for FALLTHROUGH_INTENDED, inserted as appropriate -#include "port/lang.h" /* END RocksDB customizations */ // clang-format off diff --git a/util/xxph3.h b/util/xxph3.h index 968000c3a..2933b74db 100644 --- a/util/xxph3.h +++ b/util/xxph3.h @@ -386,10 +386,6 @@ typedef struct { #define XXPH_STATIC_LINKING_ONLY #endif -/* BEGIN RocksDB customizations */ -#include "port/lang.h" /* for FALLTHROUGH_INTENDED, inserted as appropriate */ -/* END RocksDB customizations */ - /* ************************************* * Compiler Specific Options ***************************************/ From 3f54b9696c964393b310c5c239420d2397a13430 Mon Sep 17 00:00:00 2001 From: git-hulk Date: Wed, 6 Sep 2023 10:07:21 -0700 Subject: [PATCH 090/386] Export GetCompactionReasonString/GetFlushReasonString by moving them into listener.h (#11778) Summary: Currently, rocksdb users would use the event listener to catch the compaction/flush event and log them if any. But now the reason is an integer type instead of a human-readable string, so we would like to convert them into a human-readable string. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11778 Reviewed By: jaykorean Differential Revision: D49012934 Pulled By: ajkr fbshipit-source-id: a4935b95d70c1be02aec65da7bf1c98a8cf8b933 --- include/rocksdb/listener.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/rocksdb/listener.h b/include/rocksdb/listener.h index 87bc67869..27f7d8a17 100644 --- a/include/rocksdb/listener.h +++ b/include/rocksdb/listener.h @@ -161,6 +161,8 @@ enum class CompactionReason : int { kNumOfReasons, }; +const char* GetCompactionReasonString(CompactionReason compaction_reason); + enum class FlushReason : int { kOthers = 0x00, kGetLiveFiles = 0x01, @@ -180,6 +182,8 @@ enum class FlushReason : int { kWalFull = 0xd, }; +const char* GetFlushReasonString(FlushReason flush_reason); + // TODO: In the future, BackgroundErrorReason will only be used to indicate // why the BG Error is happening (e.g., flush, compaction). We may introduce // other data structure to indicate other essential information such as From 458acf816983a10919f43f197ef1a843123bf44f Mon Sep 17 00:00:00 2001 From: Changyu Bi <102700264+cbi42@users.noreply.github.com> Date: Wed, 6 Sep 2023 10:23:41 -0700 Subject: [PATCH 091/386] Add some unit tests when file read returns error during compaction/scanning (#11788) Summary: Some repro unit tests for the bug fixed in https://github.com/facebook/rocksdb/pull/11782. Ran on main without https://github.com/facebook/rocksdb/pull/11782: ``` ./db_compaction_test --gtest_filter='*ErrorWhenReadFileHead' Note: Google Test filter = *ErrorWhenReadFileHead [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. 
[----------] 1 test from DBCompactionTest [ RUN ] DBCompactionTest.ErrorWhenReadFileHead db/db_compaction_test.cc:10105: Failure Value of: s.IsIOError() Actual: false Expected: true [ FAILED ] DBCompactionTest.ErrorWhenReadFileHead (3960 ms) ./db_iterator_test --gtest_filter="*ErrorWhenReadFile*" Note: Google Test filter = *ErrorWhenReadFile* [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from DBIteratorTest [ RUN ] DBIteratorTest.ErrorWhenReadFile db/db_iterator_test.cc:3399: Failure Value of: (iter->status()).ok() Actual: true Expected: false [ FAILED ] DBIteratorTest.ErrorWhenReadFile (280 ms) [----------] 1 test from DBIteratorTest (280 ms total) ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11788 Reviewed By: ajkr Differential Revision: D48940284 Pulled By: cbi42 fbshipit-source-id: 06f3c5963f576db3f85d305ffb2745ee13d209bb --- db/db_compaction_test.cc | 77 ++++++++++++++ db/db_iterator_test.cc | 170 ++++++++++++++++++++++++++++++ db/error_handler.cc | 2 + file/random_access_file_reader.cc | 8 ++ 4 files changed, 257 insertions(+) diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 8713b9b80..bc7e2f552 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -10037,6 +10037,83 @@ TEST_F(DBCompactionTest, VerifyRecordCount) { "processed."; ASSERT_TRUE(std::strstr(s.getState(), expect)); } + +TEST_F(DBCompactionTest, ErrorWhenReadFileHead) { + // This is to test a bug that is fixed in + // https://github.com/facebook/rocksdb/pull/11782. + // + // Ingest error when reading from a file with offset = 0, + // See if compaction handles it correctly. + Options opts = CurrentOptions(); + opts.num_levels = 7; + opts.compression = kNoCompression; + DestroyAndReopen(opts); + + // Set up LSM + // L5: F1 [key0, key99], F2 [key100, key199] + // L6: F3 [key50, key149] + Random rnd(301); + const int kValLen = 100; + for (int error_file = 1; error_file <= 3; ++error_file) { + for (int i = 50; i < 150; ++i) { + ASSERT_OK(Put(Key(i), rnd.RandomString(kValLen))); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + + std::vector values; + for (int i = 0; i < 100; ++i) { + values.emplace_back(rnd.RandomString(kValLen)); + ASSERT_OK(Put(Key(i), values.back())); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + + for (int i = 100; i < 200; ++i) { + values.emplace_back(rnd.RandomString(kValLen)); + ASSERT_OK(Put(Key(i), values.back())); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + + ASSERT_EQ(2, NumTableFilesAtLevel(5)); + ASSERT_EQ(1, NumTableFilesAtLevel(6)); + + std::atomic_int count = 0; + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::Read::BeforeReturn", + [&count, &error_file](void* pair_ptr) { + auto p = + reinterpret_cast*>(pair_ptr); + int cur = ++count; + if (cur == error_file) { + IOStatus* io_s = p->second; + *io_s = IOStatus::IOError(); + io_s->SetRetryable(true); + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Status s = db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + // Failed compaction should not lose data. 
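+    // (The injected error is retryable, so the DB stays writable and all
+    // previously written keys must remain readable after the failed
+    // compaction attempt.)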
+ PinnableSlice slice; + for (int i = 0; i < 200; ++i) { + ASSERT_OK(Get(Key(i), &slice)); + ASSERT_EQ(slice, values[i]); + } + ASSERT_NOK(s); + ASSERT_TRUE(s.IsIOError()); + s = db_->CompactRange(CompactRangeOptions(), nullptr, nullptr); + ASSERT_OK(s); + for (int i = 0; i < 200; ++i) { + ASSERT_OK(Get(Key(i), &slice)); + ASSERT_EQ(slice, values[i]); + } + SyncPoint::GetInstance()->DisableProcessing(); + DestroyAndReopen(opts); + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index 862377b6d..7cafadf0e 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -3295,6 +3295,176 @@ TEST_F(DBIteratorTest, IteratorRefreshReturnSV) { Close(); } +TEST_F(DBIteratorTest, ErrorWhenReadFile) { + // This is to test a bug that is fixed in + // https://github.com/facebook/rocksdb/pull/11782. + // + // Ingest error when reading from a file, and + // see if Iterator handles it correctly. + Options opts = CurrentOptions(); + opts.num_levels = 7; + opts.compression = kNoCompression; + BlockBasedTableOptions bbto; + // Always do I/O + bbto.no_block_cache = true; + opts.table_factory.reset(NewBlockBasedTableFactory(bbto)); + DestroyAndReopen(opts); + + // Set up LSM + // L5: F1 [key0, key99], F2 [key100, key199] + // L6: F3 [key50, key149] + Random rnd(301); + const int kValLen = 100; + for (int i = 50; i < 150; ++i) { + ASSERT_OK(Put(Key(i), rnd.RandomString(kValLen))); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + + std::vector values; + for (int i = 0; i < 100; ++i) { + values.emplace_back(rnd.RandomString(kValLen)); + ASSERT_OK(Put(Key(i), values.back())); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + + for (int i = 100; i < 200; ++i) { + values.emplace_back(rnd.RandomString(kValLen)); + ASSERT_OK(Put(Key(i), values.back())); + } + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + + ASSERT_EQ(2, NumTableFilesAtLevel(5)); + ASSERT_EQ(1, NumTableFilesAtLevel(6)); + + std::vector files; + db_->GetLiveFilesMetaData(&files); + // Get file names for F1, F2 and F3. + // These are file names, not full paths. + std::string f1, f2, f3; + for (auto& file_meta : files) { + if (file_meta.level == 6) { + f3 = file_meta.name; + } else { + if (file_meta.smallestkey == Key(0)) { + f1 = file_meta.name; + } else { + f2 = file_meta.name; + } + } + } + ASSERT_TRUE(!f1.empty()); + ASSERT_TRUE(!f2.empty()); + ASSERT_TRUE(!f3.empty()); + + std::string error_file; + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::Read::BeforeReturn", + [&error_file](void* io_s_ptr) { + auto p = + reinterpret_cast*>(io_s_ptr); + if (p->first->find(error_file) != std::string::npos) { + *p->second = IOStatus::IOError(); + p->second->SetRetryable(true); + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + // Error reading F1 + error_file = f1; + std::unique_ptr iter{db_->NewIterator(ReadOptions())}; + iter->SeekToFirst(); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + // This does not require reading the first block. + iter->Seek(Key(90)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[90]); + // iter has ok status before this Seek. 
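+  // Seek(Key(1)) needs F1's first data block (offset 0), where the read
+  // error is injected, so the error must surface here rather than being
+  // masked by the earlier OK status.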
+ iter->Seek(Key(1)); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + + // Error reading F2 + error_file = f2; + iter.reset(db_->NewIterator(ReadOptions())); + iter->Seek(Key(99)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[99]); + // Need to read from F2. + iter->Next(); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + iter->Seek(Key(190)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[190]); + // Seek for first key of F2. + iter->Seek(Key(100)); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + iter->SeekToLast(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[199]); + // SeekForPrev for first key of F2. + iter->SeekForPrev(Key(100)); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + // Does not read first block (offset 0). + iter->SeekForPrev(Key(98)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[98]); + + // Error reading F3 + error_file = f3; + iter.reset(db_->NewIterator(ReadOptions())); + iter->SeekToFirst(); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + iter->Seek(Key(50)); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + iter->SeekForPrev(Key(50)); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + // Does not read file 3 + iter->Seek(Key(150)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[150]); + + // Test when file read error occurs during Prev(). + // This requires returning an error when reading near the end of a file + // instead of offset 0. + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::Read::AnyOffset", [&f1](void* pair_ptr) { + auto p = + reinterpret_cast*>(pair_ptr); + if (p->first->find(f1) != std::string::npos) { + *p->second = IOStatus::IOError(); + p->second->SetRetryable(true); + } + }); + iter->SeekForPrev(Key(101)); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->value(), values[101]); + // DBIter will not stop at Key(100) since it needs + // to make sure the key it returns has the max sequence number for Key(100). + // So it will call MergingIterator::Prev() which will read F1. + iter->Prev(); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + SyncPoint::GetInstance()->DisableProcessing(); + iter->Reset(); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/error_handler.cc b/db/error_handler.cc index 55821952d..55451e42c 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -472,6 +472,8 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, ROCKS_LOG_INFO( db_options_.info_log, "ErrorHandler: Compaction will schedule by itself to resume\n"); + // Not used in this code path. 
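+      // (PermitUncheckedError() marks the status as inspected so that
+      // status-checking builds do not assert when it is destroyed unread.)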
+ new_bg_io_err.PermitUncheckedError(); return bg_error_; } else if (BackgroundErrorReason::kFlushNoWAL == reason || BackgroundErrorReason::kManifestWriteNoWAL == reason) { diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index 8b22d617b..2025ce65b 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -271,6 +271,14 @@ IOStatus RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset, file_read_hist_->Add(elapsed); } +#ifndef NDEBUG + auto pair = std::make_pair(&file_name_, &io_s); + if (offset == 0) { + TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read::BeforeReturn", + &pair); + } + TEST_SYNC_POINT_CALLBACK("RandomAccessFileReader::Read::AnyOffset", &pair); +#endif return io_s; } From 137cd4bb750bec6071254f3b47e89db4d2b58593 Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 6 Sep 2023 12:57:10 -0700 Subject: [PATCH 092/386] Disable error injection after compaction completion (#11798) Summary: https://github.com/facebook/rocksdb/issues/11789 added error injection during compaction to db_stress. However, error injection was not disabled after compaction completion, which resulted in some test failures due to stale errors. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11798 Reviewed By: cbi42 Differential Revision: D49022821 Pulled By: anand1976 fbshipit-source-id: 3cbfe18d55bee393697e063d05e7a7a7f88b7635 --- db_stress_tool/db_stress_listener.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/db_stress_tool/db_stress_listener.h b/db_stress_tool/db_stress_listener.h index 998a6cc74..aba95d4c0 100644 --- a/db_stress_tool/db_stress_listener.h +++ b/db_stress_tool/db_stress_listener.h @@ -110,6 +110,13 @@ class DbStressListener : public EventListener { } } + void OnSubcompactionCompleted(const SubcompactionJobInfo& /* si */) override { + if (FLAGS_read_fault_one_in) { + (void)fault_fs_guard->GetAndResetErrorCount(); + fault_fs_guard->DisableErrorInjection(); + } + } + void OnTableFileCreationStarted( const TableFileCreationBriefInfo& /*info*/) override { ++num_pending_file_creations_; From 195f35c08be7e376891ef13e2c6ea2a2c823de3a Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 6 Sep 2023 15:22:39 -0700 Subject: [PATCH 093/386] Add a unit test for the fix in #11786 (#11790) Summary: Tests a scenario where range tombstone reseek used to cause MergingIterator to discard non-ok status. Ran on main without https://github.com/facebook/rocksdb/issues/11786: ``` ./db_range_del_test --gtest_filter="*RangeDelReseekAfterFileReadError*" Note: Google Test filter = *RangeDelReseekAfterFileReadError* [==========] Running 1 test from 1 test case. [----------] Global test environment set-up. 
[----------] 1 test from DBRangeDelTest [ RUN ] DBRangeDelTest.RangeDelReseekAfterFileReadError db/db_range_del_test.cc:3577: Failure Value of: iter->Valid() Actual: true Expected: false [ FAILED ] DBRangeDelTest.RangeDelReseekAfterFileReadError (64 ms) ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11790 Reviewed By: ajkr Differential Revision: D48972869 Pulled By: cbi42 fbshipit-source-id: b1a71867533b0fb60af86f8ce8a9e391ba84dd57 --- db/db_range_del_test.cc | 132 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index bb75592c7..a19912aa6 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -3511,6 +3511,138 @@ TEST_F(DBRangeDelTest, MemtableMaxRangeDeletions) { ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable()); ASSERT_EQ(3, NumTableFilesAtLevel(0)); } + +TEST_F(DBRangeDelTest, RangeDelReseekAfterFileReadError) { + // This is to test a bug that is fixed in + // https://github.com/facebook/rocksdb/pull/11786. + Options opts = CurrentOptions(); + opts.num_levels = 7; + + // Set up LSM + // + // L4: F1: [key1] F2: [key2] + // L5: F3:[DeleteRange(key3, key6)] + // L6: F4:[key3, key6] + // Will inject error when reading from F2. + // SeekToFirst() should land on key1. + // Next() should encounter error when reading from F2, + // and range del reseek should not reset this status. + Random rnd(301); + // L6 + ASSERT_OK(Put(Key(3), rnd.RandomString(100))); + ASSERT_OK(Put(Key(6), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + // L5 + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), + Key(6))); + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + // L4 + ASSERT_OK(Put(Key(2), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(4); + std::string fname; + std::vector live_files; + db_->GetLiveFilesMetaData(&live_files); + for (auto& meta : live_files) { + if (meta.level == 4) { + fname = meta.name; + break; + } + } + ASSERT_TRUE(!fname.empty()); + ASSERT_OK(Put(Key(1), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(4); + + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::Read::BeforeReturn", [&fname](void* pair_ptr) { + auto p = + reinterpret_cast*>(pair_ptr); + if (p->first->find(fname) != std::string::npos) { + *p->second = IOStatus::IOError(); + p->second->SetRetryable(true); + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + std::unique_ptr iter{db_->NewIterator(ReadOptions())}; + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), Key(1)); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + iter.reset(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + + // Reverse scan + // LSM setup + // L4: F1: [key2] F2: [key7, key8] + // L5: F3:[[key3, key6)] + // L6: F4:[key1, key5] + // Ingest error when read from F1. + // SeekToLast() should land on key8. + // During Prev(), MergingIterator will encounter error when reading from F1 + // and do a range del reseek (it sees key5 covered by a range tombstone). 
+ DestroyAndReopen(opts); + // L6 + ASSERT_OK(Put(Key(1), rnd.RandomString(100))); + ASSERT_OK(Put(Key(5), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + // L5 + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), + Key(6))); + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + // L4 + ASSERT_OK(Put(Key(2), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(4); + live_files.clear(); + db_->GetLiveFilesMetaData(&live_files); + for (auto& meta : live_files) { + if (meta.level == 4) { + fname = meta.name; + break; + } + } + ASSERT_TRUE(!fname.empty()); + ASSERT_OK(Put(Key(7), rnd.RandomString(100))); + ASSERT_OK(Put(Key(8), rnd.RandomString(100))); + ASSERT_OK(Flush()); + MoveFilesToLevel(4); + + SyncPoint::GetInstance()->SetCallBack( + "RandomAccessFileReader::Read::AnyOffset", [&fname](void* pair_ptr) { + auto p = + reinterpret_cast*>(pair_ptr); + if (p->first->find(fname) != std::string::npos) { + *p->second = IOStatus::IOError(); + p->second->SetRetryable(true); + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + iter.reset(db_->NewIterator(ReadOptions())); + iter->SeekToLast(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), Key(8)); + // Note that for reverse scan, DBIter will need to ensure + // the key it returns is the one with the highest sequence number. + // To return key7, it internally calls MergingIterator::Prev() + // until it reaches a previous user key. + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + ASSERT_NOK(iter->status()); + ASSERT_TRUE(iter->status().IsIOError()); + + iter.reset(); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 05183bedcc5cad87add8dfb4df122c6fc1d94410 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Thu, 7 Sep 2023 14:57:39 -0700 Subject: [PATCH 094/386] Add .arcconfig to .gitignore (fb internal use) (#11803) Summary: This is for fb internal use. Please see the comment in internal Phabricator for details. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11803 Reviewed By: hx235 Differential Revision: D49065093 Pulled By: jaykorean fbshipit-source-id: acd71d7c1163f3c95c59c427caf944dacfe58ef6 --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8dd7e8296..8bd9fea59 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,7 @@ fbcode/ fbcode buckifier/*.pyc buckifier/__pycache__ +.arcconfig compile_commands.json clang-format-diff.py From e67ee46642cbb29ec39d9365225cd4dec15ccff7 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 8 Sep 2023 10:50:47 -0700 Subject: [PATCH 095/386] Suppress TSAN reports on AutoHyperClockTable::Lookup (#11806) Summary: This function uses racing reads for heuristic performance improvement. My change in https://github.com/facebook/rocksdb/issues/11792 only worked for clang, not gcc, and gcc does not accurately handle TSAN suppressions. I would have to mark much more code as suppressed than I want to. So I've taken a different approach: TSAN build does not use the racing reads but substitutes random results, as an extra test that a "correct" value is not needed for correct overall behavior. 
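To make the pattern concrete, here is a minimal sketch of the "racy hint, then definitive check" idea described above. `Slot`, `RacyHintMatches`, and `LookupSketch` are illustrative stand-ins rather than the actual `AutoHyperClockTable` code; only the `__SANITIZE_THREAD__` branch mirrors what the patch itself does.

```
#include <atomic>
#include <cstdint>
#include <cstdlib>

// Illustrative stand-in for a hash table slot; not RocksDB code.
struct Slot {
  uint64_t hashed_key[2];         // may be rewritten concurrently (racy to read)
  std::atomic<uint64_t> meta{0};  // ref count / state bits, properly synchronized
};

// Heuristic pre-check. A wrong answer is harmless: a false positive is caught
// by the definitive check below, and a false negative only costs another probe.
inline bool RacyHintMatches(const Slot& slot, const uint64_t wanted[2]) {
#ifdef __SANITIZE_THREAD__
  // Under TSAN, skip the racing read entirely and return a random guess,
  // which doubles as a test that correctness never depends on the hint.
  return std::rand() & 1;
#else
  return slot.hashed_key[0] == wanted[0] && slot.hashed_key[1] == wanted[1];
#endif
}

bool LookupSketch(Slot& slot, const uint64_t wanted[2]) {
  if (!RacyHintMatches(slot, wanted)) {
    return false;  // cheap negative; a full lookup would re-probe elsewhere
  }
  // Definitive check. The fetch_add with acquire semantics stands in for
  // taking a read reference on the slot before trusting its key bytes.
  slot.meta.fetch_add(1, std::memory_order_acquire);
  bool match =
      slot.hashed_key[0] == wanted[0] && slot.hashed_key[1] == wanted[1];
  if (!match) {
    slot.meta.fetch_sub(1, std::memory_order_release);  // undo the reference
  }
  return match;
}
```

The design point is that the hint only affects performance: any value, even a random one, must still lead to correct behavior, which is why substituting randomness under TSAN is a valid (and stress-testing) alternative to suppressions.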
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11806 Test Plan: manual TSAN builds & tests with cache_bench Reviewed By: ajkr Differential Revision: D49100115 Pulled By: pdillinger fbshipit-source-id: d6d0dfb796d710b953212dd3fc171b6e88fadea1 --- cache/clock_cache.cc | 16 +++++++++------- port/lang.h | 7 ------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index caa7db2f4..c7a8cf393 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -3089,13 +3089,15 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( HandleImpl* h = &arr[GetNextFromNextWithShift(next_with_shift)]; // Attempt cheap key match without acquiring a read ref. This could give a // false positive, which is re-checked after acquiring read ref, or false - // negative, which is re-checked in the full Lookup. - - // ReadAllowRace suppresses TSAN report on these reads. Also, using - // & rather than && to give more flexibility to the compiler and CPU, - // as it is safe to access [1] even if [0] doesn't match. - if ((int{ReadAllowRace(h->hashed_key[0]) == hashed_key[0]} & - int{ReadAllowRace(h->hashed_key[1]) == hashed_key[1]})) { + // negative, which is re-checked in the full Lookup. Also, this is a + // technical UB data race according to TSAN, but we don't need to read + // a "correct" value here for correct overall behavior. +#ifdef __SANITIZE_THREAD__ + bool probably_equal = Random::GetTLSInstance()->OneIn(2); +#else + bool probably_equal = h->hashed_key == hashed_key; +#endif + if (probably_equal) { // Increment acquire counter for definitive check uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, std::memory_order_acquire); diff --git a/port/lang.h b/port/lang.h index e0cb8da5d..a4201ca3b 100644 --- a/port/lang.h +++ b/port/lang.h @@ -69,13 +69,6 @@ constexpr bool kMustFreeHeapAllocations = false; #define TSAN_SUPPRESSION #endif // TSAN_SUPPRESSION -// Read memory while allowing data races. Only use where it is OK to read -// the wrong value, e.g. where reading the latest value improves performance. -template -TSAN_SUPPRESSION inline T ReadAllowRace(const T& v) { - return v; -} - // Compile-time CPU feature testing compatibility // // A way to be extra sure these defines have been included. From ed5b6c0d99f7cba6ac4c49c2ace8ea094f3884ba Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Mon, 11 Sep 2023 11:41:44 -0700 Subject: [PATCH 096/386] Avoid alignment in FilePrefetchBuffer during seek with async_io (#11793) Summary: During Seek, the iterator seeks every file on L0. In async_io, it submit the requests to seek on every file on L0 asynchronously using RocksDB FilePrefetchBuffer. However, FilePrefetchBuffer does alignment and reads extra bytes then needed that can increase the throughput. In case of non direct io, the alignment can be avoided. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11793 Test Plan: - Added a unit test that fails without this PR. 
- make crash_test -j32 completed successfully Reviewed By: anand1976 Differential Revision: D48985051 Pulled By: akankshamahajan15 fbshipit-source-id: 2d130a9e7c3df9c4fcd0408406e6277ab75a4389 --- file/file_prefetch_buffer.cc | 20 ++++-- file/file_prefetch_buffer.h | 3 +- file/prefetch_test.cc | 83 ++++++++++++++++++++++++ unreleased_history/bug_fixes/opt_seek.md | 1 + 4 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 unreleased_history/bug_fixes/opt_seek.md diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 5c9c2797c..0ea246f12 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -896,18 +896,24 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, // - prefetch_size on second. // Calculate length and offsets for reading. if (!DoesBufferContainData(curr_)) { + uint64_t roundup_len1; // Prefetch full data + prefetch_size in curr_. - rounddown_start1 = Rounddown(offset_to_read, alignment); - roundup_end1 = Roundup(offset_to_read + n + prefetch_size, alignment); - uint64_t roundup_len1 = roundup_end1 - rounddown_start1; - assert(roundup_len1 >= alignment); - assert(roundup_len1 % alignment == 0); - + if (is_eligible_for_prefetching || reader->use_direct_io()) { + rounddown_start1 = Rounddown(offset_to_read, alignment); + roundup_end1 = Roundup(offset_to_read + n + prefetch_size, alignment); + roundup_len1 = roundup_end1 - rounddown_start1; + assert(roundup_len1 >= alignment); + assert(roundup_len1 % alignment == 0); + } else { + rounddown_start1 = offset_to_read; + roundup_end1 = offset_to_read + n; + roundup_len1 = roundup_end1 - rounddown_start1; + } CalculateOffsetAndLen(alignment, rounddown_start1, roundup_len1, curr_, false, chunk_len1); assert(chunk_len1 == 0); assert(roundup_len1 >= chunk_len1); - read_len1 = static_cast(roundup_len1 - chunk_len1); + read_len1 = static_cast(roundup_len1); bufs_[curr_].offset_ = rounddown_start1; } diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 8c2e82476..e2eac5e8d 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -428,7 +428,8 @@ class FilePrefetchBuffer { void UpdateReadAheadSizeForUpperBound(uint64_t offset, size_t n) { // Adjust readhahead_size till upper_bound if upper_bound_offset_ is // set. - if (upper_bound_offset_ > 0 && upper_bound_offset_ > offset) { + if (readahead_size_ > 0 && upper_bound_offset_ > 0 && + upper_bound_offset_ > offset) { if (upper_bound_offset_ < offset + n + readahead_size_) { readahead_size_ = (upper_bound_offset_ - offset) - n; RecordTick(stats_, READAHEAD_TRIMMED); diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 782751c82..85a06151a 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -2877,6 +2877,89 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) { fpb.TryReadFromCacheAsync(io_opts, r.get(), 8192, 8192, &result, &s)); } +// Test to ensure when PrefetchAsync is called during seek, it doesn't do any +// alignment or prefetch extra if readahead is not enabled during seek. 
+TEST_F(FilePrefetchBufferTest, SeekWithoutAlignment) { + std::string fname = "seek-wwithout-alignment"; + Random rand(0); + std::string content = rand.RandomString(32768); + Write(fname, content); + + FileOptions opts; + std::unique_ptr r; + Read(fname, opts, &r); + + size_t alignment = r->file()->GetRequiredBufferAlignment(); + size_t n = alignment / 2; + + int read_async_called = 0; + SyncPoint::GetInstance()->SetCallBack( + "FilePrefetchBuffer::ReadAsync", + [&](void* /*arg*/) { read_async_called++; }); + SyncPoint::GetInstance()->EnableProcessing(); + + // Without readahead enabled, there will be no alignment and offset of buffer + // will be n. + { + FilePrefetchBuffer fpb( + /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, + /*track_min_offset=*/false, /*implicit_auto_readahead=*/true, + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, + /*upper_bound_offset=*/0, fs()); + + Slice result; + // Simulate a seek of half of alignment bytes at offset n. Due to the + // readahead settings, it won't prefetch extra or do any alignment and + // offset of buffer will be n. + Status s = fpb.PrefetchAsync(IOOptions(), r.get(), n, n, &result); + + // Platforms that don't have IO uring may not support async IO. + if (s.IsNotSupported()) { + return; + } + + ASSERT_TRUE(s.IsTryAgain()); + + IOOptions io_opts; + io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; + ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), n, n, &result, &s)); + + if (read_async_called) { + ASSERT_EQ(fpb.GetPrefetchOffset(), n); + } + } + + // With readahead enabled, it will do the alignment and prefetch and offset of + // buffer will be 0. + { + read_async_called = false; + FilePrefetchBuffer fpb( + /*readahead_size=*/16384, /*max_readahead_size=*/16384, /*enable=*/true, + /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, + /*upper_bound_offset=*/0, fs()); + + Slice result; + // Simulate a seek of half of alignment bytes at offset n. + Status s = fpb.PrefetchAsync(IOOptions(), r.get(), n, n, &result); + + // Platforms that don't have IO uring may not support async IO. + if (s.IsNotSupported()) { + return; + } + + ASSERT_TRUE(s.IsTryAgain()); + + IOOptions io_opts; + io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; + ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), n, n, &result, &s)); + + if (read_async_called) { + ASSERT_EQ(fpb.GetPrefetchOffset(), 0); + } + } +} + TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { std::string fname = "seek-with-block-cache-hit"; Random rand(0); diff --git a/unreleased_history/bug_fixes/opt_seek.md b/unreleased_history/bug_fixes/opt_seek.md new file mode 100644 index 000000000..742c3d60f --- /dev/null +++ b/unreleased_history/bug_fixes/opt_seek.md @@ -0,0 +1 @@ +Add a fix for async_io where during seek, when reading a block for seeking a target key in a file without any readahead, the iterator aligned the read on a page boundary and reading more than necessary. This increased the storage read bandwidth usage. 
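To summarize the behavior change in the PrefetchAsync seek path above in one place, the sketch below mirrors the offset/length decision the patch makes; `PlanSeekRead` and its parameters are invented for illustration and are not the actual `FilePrefetchBuffer` interface.

```
#include <cstdint>

// Hypothetical helper mirroring the patched logic: align and over-read only
// when prefetching is in play or direct I/O requires alignment; otherwise
// read exactly the requested [offset, offset + n) range.
struct ReadPlan {
  uint64_t start;
  uint64_t len;
};

inline uint64_t Rounddown(uint64_t x, uint64_t align) { return x - (x % align); }
inline uint64_t Roundup(uint64_t x, uint64_t align) {
  return ((x + align - 1) / align) * align;
}

ReadPlan PlanSeekRead(uint64_t offset, uint64_t n, uint64_t prefetch_size,
                      uint64_t alignment, bool eligible_for_prefetching,
                      bool use_direct_io) {
  if (eligible_for_prefetching || use_direct_io) {
    // Pre-existing behavior, still required when readahead is enabled or the
    // file is opened with direct I/O: align the window and add readahead.
    uint64_t start = Rounddown(offset, alignment);
    uint64_t end = Roundup(offset + n + prefetch_size, alignment);
    return {start, end - start};
  }
  // New behavior for buffered reads issued by a seek with readahead disabled:
  // no page alignment and no extra bytes, so no wasted read bandwidth.
  return {offset, n};
}
```

This path is exercised when an iterator created with `ReadOptions::async_io = true` seeks and the table reader issues `PrefetchAsync` without readahead; the `FilePrefetchBufferTest.SeekWithoutAlignment` test above checks both branches by inspecting the resulting buffer offset.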
From 760ea373a8bf6ecad250a990779bedcb88f0d3f3 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 11 Sep 2023 12:13:58 -0700 Subject: [PATCH 097/386] Introduce a wide column aware MergeOperator API (#11807) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11807 For now, RocksDB has limited support for using merge with wide columns: when a bunch of merge operands have to be applied to a wide-column base value, RocksDB currently passes only the value of the default column to the application's `MergeOperator`, which means there is no way to update any other columns during a merge. As a first step in making this more general, the patch adds a new API `FullMergeV3` to `MergeOperator`. `FullMergeV3`'s interface enables applications to receive a plain, wide-column, or non-existent base value as merge input, and to produce a new plain value, a new wide-column value, or an existing operand as merge output. Note that there are no limitations on the column names and values if the merge result is a wide-column entity. Also, the interface is general in the sense that it makes it possible e.g. for a merge that takes a plain base value and some deltas to produce a wide-column entity as a result. For backward compatibility, the default implementation of `FullMergeV3` falls back to `FullMergeV2` and implements the current logic where merge operands are applied to the default column of the base entity and any other columns are unchanged. (Note that with `FullMergeV3` in the `MergeOperator` interface, this behavior will become customizable.) This patch just introduces the new API and the default backward compatible implementation. I plan to integrate `FullMergeV3` into the query and compaction logic in subsequent diffs. Reviewed By: jaykorean Differential Revision: D49117253 fbshipit-source-id: 109e016f25cd130fc504790818d927bae7fec6bd --- db/merge_operator.cc | 82 ++++++++++ db/merge_test.cc | 267 +++++++++++++++++++++++++++++++ include/rocksdb/merge_operator.h | 52 ++++++ util/overload.h | 23 +++ 4 files changed, 424 insertions(+) create mode 100644 util/overload.h diff --git a/db/merge_operator.cc b/db/merge_operator.cc index d32585640..e5d80c8cb 100644 --- a/db/merge_operator.cc +++ b/db/merge_operator.cc @@ -9,6 +9,10 @@ #include "rocksdb/merge_operator.h" +#include + +#include "util/overload.h" + namespace ROCKSDB_NAMESPACE { bool MergeOperator::FullMergeV2(const MergeOperationInput& merge_in, @@ -23,6 +27,84 @@ bool MergeOperator::FullMergeV2(const MergeOperationInput& merge_in, &merge_out->new_value, merge_in.logger); } +bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const { + assert(merge_out); + + MergeOperationInput in_v2(merge_in.key, nullptr, merge_in.operand_list, + merge_in.logger); + + std::string new_value; + Slice existing_operand(nullptr, 0); + MergeOperationOutput out_v2(new_value, existing_operand); + + return std::visit( + overload{ + [&](const auto& existing) -> bool { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + in_v2.existing_value = &existing; + } + + const bool result = FullMergeV2(in_v2, &out_v2); + if (!result) { + merge_out->op_failure_scope = out_v2.op_failure_scope; + return false; + } + + if (existing_operand.data()) { + merge_out->new_value = existing_operand; + } else { + merge_out->new_value = std::move(new_value); + } + + return true; + }, + [&](const WideColumns& existing_columns) -> bool { + const bool has_default_column = + !existing_columns.empty() && + 
existing_columns.front().name() == kDefaultWideColumnName; + + Slice value_of_default; + if (has_default_column) { + value_of_default = existing_columns.front().value(); + } + + in_v2.existing_value = &value_of_default; + + const bool result = FullMergeV2(in_v2, &out_v2); + if (!result) { + merge_out->op_failure_scope = out_v2.op_failure_scope; + return false; + } + + merge_out->new_value = MergeOperationOutputV3::NewColumns(); + auto& new_columns = std::get( + merge_out->new_value); + new_columns.reserve(has_default_column + ? existing_columns.size() + : (existing_columns.size() + 1)); + + if (existing_operand.data()) { + new_columns.emplace_back(kDefaultWideColumnName.ToString(), + existing_operand.ToString()); + } else { + new_columns.emplace_back(kDefaultWideColumnName.ToString(), + std::move(new_value)); + } + + for (size_t i = has_default_column ? 1 : 0; + i < existing_columns.size(); ++i) { + new_columns.emplace_back(existing_columns[i].name().ToString(), + existing_columns[i].value().ToString()); + } + + return true; + }}, + merge_in.existing_value); +} + // The default implementation of PartialMergeMulti, which invokes // PartialMerge multiple times internally and merges two operands at // a time. diff --git a/db/merge_test.cc b/db/merge_test.cc index 6d1333e55..93a8535a7 100644 --- a/db/merge_test.cc +++ b/db/merge_test.cc @@ -18,6 +18,7 @@ #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" #include "rocksdb/utilities/db_ttl.h" +#include "rocksdb/wide_columns.h" #include "test_util/testharness.h" #include "util/coding.h" #include "utilities/merge_operators.h" @@ -607,6 +608,272 @@ TEST_F(MergeTest, MergeWithCompactionAndFlush) { ASSERT_OK(DestroyDB(dbname, Options())); } +TEST_F(MergeTest, FullMergeV3FallbackNewValue) { + // Test that the default FullMergeV3 implementation correctly handles the case + // when FullMergeV2 results in a new value. 
+ + const Slice key("foo"); + const MergeOperator::MergeOperationInputV3::OperandList operands{ + "first", "second", "third"}; + constexpr Logger* logger = nullptr; + + auto append_operator = + MergeOperators::CreateStringAppendOperator(std::string()); + + // No existing value + { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(append_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = std::get(merge_out.new_value); + ASSERT_EQ(result, operands[0].ToString() + operands[1].ToString() + + operands[2].ToString()); + } + + // Plain existing value + { + const Slice plain("plain"); + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(plain); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(append_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = std::get(merge_out.new_value); + ASSERT_EQ(result, plain.ToString() + operands[0].ToString() + + operands[1].ToString() + operands[2].ToString()); + } + + // Wide-column existing value with default column + { + const WideColumns entity{ + {kDefaultWideColumnName, "default"}, {"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(append_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = + std::get( + merge_out.new_value); + ASSERT_EQ(result.size(), entity.size()); + ASSERT_EQ(result[0].first, entity[0].name()); + ASSERT_EQ(result[0].second, + entity[0].value().ToString() + operands[0].ToString() + + operands[1].ToString() + operands[2].ToString()); + ASSERT_EQ(result[1].first, entity[1].name()); + ASSERT_EQ(result[1].second, entity[1].value()); + ASSERT_EQ(result[2].first, entity[2].name()); + ASSERT_EQ(result[2].second, entity[2].value()); + } + + // Wide-column existing value without default column + { + const WideColumns entity{{"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(append_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = + std::get( + merge_out.new_value); + ASSERT_EQ(result.size(), entity.size() + 1); + ASSERT_EQ(result[0].first, kDefaultWideColumnName); + ASSERT_EQ(result[0].second, operands[0].ToString() + + operands[1].ToString() + + operands[2].ToString()); + ASSERT_EQ(result[1].first, entity[0].name()); + ASSERT_EQ(result[1].second, entity[0].value()); + ASSERT_EQ(result[2].first, entity[1].name()); + ASSERT_EQ(result[2].second, entity[1].value()); + } +} + +TEST_F(MergeTest, FullMergeV3FallbackExistingOperand) { + // Test that the default FullMergeV3 implementation correctly handles the case + // when FullMergeV2 results in an existing operand. 
+ + const Slice key("foo"); + const MergeOperator::MergeOperationInputV3::OperandList operands{ + "first", "second", "third"}; + constexpr Logger* logger = nullptr; + + auto put_operator = MergeOperators::CreatePutOperator(); + + // No existing value + { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(put_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = std::get(merge_out.new_value); + ASSERT_EQ(result.data(), operands.back().data()); + ASSERT_EQ(result.size(), operands.back().size()); + } + + // Plain existing value + { + const Slice plain("plain"); + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(plain); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(put_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = std::get(merge_out.new_value); + ASSERT_EQ(result.data(), operands.back().data()); + ASSERT_EQ(result.size(), operands.back().size()); + } + + // Wide-column existing value with default column + { + const WideColumns entity{ + {kDefaultWideColumnName, "default"}, {"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(put_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = + std::get( + merge_out.new_value); + ASSERT_EQ(result.size(), entity.size()); + ASSERT_EQ(result[0].first, entity[0].name()); + ASSERT_EQ(result[0].second, operands.back()); + ASSERT_EQ(result[1].first, entity[1].name()); + ASSERT_EQ(result[1].second, entity[1].value()); + ASSERT_EQ(result[2].first, entity[2].name()); + ASSERT_EQ(result[2].second, entity[2].value()); + } + + // Wide-column existing value without default column + { + const WideColumns entity{{"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_TRUE(put_operator->FullMergeV3(merge_in, &merge_out)); + + const auto& result = + std::get( + merge_out.new_value); + ASSERT_EQ(result.size(), entity.size() + 1); + ASSERT_EQ(result[0].first, kDefaultWideColumnName); + ASSERT_EQ(result[0].second, operands.back()); + ASSERT_EQ(result[1].first, entity[0].name()); + ASSERT_EQ(result[1].second, entity[0].value()); + ASSERT_EQ(result[2].first, entity[1].name()); + ASSERT_EQ(result[2].second, entity[1].value()); + } +} + +TEST_F(MergeTest, FullMergeV3FallbackFailure) { + // Test that the default FullMergeV3 implementation correctly handles the case + // when FullMergeV2 fails. 
+ + const Slice key("foo"); + const MergeOperator::MergeOperationInputV3::OperandList operands{ + "first", "second", "third"}; + constexpr Logger* logger = nullptr; + + class FailMergeOperator : public MergeOperator { + public: + bool FullMergeV2(const MergeOperationInput& /* merge_in */, + MergeOperationOutput* merge_out) const override { + assert(merge_out); + merge_out->op_failure_scope = OpFailureScope::kMustMerge; + + return false; + } + + const char* Name() const override { return "FailMergeOperator"; } + }; + + FailMergeOperator fail_operator; + + // No existing value + { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_FALSE(fail_operator.FullMergeV3(merge_in, &merge_out)); + ASSERT_EQ(merge_out.op_failure_scope, + MergeOperator::OpFailureScope::kMustMerge); + } + + // Plain existing value + { + const Slice plain("plain"); + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(plain); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_FALSE(fail_operator.FullMergeV3(merge_in, &merge_out)); + ASSERT_EQ(merge_out.op_failure_scope, + MergeOperator::OpFailureScope::kMustMerge); + } + + // Wide-column existing value with default column + { + const WideColumns entity{ + {kDefaultWideColumnName, "default"}, {"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_FALSE(fail_operator.FullMergeV3(merge_in, &merge_out)); + ASSERT_EQ(merge_out.op_failure_scope, + MergeOperator::OpFailureScope::kMustMerge); + } + + // Wide-column existing value without default column + { + const WideColumns entity{{"one", "1"}, {"two", "2"}}; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(entity); + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + + MergeOperator::MergeOperationOutputV3 merge_out; + + ASSERT_FALSE(fail_operator.FullMergeV3(merge_in, &merge_out)); + ASSERT_EQ(merge_out.op_failure_scope, + MergeOperator::OpFailureScope::kMustMerge); + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/include/rocksdb/merge_operator.h b/include/rocksdb/merge_operator.h index 077130475..4db9380b6 100644 --- a/include/rocksdb/merge_operator.h +++ b/include/rocksdb/merge_operator.h @@ -8,10 +8,13 @@ #include #include #include +#include +#include #include #include "rocksdb/customizable.h" #include "rocksdb/slice.h" +#include "rocksdb/wide_columns.h" namespace ROCKSDB_NAMESPACE { @@ -158,6 +161,55 @@ class MergeOperator : public Customizable { virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const; + struct MergeOperationInputV3 { + using ExistingValue = std::variant; + using OperandList = std::vector; + + explicit MergeOperationInputV3(const Slice& _key, + ExistingValue&& _existing_value, + const OperandList& _operand_list, + Logger* _logger) + : key(_key), + existing_value(std::move(_existing_value)), + operand_list(_operand_list), + logger(_logger) {} + + // The user key, including the user-defined timestamp if 
applicable. + const Slice& key; + // The base value of the merge operation. Can be one of three things (see + // the ExistingValue variant above): no existing value, plain existing + // value, or wide-column existing value. + ExistingValue existing_value; + // The list of operands to apply. + const OperandList& operand_list; + // The logger to use in case a failure happens during the merge operation. + Logger* logger; + }; + + struct MergeOperationOutputV3 { + using NewColumns = std::vector>; + using NewValue = std::variant; + + // The result of the merge operation. Can be one of three things (see the + // NewValue variant above): a new plain value, a new wide-column value, or + // an existing merge operand. + NewValue new_value; + // The scope of the failure if applicable. See above for more details. + OpFailureScope op_failure_scope = OpFailureScope::kDefault; + }; + + // ************************** UNDER CONSTRUCTION ***************************** + // An extended version of FullMergeV2() that supports wide columns on both the + // input and the output side, enabling the application to perform general + // transformations during merges. For backward compatibility, the default + // implementation calls FullMergeV2(). Specifically, if there is no base value + // or the base value is a plain key-value, the default implementation falls + // back to FullMergeV2(). If the base value is a wide-column entity, the + // default implementation invokes FullMergeV2() to perform the merge on the + // default column, and leaves any other columns unchanged. + virtual bool FullMergeV3(const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const; + // This function performs merge(left_op, right_op) // when both the operands are themselves merge operation types // that you would have passed to a DB::Merge() call in the same order diff --git a/util/overload.h b/util/overload.h new file mode 100644 index 000000000..428e805de --- /dev/null +++ b/util/overload.h @@ -0,0 +1,23 @@ +// (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// A helper template that can combine multiple functors into a single one to be +// used with std::visit for example. It also works with lambdas, since it +// comes with an explicit deduction guide. +template +struct overload : Ts... { + using Ts::operator()...; +}; + +template +overload(Ts...) 
-> overload; + +} // namespace ROCKSDB_NAMESPACE From 694e49cbb1cff88fbb84a96394a0f76b7bac9e41 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 11 Sep 2023 12:54:50 -0700 Subject: [PATCH 098/386] Add a unit test for the fix in #11763 (#11810) Summary: The unit test depended on https://github.com/facebook/rocksdb/issues/11753, which landed after the bug fix Pull Request resolved: https://github.com/facebook/rocksdb/pull/11810 Reviewed By: jaykorean Differential Revision: D49128695 Pulled By: ajkr fbshipit-source-id: e0a98bd65a292a7c7bd03913650f73c26d0864c7 --- test_util/mock_time_env.h | 4 +++ util/rate_limiter_test.cc | 55 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/test_util/mock_time_env.h b/test_util/mock_time_env.h index e11bed0d1..19bb9e76d 100644 --- a/test_util/mock_time_env.h +++ b/test_util/mock_time_env.h @@ -11,6 +11,7 @@ #include "port/port.h" #include "rocksdb/system_clock.h" #include "test_util/mock_time_env.h" +#include "test_util/sync_point.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { @@ -86,7 +87,10 @@ class MockSystemClock : public SystemClockWrapper { std::this_thread::yield(); bool mock_timeout = Random::GetTLSInstance()->OneIn(2); if (mock_timeout) { + TEST_SYNC_POINT("MockSystemClock::TimedWait:UnlockedPreSleep"); current_time_us_.fetch_add(delay_micros); + TEST_SYNC_POINT("MockSystemClock::TimedWait:UnlockedPostSleep1"); + TEST_SYNC_POINT("MockSystemClock::TimedWait:UnlockedPostSleep2"); } cv->GetMutex()->Lock(); return mock_timeout; diff --git a/util/rate_limiter_test.cc b/util/rate_limiter_test.cc index dfaa3a2cd..f31981a5c 100644 --- a/util/rate_limiter_test.cc +++ b/util/rate_limiter_test.cc @@ -498,6 +498,61 @@ TEST_F(RateLimiterTest, AutoTuneIncreaseWhenFull) { ASSERT_LT(new_bytes_per_sec, orig_bytes_per_sec); } +TEST_F(RateLimiterTest, WaitHangingBug) { + // At t=0: Threads 0 and 1 request `kBytesPerRefill` bytes at low-pri. One + // will be granted immediately and the other will enter `TimedWait()`. + // + // At t=`kMicrosPerRefill`: Thread 2 requests `kBytesPerRefill` bytes at + // low-pri. Thread 2's request enters the queue. To expose the bug scenario, + // `SyncPoint`s ensure this happens while the lock is temporarily released in + // `TimedWait()`. Before the bug fix, Thread 2's request would then hang in + // `Wait()` interminably. 
+ const int kBytesPerSecond = 100; + const int kMicrosPerSecond = 1000 * 1000; + const int kMicrosPerRefill = kMicrosPerSecond; + const int kBytesPerRefill = + kBytesPerSecond * kMicrosPerRefill / kMicrosPerSecond; + + auto mock_clock = + std::make_shared(Env::Default()->GetSystemClock()); + std::unique_ptr limiter(new GenericRateLimiter( + kBytesPerSecond, kMicrosPerRefill, 10 /* fairness */, + RateLimiter::Mode::kWritesOnly, mock_clock, false /* auto_tuned */)); + std::array request_threads; + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"RateLimiterTest::WaitHangingBug:InitialRequestsReady", + "MockSystemClock::TimedWait:UnlockedPreSleep"}, + {"MockSystemClock::TimedWait:UnlockedPostSleep1", + "RateLimiterTest::WaitHangingBug:TestThreadRequestBegin"}, + {"RateLimiterTest::WaitHangingBug:TestThreadRequestEnd", + "MockSystemClock::TimedWait:UnlockedPostSleep2"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + for (int i = 0; i < 2; i++) { + request_threads[i] = std::thread([&]() { + limiter->Request(kBytesPerRefill /* bytes */, Env::IOPriority::IO_LOW, + nullptr /* stats */, RateLimiter::OpType::kWrite); + }); + } + while (limiter->GetTotalRequests() < 2) { + } + TEST_SYNC_POINT("RateLimiterTest::WaitHangingBug:InitialRequestsReady"); + + TEST_SYNC_POINT("RateLimiterTest::WaitHangingBug:TestThreadRequestBegin"); + request_threads[2] = std::thread([&]() { + limiter->Request(kBytesPerRefill /* bytes */, Env::IOPriority::IO_LOW, + nullptr /* stats */, RateLimiter::OpType::kWrite); + }); + while (limiter->GetTotalRequests() < 3) { + } + TEST_SYNC_POINT("RateLimiterTest::WaitHangingBug:TestThreadRequestEnd"); + + for (int i = 0; i < 3; i++) { + request_threads[i].join(); + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 8fc78a3a9e1d24ba55731b70c0c25cef0765dbc8 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 11 Sep 2023 16:32:32 -0700 Subject: [PATCH 099/386] Add helper methods WideColumnsHelper::{Has,Get}DefaultColumn (#11813) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11813 The patch adds a couple of helper methods `WideColumnsHelper::{Has,Get}DefaultColumn` to eliminate some code duplication. 
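For context on the duplication being removed, a simplified before/after sketch of a typical call site follows; it is not taken verbatim from any one file and assumes compilation inside the RocksDB tree, since `wide_columns_helper.h` is an internal header.

```
#include "db/wide/wide_columns_helper.h"  // internal header extended by this patch
#include "rocksdb/wide_columns.h"

namespace ROCKSDB_NAMESPACE {

// Before: each caller spelled out the check against the default column.
inline Slice ValueOfDefaultColumnBefore(const WideColumns& columns) {
  if (!columns.empty() && columns.front().name() == kDefaultWideColumnName) {
    return columns.front().value();
  }
  return Slice();
}

// After: the same intent goes through the shared helpers.
inline Slice ValueOfDefaultColumnAfter(const WideColumns& columns) {
  if (WideColumnsHelper::HasDefaultColumn(columns)) {
    return WideColumnsHelper::GetDefaultColumn(columns);
  }
  return Slice();
}

}  // namespace ROCKSDB_NAMESPACE
```

Note that `GetDefaultColumn` asserts `HasDefaultColumn`, so callers are expected to check first, exactly as the updated call sites in the diff do.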
Reviewed By: jaykorean Differential Revision: D49166682 fbshipit-source-id: f229ca5b94599f7445a0112b10f8317292505c82 --- db/db_iter.cc | 6 +++--- db/merge_helper.cc | 3 ++- db/merge_operator.cc | 4 ++-- db/version_set.cc | 8 +++----- db/version_set_sync_and_async.h | 7 ++----- db/wide/wide_column_serialization.cc | 5 +++-- db/wide/wide_columns_helper.h | 16 ++++++++++++++-- db_stress_tool/db_stress_common.cc | 8 ++------ db_stress_tool/expected_state.cc | 9 +++------ db_stress_tool/no_batched_ops_stress.cc | 13 ++++++------- tools/ldb_cmd.cc | 2 +- 11 files changed, 41 insertions(+), 40 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index e547c2e1c..7e801135b 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -18,6 +18,7 @@ #include "db/merge_helper.h" #include "db/pinned_iterators_manager.h" #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "file/filename.h" #include "logging/logging.h" #include "memory/arena.h" @@ -230,9 +231,8 @@ bool DBIter::SetValueAndColumnsFromEntity(Slice slice) { return false; } - if (!wide_columns_.empty() && - wide_columns_[0].name() == kDefaultWideColumnName) { - value_ = wide_columns_[0].value(); + if (WideColumnsHelper::HasDefaultColumn(wide_columns_)) { + value_ = WideColumnsHelper::GetDefaultColumn(wide_columns_); } return true; diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 110ac9622..8c7e3d441 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -13,6 +13,7 @@ #include "db/compaction/compaction_iteration_stats.h" #include "db/dbformat.h" #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "logging/logging.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics_impl.h" @@ -135,7 +136,7 @@ Status MergeHelper::TimedFullMergeWithEntity( } const bool has_default_column = - !base_columns.empty() && base_columns[0].name() == kDefaultWideColumnName; + WideColumnsHelper::HasDefaultColumn(base_columns); Slice value_of_default; if (has_default_column) { diff --git a/db/merge_operator.cc b/db/merge_operator.cc index e5d80c8cb..bb5dbbc36 100644 --- a/db/merge_operator.cc +++ b/db/merge_operator.cc @@ -11,6 +11,7 @@ #include +#include "db/wide/wide_columns_helper.h" #include "util/overload.h" namespace ROCKSDB_NAMESPACE { @@ -63,8 +64,7 @@ bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in, }, [&](const WideColumns& existing_columns) -> bool { const bool has_default_column = - !existing_columns.empty() && - existing_columns.front().name() == kDefaultWideColumnName; + WideColumnsHelper::HasDefaultColumn(existing_columns); Slice value_of_default; if (has_default_column) { diff --git a/db/version_set.cc b/db/version_set.cc index c5057028d..572da83e3 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -39,6 +39,7 @@ #include "db/version_builder.h" #include "db/version_edit.h" #include "db/version_edit_handler.h" +#include "db/wide/wide_columns_helper.h" #include "file/file_util.h" #include "table/compaction_merging_iterator.h" @@ -2452,12 +2453,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, fp.GetHitFileLevel()); if (is_blob_index && do_merge && (value || columns)) { - assert(!columns || - (!columns->columns().empty() && - columns->columns().front().name() == kDefaultWideColumnName)); - Slice blob_index = - value ? *value : columns->columns().front().value(); + value ? 
*value + : WideColumnsHelper::GetDefaultColumn(columns->columns()); TEST_SYNC_POINT_CALLBACK("Version::Get::TamperWithBlobIndex", &blob_index); diff --git a/db/version_set_sync_and_async.h b/db/version_set_sync_and_async.h index 2507762e8..75776b620 100644 --- a/db/version_set_sync_and_async.h +++ b/db/version_set_sync_and_async.h @@ -113,12 +113,9 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST) } else { assert(iter->columns); - assert(!iter->columns->columns().empty()); - assert(iter->columns->columns().front().name() == - kDefaultWideColumnName); - tmp_s = - blob_index.DecodeFrom(iter->columns->columns().front().value()); + tmp_s = blob_index.DecodeFrom( + WideColumnsHelper::GetDefaultColumn(iter->columns->columns())); } if (tmp_s.ok()) { diff --git a/db/wide/wide_column_serialization.cc b/db/wide/wide_column_serialization.cc index f62143c40..cd1800795 100644 --- a/db/wide/wide_column_serialization.cc +++ b/db/wide/wide_column_serialization.cc @@ -9,6 +9,7 @@ #include #include +#include "db/wide/wide_columns_helper.h" #include "rocksdb/slice.h" #include "util/autovector.h" #include "util/coding.h" @@ -169,12 +170,12 @@ Status WideColumnSerialization::GetValueOfDefaultColumn(Slice& input, return s; } - if (columns.empty() || columns[0].name() != kDefaultWideColumnName) { + if (!WideColumnsHelper::HasDefaultColumn(columns)) { value.clear(); return Status::OK(); } - value = columns[0].value(); + value = WideColumnsHelper::GetDefaultColumn(columns); return Status::OK(); } diff --git a/db/wide/wide_columns_helper.h b/db/wide/wide_columns_helper.h index 83c8548a3..7e4cee5cf 100644 --- a/db/wide/wide_columns_helper.h +++ b/db/wide/wide_columns_helper.h @@ -11,11 +11,23 @@ #include "rocksdb/wide_columns.h" namespace ROCKSDB_NAMESPACE { + class WideColumnsHelper { public: - static void DumpWideColumns(const WideColumns& columns, std::ostream& oss, + static void DumpWideColumns(const WideColumns& columns, std::ostream& os, bool hex); - static Status DumpSliceAsWideColumns(const Slice& value, std::ostream& oss, + + static Status DumpSliceAsWideColumns(const Slice& value, std::ostream& os, bool hex); + + static bool HasDefaultColumn(const WideColumns& columns) { + return !columns.empty() && columns.front().name() == kDefaultWideColumnName; + } + + static const Slice& GetDefaultColumn(const WideColumns& columns) { + assert(HasDefaultColumn(columns)); + return columns.front().value(); + } }; + } // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc index 93436d0f8..a519f39fb 100644 --- a/db_stress_tool/db_stress_common.cc +++ b/db_stress_tool/db_stress_common.cc @@ -296,15 +296,11 @@ bool VerifyWideColumns(const Slice& value, const WideColumns& columns) { } bool VerifyWideColumns(const WideColumns& columns) { - if (columns.empty()) { + if (!WideColumnsHelper::HasDefaultColumn(columns)) { return false; } - if (columns.front().name() != kDefaultWideColumnName) { - return false; - } - - const Slice& value_of_default = columns.front().value(); + const Slice& value_of_default = WideColumnsHelper::GetDefaultColumn(columns); return VerifyWideColumns(value_of_default, columns); } diff --git a/db_stress_tool/expected_state.cc b/db_stress_tool/expected_state.cc index dd210cab4..b483e154c 100644 --- a/db_stress_tool/expected_state.cc +++ b/db_stress_tool/expected_state.cc @@ -6,8 +6,8 @@ #include #ifdef GFLAGS - #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include 
"db_stress_tool/db_stress_common.h" #include "db_stress_tool/db_stress_shared_state.h" #include "db_stress_tool/expected_state.h" @@ -363,7 +363,6 @@ bool FileExpectedStateManager::HasHistory() { return saved_seqno_ != kMaxSequenceNumber; } - namespace { // An `ExpectedStateTraceRecordHandler` applies a configurable number of @@ -462,10 +461,8 @@ class ExpectedStateTraceRecordHandler : public TraceRecord::Handler, column_family_id, key, columns); } - assert(!columns.empty()); - assert(columns.front().name() == kDefaultWideColumnName); - - const uint32_t value_base = GetValueBase(columns.front().value()); + const uint32_t value_base = + GetValueBase(WideColumnsHelper::GetDefaultColumn(columns)); state_->SyncPut(column_family_id, static_cast(key_id), value_base); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 1d2e4a3b5..9fc818b09 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -9,6 +9,7 @@ #include "db_stress_tool/expected_state.h" #ifdef GFLAGS +#include "db/wide/wide_columns_helper.h" #include "db_stress_tool/db_stress_common.h" #include "rocksdb/utilities/transaction_db.h" #include "utilities/fault_injection_fs.h" @@ -165,9 +166,8 @@ class NonBatchedOpsStressTest : public StressTest { if (s.ok()) { const WideColumns& columns = result.columns(); - if (!columns.empty() && - columns.front().name() == kDefaultWideColumnName) { - from_db = columns.front().value().ToString(); + if (WideColumnsHelper::HasDefaultColumn(columns)) { + from_db = WideColumnsHelper::GetDefaultColumn(columns).ToString(); } if (!VerifyWideColumns(columns)) { @@ -251,9 +251,9 @@ class NonBatchedOpsStressTest : public StressTest { if (statuses[j].ok()) { const WideColumns& columns = results[j].columns(); - if (!columns.empty() && - columns.front().name() == kDefaultWideColumnName) { - from_db = columns.front().value().ToString(); + if (WideColumnsHelper::HasDefaultColumn(columns)) { + from_db = + WideColumnsHelper::GetDefaultColumn(columns).ToString(); } if (!VerifyWideColumns(columns)) { @@ -1276,7 +1276,6 @@ class NonBatchedOpsStressTest : public StressTest { const size_t sz = GenerateValue(value_base, value, sizeof(value)); const Slice v(value, sz); - Status s; if (FLAGS_use_merge) { diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 77096b113..a6cfdb43f 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1087,7 +1087,7 @@ std::string LDBCommand::PrintKeyValueOrWideColumns( bool is_key_hex, bool is_value_hex) { if (wide_columns.empty() || (wide_columns.size() == 1 && - wide_columns.front().name() == kDefaultWideColumnName)) { + WideColumnsHelper::HasDefaultColumn(wide_columns))) { return PrintKeyValue(key.ToString(), value.ToString(), is_key_hex, is_value_hex); } From 4b123f3a54b8a5980b24c9c58f44b3227aa72c6d Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 12 Sep 2023 10:00:57 -0700 Subject: [PATCH 100/386] Change file size related variables type to uint64_t in PickCompactionToReduceSizeAmp() (#11814) Summary: **Context/Summary:** size_t is not most likely not needed as SortedRun::size/compensated_file_size is uint64_t. This is a pre-requisite to addressing https://github.com/facebook/rocksdb/pull/11749/files#r1321828933. 
Other places already uses uint64_t e.g, https://github.com/facebook/rocksdb/blob/8.6.fb/db/compaction/compaction_picker_universal.cc#L349-L353 **Test** CI Pull Request resolved: https://github.com/facebook/rocksdb/pull/11814 Reviewed By: ajkr Differential Revision: D49169155 Pulled By: hx235 fbshipit-source-id: 2b3ad70e6f18aa360e94ed8907c8534ad2797e62 --- db/compaction/compaction_picker_universal.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/compaction/compaction_picker_universal.cc b/db/compaction/compaction_picker_universal.cc index ecd67131d..3b2156f91 100644 --- a/db/compaction/compaction_picker_universal.cc +++ b/db/compaction/compaction_picker_universal.cc @@ -797,9 +797,9 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { if (sorted_runs_[end_index].being_compacted) { return nullptr; } - const size_t base_sr_size = sorted_runs_[end_index].size; + const uint64_t base_sr_size = sorted_runs_[end_index].size; size_t start_index = end_index; - size_t candidate_size = 0; + uint64_t candidate_size = 0; // Get longest span (i.e, [start_index, end_index]) of available sorted runs while (start_index > 0) { From 1e63fc9925919257d0435ea206763acc6b92a1c6 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Tue, 12 Sep 2023 12:36:07 -0700 Subject: [PATCH 101/386] Add a helper method WideColumnsHelper::SortColumns (#11823) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11823 Similarly to https://github.com/facebook/rocksdb/pull/11813, the patch is a small refactoring that eliminates some copy-paste around sorting the columns of entities by column name. Reviewed By: jaykorean Differential Revision: D49195504 fbshipit-source-id: d48c9f290e3203f838cc5949856c469ecf730008 --- db/compaction/compaction_iterator.cc | 8 +++----- db/wide/wide_columns_helper.cc | 10 ++++++++++ db/wide/wide_columns_helper.h | 2 ++ db/write_batch.cc | 6 ++---- db_stress_tool/db_stress_common.cc | 5 +---- table/sst_file_writer.cc | 6 ++---- 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index 1c3ca5e1e..abfa7a692 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -14,6 +14,7 @@ #include "db/blob/prefetch_buffer_collection.h" #include "db/snapshot_checker.h" #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "logging/logging.h" #include "port/likely.h" #include "rocksdb/listener.h" @@ -423,16 +424,13 @@ bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip, return false; } else if (decision == CompactionFilter::Decision::kChangeWideColumnEntity) { WideColumns sorted_columns; - sorted_columns.reserve(new_columns.size()); + for (const auto& column : new_columns) { sorted_columns.emplace_back(column.first, column.second); } - std::sort(sorted_columns.begin(), sorted_columns.end(), - [](const WideColumn& lhs, const WideColumn& rhs) { - return lhs.name().compare(rhs.name()) < 0; - }); + WideColumnsHelper::SortColumns(sorted_columns); { const Status s = WideColumnSerialization::Serialize( diff --git a/db/wide/wide_columns_helper.cc b/db/wide/wide_columns_helper.cc index 4ef0c148e..cf829ce79 100644 --- a/db/wide/wide_columns_helper.cc +++ b/db/wide/wide_columns_helper.cc @@ -5,6 +5,8 @@ #include "db/wide/wide_columns_helper.h" +#include + #include "db/wide/wide_column_serialization.h" namespace ROCKSDB_NAMESPACE { @@ -22,6 +24,7 @@ void 
WideColumnsHelper::DumpWideColumns(const WideColumns& columns, os << ' ' << *it; } } + Status WideColumnsHelper::DumpSliceAsWideColumns(const Slice& value, std::ostream& os, bool hex) { WideColumns columns; @@ -33,4 +36,11 @@ Status WideColumnsHelper::DumpSliceAsWideColumns(const Slice& value, return s; } +void WideColumnsHelper::SortColumns(WideColumns& columns) { + std::sort(columns.begin(), columns.end(), + [](const WideColumn& lhs, const WideColumn& rhs) { + return lhs.name().compare(rhs.name()) < 0; + }); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/wide/wide_columns_helper.h b/db/wide/wide_columns_helper.h index 7e4cee5cf..86c77c02d 100644 --- a/db/wide/wide_columns_helper.h +++ b/db/wide/wide_columns_helper.h @@ -28,6 +28,8 @@ class WideColumnsHelper { assert(HasDefaultColumn(columns)); return columns.front().value(); } + + static void SortColumns(WideColumns& columns); }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/write_batch.cc b/db/write_batch.cc index 78a137d21..2851b8559 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -57,6 +57,7 @@ #include "db/snapshot_impl.h" #include "db/trim_history_scheduler.h" #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "db/write_batch_internal.h" #include "monitoring/perf_context_imp.h" #include "monitoring/statistics_impl.h" @@ -948,10 +949,7 @@ Status WriteBatchInternal::PutEntity(WriteBatch* b, uint32_t column_family_id, } WideColumns sorted_columns(columns); - std::sort(sorted_columns.begin(), sorted_columns.end(), - [](const WideColumn& lhs, const WideColumn& rhs) { - return lhs.name().compare(rhs.name()) < 0; - }); + WideColumnsHelper::SortColumns(sorted_columns); std::string entity; const Status s = WideColumnSerialization::Serialize(sorted_columns, entity); diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc index a519f39fb..6a5986665 100644 --- a/db_stress_tool/db_stress_common.cc +++ b/db_stress_tool/db_stress_common.cc @@ -270,10 +270,7 @@ WideColumns GenerateExpectedWideColumns(uint32_t value_base, WideColumns columns = GenerateWideColumns(value_base, slice); - std::sort(columns.begin(), columns.end(), - [](const WideColumn& lhs, const WideColumn& rhs) { - return lhs.name().compare(rhs.name()) < 0; - }); + WideColumnsHelper::SortColumns(columns); return columns; } diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index b929a7e28..1ef0f98aa 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -10,6 +10,7 @@ #include "db/db_impl/db_impl.h" #include "db/dbformat.h" #include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "file/writable_file_writer.h" #include "rocksdb/file_system.h" #include "rocksdb/table.h" @@ -134,10 +135,7 @@ struct SstFileWriter::Rep { Status AddEntity(const Slice& user_key, const WideColumns& columns) { WideColumns sorted_columns(columns); - std::sort(sorted_columns.begin(), sorted_columns.end(), - [](const WideColumn& lhs, const WideColumn& rhs) { - return lhs.name().compare(rhs.name()) < 0; - }); + WideColumnsHelper::SortColumns(sorted_columns); std::string entity; const Status s = WideColumnSerialization::Serialize(sorted_columns, entity); From 9d71682d1bbbd544af77fed630c26eea687de490 Mon Sep 17 00:00:00 2001 From: Changyu Bi <102700264+cbi42@users.noreply.github.com> Date: Tue, 12 Sep 2023 15:48:36 -0700 Subject: [PATCH 102/386] Add statistics `COMPACTION_CPU_TOTAL_TIME` for total compaction time (#11741) Summary: Existing compaction 
statistics are `COMPACTION_TIME` and `COMPACTION_CPU_TIME` which are histogram and are logged at the end of a compaction. The new statistics `COMPACTION_CPU_TOTAL_TIME` is for cumulative total compaction time which is updated regularly during a compaction. This allows user to more closely track compaction cpu usage. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11741 Test Plan: * new unit test `DBTestWithParam.CompactionTotalTimeTest` Reviewed By: ajkr Differential Revision: D48608094 Pulled By: cbi42 fbshipit-source-id: b597109f3e4bf2237fb5a216b6fd036e5363b4c0 --- db/compaction/compaction_job.cc | 12 +++- db/db_test.cc | 58 +++++++++++++++++++ include/rocksdb/statistics.h | 3 + monitoring/statistics.cc | 1 + .../new_features/compaction_time_stats.md | 1 + 5 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/new_features/compaction_time_stats.md diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 8ea806816..66f377a5f 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1318,6 +1318,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { "CompactionJob::ProcessKeyValueCompaction()::Processing", reinterpret_cast( const_cast(sub_compact->compaction))); + uint64_t last_cpu_micros = prev_cpu_micros; while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) { // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid() // returns true. @@ -1329,6 +1330,12 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats); c_iter->ResetRecordCounts(); RecordCompactionIOStats(); + + uint64_t cur_cpu_micros = db_options_.clock->CPUMicros(); + assert(cur_cpu_micros >= last_cpu_micros); + RecordTick(stats_, COMPACTION_CPU_TOTAL_TIME, + cur_cpu_micros - last_cpu_micros); + last_cpu_micros = cur_cpu_micros; } // Add current compaction_iterator key to target compaction output, if the @@ -1436,8 +1443,11 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { sub_compact->Current().UpdateBlobStats(); } + uint64_t cur_cpu_micros = db_options_.clock->CPUMicros(); sub_compact->compaction_job_stats.cpu_micros = - db_options_.clock->CPUMicros() - prev_cpu_micros; + cur_cpu_micros - prev_cpu_micros; + RecordTick(stats_, COMPACTION_CPU_TOTAL_TIME, + cur_cpu_micros - last_cpu_micros); if (measure_io_stats_) { sub_compact->compaction_job_stats.file_write_nanos += diff --git a/db/db_test.cc b/db/db_test.cc index d5c5c4413..8e7717a7c 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -6040,6 +6040,64 @@ TEST_P(DBTestWithParam, FilterCompactionTimeTest) { delete itr; } +#ifndef OS_WIN +// CPUMicros() is not supported. See WinClock::CPUMicros(). 
+TEST_P(DBTestWithParam, CompactionTotalTimeTest) { + int record_count = 0; + class TestStatistics : public StatisticsImpl { + public: + explicit TestStatistics(int* record_count) + : StatisticsImpl(nullptr), record_count_(record_count) {} + void recordTick(uint32_t ticker_type, uint64_t count) override { + if (ticker_type == COMPACTION_CPU_TOTAL_TIME) { + ASSERT_GT(count, 0); + (*record_count_)++; + } + StatisticsImpl::recordTick(ticker_type, count); + } + + int* record_count_; + }; + + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.create_if_missing = true; + options.statistics = std::make_shared(&record_count); + options.statistics->set_stats_level(kExceptTimeForMutex); + options.max_subcompactions = max_subcompactions_; + DestroyAndReopen(options); + + int n = 0; + for (int table = 0; table < 4; ++table) { + for (int i = 0; i < 1000; ++i) { + ASSERT_OK(Put(std::to_string(table * 1000 + i), "val")); + ++n; + } + // Overlapping tables + ASSERT_OK(Put(std::to_string(0), "val")); + ++n; + ASSERT_OK(Flush()); + } + + CompactRangeOptions cro; + cro.exclusive_manual_compaction = exclusive_manual_compaction_; + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + + // Hard-coded number in CompactionJob::ProcessKeyValueCompaction(). + const int kRecordStatsEvery = 1000; + // The stat COMPACTION_CPU_TOTAL_TIME should be recorded + // during compaction and once more after compaction. + ASSERT_EQ(n / kRecordStatsEvery + 1, record_count); + + // Check that COMPACTION_CPU_TOTAL_TIME correctly + // records compaction time after a compaction. + HistogramData h; + options.statistics->histogramData(COMPACTION_CPU_TIME, &h); + ASSERT_EQ(1, h.count); + ASSERT_EQ(h.max, TestGetTickerCount(options, COMPACTION_CPU_TOTAL_TIME)); +} +#endif + TEST_F(DBTest, TestLogCleanup) { Options options = CurrentOptions(); options.write_buffer_size = 64 * 1024; // very small diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index b07ee1f33..dcaf54278 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -208,8 +208,11 @@ enum Tickers : uint32_t { // DEPRECATED / unused (see NUMBER_BLOCK_COMPRESSION_*) NUMBER_BLOCK_NOT_COMPRESSED, + + // Tickers that record cumulative time. MERGE_OPERATION_TOTAL_TIME, FILTER_OPERATION_TOTAL_TIME, + COMPACTION_CPU_TOTAL_TIME, // Row cache. ROW_CACHE_HIT, diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 5a7473f2a..d3c0dc08f 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -111,6 +111,7 @@ const std::vector> TickersNameMap = { {NUMBER_BLOCK_NOT_COMPRESSED, "rocksdb.number.block.not_compressed"}, {MERGE_OPERATION_TOTAL_TIME, "rocksdb.merge.operation.time.nanos"}, {FILTER_OPERATION_TOTAL_TIME, "rocksdb.filter.operation.time.nanos"}, + {COMPACTION_CPU_TOTAL_TIME, "rocksdb.compaction.total.time.cpu_micros"}, {ROW_CACHE_HIT, "rocksdb.row.cache.hit"}, {ROW_CACHE_MISS, "rocksdb.row.cache.miss"}, {READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"}, diff --git a/unreleased_history/new_features/compaction_time_stats.md b/unreleased_history/new_features/compaction_time_stats.md new file mode 100644 index 000000000..6aa3e508c --- /dev/null +++ b/unreleased_history/new_features/compaction_time_stats.md @@ -0,0 +1 @@ +* Add a new statistic `COMPACTION_CPU_TOTAL_TIME` that records cumulative compaction cpu time. This ticker is updated regularly while a compaction is running. 
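For context, a minimal sketch of how an application might poll the new cumulative ticker while a compaction is still running, assuming only the existing public `Statistics` API (`CreateDBStatistics()`, `set_stats_level()`, `getTickerCount()`); the DB path and the one-second polling cadence are illustrative:

```
// Sketch: poll COMPACTION_CPU_TOTAL_TIME from a monitoring loop.
// Assumes statistics are enabled on the DB; "/tmp/compaction_cpu_demo" is a
// placeholder path.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

#include "rocksdb/db.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();
  // Same stats level as the new unit test uses.
  options.statistics->set_stats_level(rocksdb::StatsLevel::kExceptTimeForMutex);

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/compaction_cpu_demo", &db);
  if (!s.ok()) {
    std::fprintf(stderr, "Open failed: %s\n", s.ToString().c_str());
    return 1;
  }

  // While background compactions run, the ticker keeps growing, so a monitor
  // can report progress without waiting for the end-of-compaction histograms.
  for (int i = 0; i < 5; ++i) {
    const uint64_t cpu_micros =
        options.statistics->getTickerCount(rocksdb::COMPACTION_CPU_TOTAL_TIME);
    std::printf("cumulative compaction CPU: %llu micros\n",
                static_cast<unsigned long long>(cpu_micros));
    std::this_thread::sleep_for(std::chrono::seconds(1));
  }

  delete db;
  return 0;
}
```

Per the new unit test above, the ticker is bumped roughly every 1000 input records processed by `CompactionJob::ProcessKeyValueCompaction()` and once more at the end, so a value read mid-compaction is a close lower bound on the CPU time spent so far.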
\ No newline at end of file From ef3e289b2d6cfda6ff3c8a67664992aba9e62f27 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 12 Sep 2023 15:53:15 -0700 Subject: [PATCH 103/386] Conditionally exclude some L0 input files in size amp compaction (#11749) Summary: **Context/Summary:** A size amp compaction can select and prevent a large number of L0 files from being selected by other compaction. If such compaction is running long or being queued behind, these L0 files will exist for long. With a few more flushes, we can run into write stop triggered by # L0 files. We've seen this happen on a host with many DBs sharing same thread pool, each of these DBs submits a size amp compaction with (110-180)+ files to the pool upon reopen and with a few more flushes, they hit the 200 L0 write stop condition. The idea is to exclude some L0 input files in size amp compaction that are harmless to size amp reduction but improve the situation described above. The exclusion algorithm is in `MightExcludeNewL0sToReduceWriteStop()` with two elements: 1. #L0 to exclude + (level0_stop_writes_trigger - num_l0_input_pre_exclusion) should be in the range of [min_merge_width, max_merge_width]. - This is to ensure we are excluding enough L0 input files but not too many to be qualified to picked for another compaction along with the incoming future L0 files before write stop. 2. Based on (1), further constrain #L0 to exclude based on the post-exclusion compaction score. The goal is to ensure our exclusion will not disqualify the size amp compaction from being a size amp compaction after exclusion. **Tets plan:** New unit test Pull Request resolved: https://github.com/facebook/rocksdb/pull/11749 Reviewed By: ajkr Differential Revision: D48850631 Pulled By: hx235 fbshipit-source-id: 2c321036e164087c36319dd5645cbbf6b6152092 --- db/compaction/compaction_picker_test.cc | 61 ++++++++++ db/compaction/compaction_picker_universal.cc | 110 +++++++++++++++++- .../exclude_some_l0_size_amp.md | 1 + 3 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 unreleased_history/behavior_changes/exclude_some_l0_size_amp.md diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 2684f62e6..64326a95c 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -994,6 +994,61 @@ TEST_F(CompactionPickerTest, UniversalIncrementalSpace5) { ASSERT_EQ(13, compaction->num_input_files(1)); } +TEST_F(CompactionPickerTest, + PartiallyExcludeL0ToReduceWriteStopForSizeAmpCompaction) { + const uint64_t kFileSize = 100000; + const uint64_t kL0FileCount = 30; + const uint64_t kLastLevelFileCount = 1; + const uint64_t kNumLevels = 5; + + for (const uint64_t test_no_exclusion : {false, true}) { + const uint64_t kExpectedNumExcludedL0 = + test_no_exclusion ? 0 : kL0FileCount * 1 / 10; + + mutable_cf_options_.level0_stop_writes_trigger = 36; + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent = 1; + mutable_cf_options_.compaction_options_universal.max_merge_width = + test_no_exclusion + // In universal compaction, sorted runs from non L0 levels are + // counted toward `level0_stop_writes_trigger`. Therefore we need to + // subtract the total number of sorted runs picked originally for + // this compaction (i.e, kL0FileCount + kLastLevelFileCount) from + // `level0_stop_writes_trigger` to calculate `max_merge_width` that + // results in no L0 exclusion for testing purpose. + ? 
mutable_cf_options_.level0_stop_writes_trigger - + (kL0FileCount + kLastLevelFileCount) + : UINT_MAX; + + UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); + NewVersionStorage(kNumLevels, kCompactionStyleUniversal); + + for (uint64_t i = 1; i <= kL0FileCount + kLastLevelFileCount; ++i) { + Add(i <= kL0FileCount ? 0 : kNumLevels - 1, static_cast(i), + std::to_string((i + 100) * 1000).c_str(), + std::to_string((i + 100) * 1000 + 999).c_str(), kFileSize, 0, i * 100, + i * 100 + 99); + } + + UpdateVersionStorageInfo(); + + ASSERT_TRUE(universal_compaction_picker.NeedsCompaction(vstorage_.get())); + std::unique_ptr compaction( + universal_compaction_picker.PickCompaction( + cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(), + &log_buffer_)); + ASSERT_TRUE(compaction.get() != nullptr); + ASSERT_EQ(compaction->compaction_reason(), + CompactionReason::kUniversalSizeAmplification); + ASSERT_EQ(compaction->num_input_files(0), + kL0FileCount - kExpectedNumExcludedL0); + ASSERT_EQ(compaction->num_input_files(kNumLevels - 1), kLastLevelFileCount); + for (uint64_t level = 1; level <= kNumLevels - 2; level++) { + ASSERT_EQ(compaction->num_input_files(level), 0); + } + } +} + TEST_F(CompactionPickerTest, NeedsCompactionFIFO) { NewVersionStorage(1, kCompactionStyleFIFO); const int kFileCount = @@ -3380,6 +3435,9 @@ TEST_F(CompactionPickerTest, UniversalSizeAmpTierCompactionNonLastLevel) { ioptions_.preclude_last_level_data_seconds = 1000; mutable_cf_options_.compaction_options_universal .max_size_amplification_percent = 200; + // To avoid any L0 file exclusion in size amp compaction intended for reducing + // write stop + mutable_cf_options_.compaction_options_universal.max_merge_width = 2; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(kNumLevels, kCompactionStyleUniversal); @@ -3453,6 +3511,9 @@ TEST_F(CompactionPickerTest, UniversalSizeAmpTierCompactionNotSuport) { ioptions_.preclude_last_level_data_seconds = 1000; mutable_cf_options_.compaction_options_universal .max_size_amplification_percent = 200; + // To avoid any L0 file exclusion in size amp compaction intended for reducing + // write stop + mutable_cf_options_.compaction_options_universal.max_merge_width = 2; UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_); NewVersionStorage(kNumLevels, kCompactionStyleUniversal); diff --git a/db/compaction/compaction_picker_universal.cc b/db/compaction/compaction_picker_universal.cc index 3b2156f91..6d9ff43cd 100644 --- a/db/compaction/compaction_picker_universal.cc +++ b/db/compaction/compaction_picker_universal.cc @@ -9,7 +9,7 @@ #include "db/compaction/compaction_picker_universal.h" -#include +#include #include #include #include @@ -114,7 +114,7 @@ class UniversalCompactionBuilder { // because some files are being compacted. Compaction* PickPeriodicCompaction(); - bool ShouldSkipLastSortedRunForSizeAmpCompaction() { + bool ShouldSkipLastSortedRunForSizeAmpCompaction() const { assert(!sorted_runs_.empty()); return ioptions_.preclude_last_level_data_seconds > 0 && ioptions_.num_levels > 2 && @@ -129,6 +129,100 @@ class UniversalCompactionBuilder { uint64_t GetMaxOverlappingBytes() const; + // To conditionally exclude some of the newest L0 files + // from a size amp compaction. This is to prevent a large number of L0 + // files from being locked by a size amp compaction, potentially leading to + // write stop with a few more flushes. 
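To make the bounds used by this exclusion concrete, here is a standalone sketch (not part of the patch) of the headroom arithmetic; the option values and file counts in `main()` are hypothetical, and the real picker further caps the result by the number of available L0 inputs and by the post-exclusion size-amp score check:

```
// Sketch: derive how many of the newest L0 input files could be excluded so
// that the L0 files left outside this size-amp compaction can still form a
// [min_merge_width, max_merge_width] merge before level0_stop_writes_trigger
// is hit. All numbers are hypothetical.
#include <algorithm>
#include <cstddef>
#include <cstdio>

// L0 headroom left before write stop, given the sorted runs already selected.
std::size_t ExtraL0BeforeWriteStop(std::size_t level0_stop_writes_trigger,
                                   std::size_t num_selected_sorted_runs) {
  return level0_stop_writes_trigger -
         std::min(level0_stop_writes_trigger, num_selected_sorted_runs);
}

int main() {
  const std::size_t level0_stop_writes_trigger = 36;
  const std::size_t num_selected_sorted_runs = 31;  // e.g. 30 L0 + 1 last level
  const std::size_t min_merge_width = 2;
  const std::size_t max_merge_width = 8;

  const std::size_t headroom = ExtraL0BeforeWriteStop(
      level0_stop_writes_trigger, num_selected_sorted_runs);  // 36 - 31 = 5

  // Excluding k newest L0 files leaves (headroom + k) L0 slots that another
  // compaction could merge before write stop, so k is chosen such that
  // headroom + k stays within [min_merge_width, max_merge_width].
  const std::size_t exclude_for_min_width =
      min_merge_width - std::min(min_merge_width, headroom);  // 0
  const std::size_t exclude_for_max_width =
      max_merge_width - std::min(max_merge_width, headroom);  // 8 - 5 = 3

  std::printf("exclude between %zu and %zu of the newest L0 files\n",
              exclude_for_min_width, exclude_for_max_width);
  return 0;
}
```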
+ // + // Such exclusion is based on `num_l0_input_pre_exclusion`, + // `level0_stop_writes_trigger`, `max/min_merge_width` and the pre-exclusion + // compaction score. Noted that it will not make the size amp compaction of + // interest invalid from running as a size amp compaction as long as its + // pre-exclusion compaction score satisfies the condition to run. + // + // @param `num_l0_input_pre_exclusion` Number of L0 input files prior to + // exclusion + // @param `end_index` Index of the last sorted run selected as compaction + // input. Will not be affected by this exclusion. + // @param `start_index` Index of the first input sorted run prior to + // exclusion. Will be modified as output based on the exclusion. + // @param `candidate_size` Total size of all except for the last input sorted + // runs prior to exclusion. Will be modified as output based on the exclusion. + // + // @return Number of L0 files to exclude. `start_index` and + // `candidate_size` will be modified accordingly + std::size_t MightExcludeNewL0sToReduceWriteStop( + std::size_t num_l0_input_pre_exclusion, std::size_t end_index, + std::size_t& start_index, uint64_t& candidate_size) const { + if (num_l0_input_pre_exclusion == 0) { + return 0; + } + + assert(start_index <= end_index && sorted_runs_.size() > end_index); + assert(mutable_cf_options_.level0_stop_writes_trigger > 0); + const std::size_t level0_stop_writes_trigger = static_cast( + mutable_cf_options_.level0_stop_writes_trigger); + const std::size_t max_merge_width = static_cast( + mutable_cf_options_.compaction_options_universal.max_merge_width); + const std::size_t min_merge_width = static_cast( + mutable_cf_options_.compaction_options_universal.min_merge_width); + const uint64_t max_size_amplification_percent = + mutable_cf_options_.compaction_options_universal + .max_size_amplification_percent; + const uint64_t base_sr_size = sorted_runs_[end_index].size; + + // Leave at least 1 L0 file and 2 input sorted runs after exclusion + const std::size_t max_num_l0_to_exclude = + std::min(num_l0_input_pre_exclusion - 1, end_index - start_index - 1); + // In universal compaction, sorted runs from non L0 levels are counted + // toward `level0_stop_writes_trigger`. 
Therefore we need to subtract the + // total number of sorted runs picked originally for this compaction from + // `level0_stop_writes_trigger` to calculate + // `num_extra_l0_before_write_stop` + const std::size_t num_extra_l0_before_write_stop = + level0_stop_writes_trigger - + std::min(level0_stop_writes_trigger, end_index - start_index + 1); + const std::size_t num_l0_to_exclude_for_max_merge_width = + std::min(max_merge_width - + std::min(max_merge_width, num_extra_l0_before_write_stop), + max_num_l0_to_exclude); + const std::size_t num_l0_to_exclude_for_min_merge_width = + std::min(min_merge_width - + std::min(min_merge_width, num_extra_l0_before_write_stop), + max_num_l0_to_exclude); + + std::size_t num_l0_to_exclude = 0; + uint64_t candidate_size_post_exclusion = candidate_size; + + for (std::size_t possible_num_l0_to_exclude = + num_l0_to_exclude_for_min_merge_width; + possible_num_l0_to_exclude <= num_l0_to_exclude_for_max_merge_width; + ++possible_num_l0_to_exclude) { + uint64_t current_candidate_size = candidate_size_post_exclusion; + for (std::size_t j = num_l0_to_exclude; j < possible_num_l0_to_exclude; + ++j) { + current_candidate_size -= + sorted_runs_.at(start_index + j).compensated_file_size; + } + + // To ensure the compaction score before and after exclusion is similar + // so this exclusion will not make the size amp compaction of + // interest invalid from running as a size amp compaction as long as its + // pre-exclusion compaction score satisfies the condition to run. + if (current_candidate_size * 100 < + max_size_amplification_percent * base_sr_size || + current_candidate_size < candidate_size * 9 / 10) { + break; + } + num_l0_to_exclude = possible_num_l0_to_exclude; + candidate_size_post_exclusion = current_candidate_size; + } + + start_index += num_l0_to_exclude; + candidate_size = candidate_size_post_exclusion; + return num_l0_to_exclude; + } + const ImmutableOptions& ioptions_; const InternalKeyComparator* icmp_; double score_; @@ -800,6 +894,7 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { const uint64_t base_sr_size = sorted_runs_[end_index].size; size_t start_index = end_index; uint64_t candidate_size = 0; + size_t num_l0_files = 0; // Get longest span (i.e, [start_index, end_index]) of available sorted runs while (start_index > 0) { @@ -815,6 +910,7 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { break; } candidate_size += sr->compensated_file_size; + num_l0_files += sr->level == 0 ? 
1 : 0; --start_index; } @@ -822,6 +918,16 @@ Compaction* UniversalCompactionBuilder::PickCompactionToReduceSizeAmp() { return nullptr; } + { + const size_t num_l0_to_exclude = MightExcludeNewL0sToReduceWriteStop( + num_l0_files, end_index, start_index, candidate_size); + ROCKS_LOG_BUFFER(log_buffer_, + "[%s] Universal: Excluding %" ROCKSDB_PRIszt + " latest L0 files to reduce potential write stop " + "triggered by `level0_stop_writes_trigger`", + cf_name_.c_str(), num_l0_to_exclude); + } + { char file_num_buf[kFormatFileNumberBufSize]; sorted_runs_[start_index].Dump(file_num_buf, sizeof(file_num_buf), true); diff --git a/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md b/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md new file mode 100644 index 000000000..3c73e6789 --- /dev/null +++ b/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md @@ -0,0 +1 @@ +Universal size amp compaction will conditionally exclude some of the newest L0 files when selecting input with a small negative impact to size amp. This is to prevent a large number of L0 files from being locked by a size amp compaction, potentially leading to write stop with a few more flushes. From 4b79e8c0039389fc731f8bc9bd6109257c21d9d1 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Tue, 12 Sep 2023 16:32:40 -0700 Subject: [PATCH 104/386] GetEntity and PutEntity Support in ldb (#11796) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: - `get_entity` and `put_entity` command support in ldb - Input Format for `put_entity`: `ldb --db= put_entity : : ...` - Output Format for `get_entity`: `: :` - If `get_entity` is called against non-wide column value (existing behavior), empty key (kDefaultWideColumnName) will be printed, appended by `:` - If `get` is called against wide column value (existing behavior), first column value is printed if the first column name is kDefaultWideColumnName. # Test Checks for `put_entity` and `get_entity` added in `ldb_test.py` ``` ❯ python3 tools/ldb_test.py took 45s at 10:45:44 AM Running testBlobBatchPut... .Running testBlobDump .Running testBlobPut... .Running testBlobStartingLevel... .Running testCheckConsistency... .Running testColumnFamilies... .Running testCountDelimDump... .Running testCountDelimIDump... .Running testDumpLiveFiles... .Running testDumpLoad... Warning: 7 bad lines ignored. .Running testGetProperty... .Running testHexPutGet... .Running testIDumpBasics... .Running testIDumpDecodeBlobIndex... .Running testIngestExternalSst... .Running testInvalidCmdLines... .Running testListColumnFamilies... .Running testListLiveFilesMetadata... .Running testManifestDump... .Running testMiscAdminTask... Compacting the db... Sequence,Count,ByteSize,Physical Offset,Key(s) .Running testSSTDump... .Running testSimpleStringPutGet... .Running testStringBatchPut... .Running testTtlPutGet... .Running testWALDump... . ---------------------------------------------------------------------- Ran 25 tests in 57.742s ``` Manual Test ``` # Invalid format for wide columns ❯ ./ldb --db=/tmp/test_db put_entity x4 x5 Failed: wide column format needs to be : (did you mean put ?) 
# empty column name (kDefaultWideColumnName) ❯ ./ldb --db=/tmp/test_db put_entity x4 :x5 OK ❯ ./ldb --db=/tmp/test_db get_entity x4 :x5 ❯ ./ldb --db=/tmp/test_db get x4 x5 ❯ ./ldb --db=/tmp/test_db put_entity a1 :z1 b1:c1 b2:f1 OK ❯ ./ldb --db=/tmp/test_db get_entity a1 :z1 b1:c1 b2:f1 # Keeping the existing behavior if `get` was called on wide column values ❯ ./ldb --db=/tmp/test_db get a1 z1 # Scan ❯ ./ldb --db=/tmp/test_db scan a1 ==> b1:c1 b2:f1 x4 ==> x5 x5 ==> cn1:cv1 cn2:cv2 # Scan hex ❯ ./ldb --db=/tmp/test_db scan --hex 0x6131 ==> 0x6231:0x6331 0x6232:0x6631 0x7834 ==> 0x7835 0x7835 ==> 0x636E31:0x637631 0x636E32:0x637632 # More testing with hex values ❯ ./ldb --db=/tmp/test_db get_entity 0x6131 --hex 0x6231:0x6331 0x6232:0x6631 ❯ ./ldb --db=/tmp/test_db get_entity 0x78 --hex Failed: GetEntity failed: NotFound: ❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex :0x7835 ❯ ./ldb --db=/tmp/test_db put_entity 0x7834 0x6234:0x6635 --hex OK ❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --hex 0x6234:0x6635 ❯ ./ldb --db=/tmp/test_db get_entity 0x7834 --key_hex b4:f5 ❯ ./ldb --db=/tmp/test_db get_entity x4 --value_hex 0x6234:0x6635 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11796 Reviewed By: jowlyzhang Differential Revision: D48978141 Pulled By: jaykorean fbshipit-source-id: 4f87c222417ed90a6dbf39bd7b0f068b01e68393 --- tools/ldb_cmd.cc | 132 ++++++++++++++++++++++++++++++++++++++++++- tools/ldb_cmd_impl.h | 36 ++++++++++++ tools/ldb_test.py | 31 ++++++++-- 3 files changed, 194 insertions(+), 5 deletions(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index a6cfdb43f..490773c14 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -6,7 +6,7 @@ // #include "rocksdb/utilities/ldb_cmd.h" -#include +#include #include #include #include @@ -208,9 +208,15 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) { if (parsed_params.cmd == GetCommand::Name()) { return new GetCommand(parsed_params.cmd_params, parsed_params.option_map, parsed_params.flags); + } else if (parsed_params.cmd == GetEntityCommand::Name()) { + return new GetEntityCommand(parsed_params.cmd_params, + parsed_params.option_map, parsed_params.flags); } else if (parsed_params.cmd == PutCommand::Name()) { return new PutCommand(parsed_params.cmd_params, parsed_params.option_map, parsed_params.flags); + } else if (parsed_params.cmd == PutEntityCommand::Name()) { + return new PutEntityCommand(parsed_params.cmd_params, + parsed_params.option_map, parsed_params.flags); } else if (parsed_params.cmd == BatchPutCommand::Name()) { return new BatchPutCommand(parsed_params.cmd_params, parsed_params.option_map, parsed_params.flags); @@ -2838,6 +2844,55 @@ void GetCommand::DoCommand() { // ---------------------------------------------------------------------------- +GetEntityCommand::GetEntityCommand( + const std::vector& params, + const std::map& options, + const std::vector& flags) + : LDBCommand( + options, flags, true, + BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX})) { + if (params.size() != 1) { + exec_state_ = LDBCommandExecuteResult::Failed( + " must be specified for the get_entity command"); + } else { + key_ = params.at(0); + } + + if (is_key_hex_) { + key_ = HexToString(key_); + } +} + +void GetEntityCommand::Help(std::string& ret) { + ret.append(" "); + ret.append(GetEntityCommand::Name()); + ret.append(" "); + ret.append(" [--" + ARG_TTL + "]"); + ret.append("\n"); +} + +void GetEntityCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + 
return; + } + PinnableWideColumns pinnable_wide_columns; + Status st = db_->GetEntity(ReadOptions(), GetCfHandle(), key_, + &pinnable_wide_columns); + if (st.ok()) { + std::ostringstream oss; + WideColumnsHelper::DumpWideColumns(pinnable_wide_columns.columns(), oss, + is_value_hex_); + fprintf(stdout, "%s\n", oss.str().c_str()); + } else { + std::stringstream oss; + oss << "GetEntity failed: " << st.ToString(); + exec_state_ = LDBCommandExecuteResult::Failed(oss.str()); + } +} + +// ---------------------------------------------------------------------------- + ApproxSizeCommand::ApproxSizeCommand( const std::vector& /*params*/, const std::map& options, @@ -3274,6 +3329,81 @@ void PutCommand::OverrideBaseOptions() { // ---------------------------------------------------------------------------- +PutEntityCommand::PutEntityCommand( + const std::vector& params, + const std::map& options, + const std::vector& flags) + : LDBCommand(options, flags, false, + BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX, + ARG_VALUE_HEX, ARG_CREATE_IF_MISSING})) { + if (params.size() < 2) { + exec_state_ = LDBCommandExecuteResult::Failed( + " and at least one column : must be " + "specified for the put_entity command"); + } else { + auto iter = params.begin(); + key_ = *iter; + if (is_key_hex_) { + key_ = HexToString(key_); + } + for (++iter; iter != params.end(); ++iter) { + auto split = StringSplit(*iter, ':'); + if (split.size() != 2) { + exec_state_ = LDBCommandExecuteResult::Failed( + "wide column format needs to be : (did " + "you mean put ?)"); + return; + } + std::string name(split[0]); + std::string value(split[1]); + if (is_value_hex_) { + name = HexToString(name); + value = HexToString(value); + } + column_names_.push_back(name); + column_values_.push_back(value); + } + } + create_if_missing_ = IsFlagPresent(flags_, ARG_CREATE_IF_MISSING); +} + +void PutEntityCommand::Help(std::string& ret) { + ret.append(" "); + ret.append(PutCommand::Name()); + ret.append( + " : : " + "<...>"); + ret.append(" [--" + ARG_CREATE_IF_MISSING + "]"); + ret.append(" [--" + ARG_TTL + "]"); + ret.append("\n"); +} + +void PutEntityCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } + assert(column_names_.size() == column_values_.size()); + WideColumns columns; + for (size_t i = 0; i < column_names_.size(); i++) { + WideColumn column(column_names_[i], column_values_[i]); + columns.emplace_back(column); + } + Status st = db_->PutEntity(WriteOptions(), GetCfHandle(), key_, columns); + if (st.ok()) { + fprintf(stdout, "OK\n"); + } else { + exec_state_ = LDBCommandExecuteResult::Failed(st.ToString()); + } +} + +void PutEntityCommand::OverrideBaseOptions() { + LDBCommand::OverrideBaseOptions(); + options_.create_if_missing = create_if_missing_; +} + +// ---------------------------------------------------------------------------- + const char* DBQuerierCommand::HELP_CMD = "help"; const char* DBQuerierCommand::GET_CMD = "get"; const char* DBQuerierCommand::PUT_CMD = "put"; diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index 97de981b1..2a396754d 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -403,6 +403,22 @@ class GetCommand : public LDBCommand { std::string key_; }; +class GetEntityCommand : public LDBCommand { + public: + static std::string Name() { return "get_entity"; } + + GetEntityCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + 
std::string key_; +}; + class ApproxSizeCommand : public LDBCommand { public: static std::string Name() { return "approxsize"; } @@ -530,6 +546,26 @@ class PutCommand : public LDBCommand { std::string value_; }; +class PutEntityCommand : public LDBCommand { + public: + static std::string Name() { return "put_entity"; } + + PutEntityCommand(const std::vector& params, + const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + void OverrideBaseOptions() override; + + private: + std::string key_; + std::vector column_names_; + std::vector column_values_; +}; + /** * Command that starts up a REPL shell that allows * get/put/delete. diff --git a/tools/ldb_test.py b/tools/ldb_test.py index ff39aff74..cde041471 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -121,17 +121,40 @@ def testSimpleStringPutGet(self): self.assertRunOK("get x2", "y2") self.assertRunFAIL("get x3") - self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2") + self.assertRunFAIL("put_entity x4") + self.assertRunFAIL("put_entity x4 cv1") + self.assertRunOK("put_entity x4 :cv1", "OK") + self.assertRunOK("get_entity x4", ":cv1") + + self.assertRunOK("put_entity x5 cn1:cv1 cn2:cv2", "OK") + self.assertRunOK("get_entity x5", "cn1:cv1 cn2:cv2") + + self.assertRunOK( + "scan --from=x1 --to=z", + "x1 ==> y1\nx2 ==> y2\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2", + ) self.assertRunOK("put x3 y3", "OK") - self.assertRunOK("scan --from=x1 --to=z", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") - self.assertRunOK("scan", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") - self.assertRunOK("scan --from=x", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3") + self.assertRunOK( + "scan --from=x1 --to=z", + "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2", + ) + self.assertRunOK( + "scan", + "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2", + ) + self.assertRunOK( + "scan --from=x", + "x1 ==> y1\nx2 ==> y2\nx3 ==> y3\nx4 ==> cv1\nx5 ==> cn1:cv1 cn2:cv2", + ) self.assertRunOK("scan --to=x2", "x1 ==> y1") self.assertRunOK("scan --from=x1 --to=z --max_keys=1", "x1 ==> y1") self.assertRunOK("scan --from=x1 --to=z --max_keys=2", "x1 ==> y1\nx2 ==> y2") + self.assertRunOK("delete x4", "OK") + self.assertRunOK("delete x5", "OK") + self.assertRunOK( "scan --from=x1 --to=z --max_keys=3", "x1 ==> y1\nx2 ==> y2\nx3 ==> y3" ) From 3db2cf113d7c19661058cd09924c47edd4441f5a Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 13 Sep 2023 09:50:44 -0700 Subject: [PATCH 105/386] Fix copyright header in util/overload.h (#11826) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11826 Reviewed By: jaykorean Differential Revision: D49233043 fbshipit-source-id: cadf6cda3b9720789609e3d3d9404822c6681da2 --- util/overload.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/overload.h b/util/overload.h index 428e805de..27da81648 100644 --- a/util/overload.h +++ b/util/overload.h @@ -1,4 +1,4 @@ -// (c) Meta Platforms, Inc. and affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
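The wide-column changes above (the shared `WideColumnsHelper::SortColumns()` helper and the new `put_entity`/`get_entity` ldb commands) sit on top of the public `PutEntity`/`GetEntity` API. A minimal sketch of that API follows, assuming the `rocksdb/wide_columns.h` public header; the DB path, key, and column names are illustrative:

```
// Sketch: write and read a wide-column entity through the C++ API, roughly
// what `ldb put_entity a1 b1:c1 b2:f1` / `ldb get_entity a1` do internally.
#include <cstdio>

#include "rocksdb/db.h"
#include "rocksdb/wide_columns.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/wide_column_demo", &db);
  if (!s.ok()) {
    std::fprintf(stderr, "Open failed: %s\n", s.ToString().c_str());
    return 1;
  }

  // Write an entity with two named columns.
  rocksdb::WideColumns columns;
  columns.emplace_back("b1", "c1");
  columns.emplace_back("b2", "f1");
  s = db->PutEntity(rocksdb::WriteOptions(), db->DefaultColumnFamily(), "a1",
                    columns);
  if (!s.ok()) {
    std::fprintf(stderr, "PutEntity failed: %s\n", s.ToString().c_str());
  }

  // Read all columns back, similar to the ldb get_entity output format.
  rocksdb::PinnableWideColumns result;
  s = db->GetEntity(rocksdb::ReadOptions(), db->DefaultColumnFamily(), "a1",
                    &result);
  if (s.ok()) {
    for (const rocksdb::WideColumn& column : result.columns()) {
      std::printf("%s:%s\n", column.name().ToString().c_str(),
                  column.value().ToString().c_str());
    }
  }

  delete db;
  return 0;
}
```

Note that the write path copies and sorts the columns by name before serializing them (now via the shared helper), so callers do not need to pre-sort the `WideColumns` they pass in.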
From 3285ba7a29166377557acb47f576d6fe0ea69a43 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 13 Sep 2023 15:53:05 -0700 Subject: [PATCH 106/386] Fix unit test tsan failure (#11828) Summary: The test DBCompactionWaitForCompactTest.WaitForCompactWithOptionToFlushAndCloseDB failed tsan in https://app.circleci.com/pipelines/github/facebook/rocksdb/32009/workflows/577e4e1f-a909-4e80-8ef4-af98b5ff7446/jobs/660989. I cannot repro locally, but this should be the fix. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11828 Test Plan: `./db_compaction_test --gtest_filter="*DBCompactionWaitForCompactTest/DBCompactionWaitForCompactTest.*"` Reviewed By: jaykorean Differential Revision: D49241904 Pulled By: cbi42 fbshipit-source-id: 68714c836d982dcb3946da104533d5c0594980de --- db/db_compaction_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index bc7e2f552..d1bd59155 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -3536,12 +3536,12 @@ TEST_P(DBCompactionWaitForCompactTest, // (has_unpersisted_data_ true) Check to make sure there's no extra L0 file // created from WAL. Re-opening DB won't trigger any flush or compaction - int compaction_finished = 0; + std::atomic_int compaction_finished = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::BackgroundCompaction:Finish", [&](void*) { compaction_finished++; }); - int flush_finished = 0; + std::atomic_int flush_finished = 0; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "FlushJob::End", [&](void*) { flush_finished++; }); From 39a4ff2cab9131c3f28b1fffefeae0812fbe70c5 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 13 Sep 2023 16:34:18 -0700 Subject: [PATCH 107/386] Track full_history_ts_low per SuperVersion (#11784) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: As discussed in https://github.com/facebook/rocksdb/issues/11730 , this PR tracks the effective `full_history_ts_low` per SuperVersion and update existing sanity checks for `ReadOptions.timestamp >= full_history_ts_low` to use this per SuperVersion `full_history_ts_low` instead. This also means the check is moved to happen after acquiring SuperVersion. There are two motivations for this: 1) Each time `full_history_ts_low` really come into effect to collapse history, a new SuperVersion is always installed, because it would involve either a Flush or Compaction, both of which change the LSM tree shape. We can take advantage of this to ensure that as long as this sanity check is passed, even if `full_history_ts_low` can be concurrently increased and collapse some history above the requested `ReadOptions.timestamp`, a read request won’t have visibility to that part of history through this SuperVersion that it already acquired. 2) the existing sanity check uses `ColumnFamilyData::GetFullHistoryTsLow` without locking the db mutex, which is the mutex all `IncreaseFullHistoryTsLow` operation is using when mutating this field. So there is a race condition. This also solve the race condition on the read path. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11784 Test Plan: `make all check` // Checks success scenario really provide the read consistency attribute as mentioned above. `./db_with_timestamp_basic_test --gtest_filter=*FullHistoryTsLowSanityCheckPassReadIsConsistent*` // Checks failure scenario cleans up SuperVersion properly. 
`./db_with_timestamp_basic_test --gtest_filter=*FullHistoryTsLowSanityCheckFail*` `./db_secondary_test --gtest_filter=*FullHistoryTsLowSanityCheckFail*` `./db_readonly_with_timestamp_test --gtest_filter=*FullHistoryTsLowSanitchCheckFail*` Reviewed By: ltamasi Differential Revision: D48894795 Pulled By: jowlyzhang fbshipit-source-id: 1f801fe8e1bc8e63ca76c03cbdbd0974e5ff5bf6 --- db/blob/db_blob_index_test.cc | 10 +- db/column_family.cc | 1 + db/column_family.h | 6 + db/db_impl/compacted_db_impl.cc | 22 +- db/db_impl/db_impl.cc | 199 +++++++++++----- db/db_impl/db_impl.h | 66 ++++-- db/db_impl/db_impl_readonly.cc | 42 +++- db/db_impl/db_impl_secondary.cc | 54 ++++- db/db_impl/db_impl_secondary.h | 2 +- db/db_impl/db_impl_write.cc | 10 +- db/db_iterator_test.cc | 15 +- db/db_readonly_with_timestamp_test.cc | 47 ++++ db/db_secondary_test.cc | 49 ++++ db/db_with_timestamp_basic_test.cc | 219 ++++++++++++++++++ utilities/blob_db/blob_db_impl.cc | 3 +- .../transactions/pessimistic_transaction.cc | 4 +- .../transactions/write_prepared_txn_db.cc | 12 +- .../transactions/write_unprepared_txn_db.cc | 5 +- 18 files changed, 638 insertions(+), 128 deletions(-) diff --git a/db/blob/db_blob_index_test.cc b/db/blob/db_blob_index_test.cc index eabca1358..e29976034 100644 --- a/db/blob/db_blob_index_test.cc +++ b/db/blob/db_blob_index_test.cc @@ -96,9 +96,13 @@ class DBBlobIndexTest : public DBTestBase { } ArenaWrappedDBIter* GetBlobIterator() { - return dbfull()->NewIteratorImpl( - ReadOptions(), cfd(), dbfull()->GetLatestSequenceNumber(), - nullptr /*read_callback*/, true /*expose_blob_index*/); + ColumnFamilyData* column_family = cfd(); + DBImpl* db_impl = dbfull(); + return db_impl->NewIteratorImpl( + ReadOptions(), column_family, + column_family->GetReferencedSuperVersion(db_impl), + db_impl->GetLatestSequenceNumber(), nullptr /*read_callback*/, + true /*expose_blob_index*/); } Options GetTestOptions() { diff --git a/db/column_family.cc b/db/column_family.cc index 7578b7ec7..9c126f3c0 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -476,6 +476,7 @@ void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem, mem = new_mem; imm = new_imm; current = new_current; + full_history_ts_low = cfd->GetFullHistoryTsLow(); cfd->Ref(); mem->Ref(); imm->Ref(); diff --git a/db/column_family.h b/db/column_family.h index 3f3466e8e..65fb3f4f8 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -211,6 +211,12 @@ struct SuperVersion { // Version number of the current SuperVersion uint64_t version_number; WriteStallCondition write_stall_condition; + // Each time `full_history_ts_low` collapses history, a new SuperVersion is + // installed. This field tracks the effective `full_history_ts_low` for that + // SuperVersion, to be used by read APIs for sanity checks. This field is + // immutable once SuperVersion is installed. For column family that doesn't + // enable UDT feature, this is an empty string. 
+ std::string full_history_ts_low; // should be called outside the mutex SuperVersion() = default; diff --git a/db/db_impl/compacted_db_impl.cc b/db/db_impl/compacted_db_impl.cc index b1338ec99..3b665ea26 100644 --- a/db/db_impl/compacted_db_impl.cc +++ b/db/db_impl/compacted_db_impl.cc @@ -59,12 +59,18 @@ Status CompactedDBImpl::Get(const ReadOptions& _read_options, assert(user_comparator_); if (read_options.timestamp) { - const Status s = - FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp), - /*ts_for_read=*/true); + Status s = + FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp)); if (!s.ok()) { return s; } + if (read_options.timestamp->size() > 0) { + s = FailIfReadCollapsedHistory(cfd_, cfd_->GetSuperVersion(), + *(read_options.timestamp)); + if (!s.ok()) { + return s; + } + } } else { const Status s = FailIfCfHasTs(DefaultColumnFamily()); if (!s.ok()) { @@ -133,11 +139,17 @@ std::vector CompactedDBImpl::MultiGet( if (read_options.timestamp) { Status s = - FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp)); if (!s.ok()) { return std::vector(num_keys, s); } + if (read_options.timestamp->size() > 0) { + s = FailIfReadCollapsedHistory(cfd_, cfd_->GetSuperVersion(), + *(read_options.timestamp)); + if (!s.ok()) { + return std::vector(num_keys, s); + } + } } else { Status s = FailIfCfHasTs(DefaultColumnFamily()); if (!s.ok()) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 228a3da4f..059902c11 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2012,8 +2012,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, if (read_options.timestamp) { const Status s = FailIfTsMismatchCf(get_impl_options.column_family, - *(read_options.timestamp), - /*ts_for_read=*/true); + *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -2060,7 +2059,16 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, // Acquire SuperVersion SuperVersion* sv = GetAndRefSuperVersion(cfd); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + ReturnAndCleanupSuperVersion(cfd, sv); + return s; + } + } + TEST_SYNC_POINT_CALLBACK("DBImpl::GetImpl:AfterAcquireSv", nullptr); TEST_SYNC_POINT("DBImpl::GetImpl:1"); TEST_SYNC_POINT("DBImpl::GetImpl:2"); @@ -2336,8 +2344,7 @@ std::vector DBImpl::MultiGet( assert(column_family[i]); if (read_options.timestamp) { stat_list[i] = - FailIfTsMismatchCf(column_family[i], *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(column_family[i], *(read_options.timestamp)); if (!stat_list[i].ok()) { should_fail = true; } @@ -2369,8 +2376,6 @@ std::vector DBImpl::MultiGet( } } - SequenceNumber consistent_seqnum; - UnorderedMap multiget_cf_data( column_family.size()); for (auto cf : column_family) { @@ -2388,10 +2393,21 @@ std::vector DBImpl::MultiGet( [](UnorderedMap::iterator& cf_iter) { return &cf_iter->second; }; - bool unref_only = + SequenceNumber consistent_seqnum; + bool unref_only; + Status status = MultiCFSnapshot>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum); + &consistent_seqnum, &unref_only); + + if (!status.ok()) { + for (auto& s : stat_list) { + if (s.ok()) { + s = status; + } + } + return stat_list; + } TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum1"); 
TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum2"); @@ -2522,21 +2538,49 @@ std::vector DBImpl::MultiGet( } template -bool DBImpl::MultiCFSnapshot( +Status DBImpl::MultiCFSnapshot( const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, - T* cf_list, SequenceNumber* snapshot) { + T* cf_list, SequenceNumber* snapshot, bool* unref_only) { PERF_TIMER_GUARD(get_snapshot_time); + assert(unref_only); + *unref_only = false; + Status s = Status::OK(); + const bool check_read_ts = + read_options.timestamp && read_options.timestamp->size() > 0; + // unref_only set to true means the SuperVersion to be cleaned up is acquired + // directly via ColumnFamilyData instead of thread local. + const auto sv_cleanup_func = [&]() -> void { + for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end(); + ++cf_iter) { + auto node = iter_deref_func(cf_iter); + SuperVersion* super_version = node->super_version; + ColumnFamilyData* cfd = node->cfd; + if (super_version != nullptr) { + if (*unref_only) { + super_version->Unref(); + } else { + ReturnAndCleanupSuperVersion(cfd, super_version); + } + } + node->super_version = nullptr; + } + }; + bool last_try = false; if (cf_list->size() == 1) { - // Fast path for a single column family. We can simply get the thread loca + // Fast path for a single column family. We can simply get the thread local // super version auto cf_iter = cf_list->begin(); auto node = iter_deref_func(cf_iter); node->super_version = GetAndRefSuperVersion(node->cfd); - if (read_options.snapshot != nullptr) { + if (check_read_ts) { + s = FailIfReadCollapsedHistory(node->cfd, node->super_version, + *(read_options.timestamp)); + } + if (s.ok() && read_options.snapshot != nullptr) { // Note: In WritePrepared txns this is not necessary but not harmful // either. Because prep_seq > snapshot => commit_seq > snapshot so if // a snapshot is specified we should be fine with skipping seq numbers @@ -2550,7 +2594,7 @@ bool DBImpl::MultiCFSnapshot( if (callback) { *snapshot = std::max(*snapshot, callback->max_visible_seq()); } - } else { + } else if (s.ok()) { // Since we get and reference the super version before getting // the snapshot number, without a mutex protection, it is possible // that a memtable switch happened in the middle and not all the @@ -2564,26 +2608,17 @@ bool DBImpl::MultiCFSnapshot( *snapshot = GetLastPublishedSequence(); } } else { - // If we end up with the same issue of memtable geting sealed during 2 + // If we end up with the same issue of memtable getting sealed during 2 // consecutive retries, it means the write rate is very high. In that case - // its probably ok to take the mutex on the 3rd try so we can succeed for - // sure + // it's probably ok to take the mutex on the 3rd try so we can succeed for + // sure. 
constexpr int num_retries = 3; for (int i = 0; i < num_retries; ++i) { last_try = (i == num_retries - 1); bool retry = false; if (i > 0) { - for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end(); - ++cf_iter) { - auto node = iter_deref_func(cf_iter); - SuperVersion* super_version = node->super_version; - ColumnFamilyData* cfd = node->cfd; - if (super_version != nullptr) { - ReturnAndCleanupSuperVersion(cfd, super_version); - } - node->super_version = nullptr; - } + sv_cleanup_func(); } if (read_options.snapshot == nullptr) { if (last_try) { @@ -2607,6 +2642,19 @@ bool DBImpl::MultiCFSnapshot( node->super_version = node->cfd->GetSuperVersion()->Ref(); } TEST_SYNC_POINT("DBImpl::MultiGet::AfterRefSV"); + if (check_read_ts) { + s = FailIfReadCollapsedHistory(node->cfd, node->super_version, + *(read_options.timestamp)); + if (!s.ok()) { + // If read timestamp check failed, a.k.a ReadOptions.timestamp < + // super_version.full_history_ts_low. There is no need to continue + // because this check will keep failing for the same and newer + // SuperVersions, instead we fail fast and ask user to provide + // a higher read timestamp. + retry = false; + break; + } + } if (read_options.snapshot != nullptr || last_try) { // If user passed a snapshot, then we don't care if a memtable is // sealed or compaction happens because the snapshot would ensure @@ -2638,8 +2686,11 @@ bool DBImpl::MultiCFSnapshot( // Keep track of bytes that we read for statistics-recording later PERF_TIMER_STOP(get_snapshot_time); - - return last_try; + *unref_only = last_try; + if (!s.ok()) { + sv_cleanup_func(); + } + return s; } void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, @@ -2689,8 +2740,7 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, ColumnFamilyHandle* cfh = column_families[i]; assert(cfh); if (read_options.timestamp) { - statuses[i] = FailIfTsMismatchCf(cfh, *(read_options.timestamp), - /*ts_for_read=*/true); + statuses[i] = FailIfTsMismatchCf(cfh, *(read_options.timestamp)); if (!statuses[i].ok()) { should_fail = true; } @@ -2773,10 +2823,20 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, }; SequenceNumber consistent_seqnum; - bool unref_only = MultiCFSnapshot< + bool unref_only; + Status s = MultiCFSnapshot< autovector>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum); + &consistent_seqnum, &unref_only); + + if (!s.ok()) { + for (size_t i = 0; i < num_keys; ++i) { + if (statuses[i].ok()) { + statuses[i] = s; + } + } + return; + } GetWithTimestampReadCallback timestamp_read_callback(0); ReadCallback* read_callback = nullptr; @@ -2785,7 +2845,6 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, read_callback = ×tamp_read_callback; } - Status s; auto cf_iter = multiget_cf_data.begin(); for (; cf_iter != multiget_cf_data.end(); ++cf_iter) { s = MultiGetImpl(read_options, cf_iter->start, cf_iter->num_keys, @@ -2961,9 +3020,13 @@ void DBImpl::MultiGetWithCallback( size_t num_keys = sorted_keys->size(); SequenceNumber consistent_seqnum; - bool unref_only = MultiCFSnapshot>( + bool unref_only; + Status s = MultiCFSnapshot>( read_options, callback, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum); + &consistent_seqnum, &unref_only); + if (!s.ok()) { + return; + } #ifndef NDEBUG assert(!unref_only); #else @@ -2998,9 +3061,9 @@ void DBImpl::MultiGetWithCallback( read_callback = ×tamp_read_callback; } - Status s = MultiGetImpl(read_options, 0, num_keys, sorted_keys, - 
multiget_cf_data[0].super_version, consistent_seqnum, - read_callback); + s = MultiGetImpl(read_options, 0, num_keys, sorted_keys, + multiget_cf_data[0].super_version, consistent_seqnum, + read_callback); assert(s.ok() || s.IsTimedOut() || s.IsAborted()); ReturnAndCleanupSuperVersion(multiget_cf_data[0].cfd, multiget_cf_data[0].super_version); @@ -3470,8 +3533,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, if (read_options.timestamp) { const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return NewErrorIterator(s); } @@ -3486,8 +3548,16 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, ColumnFamilyData* cfd = cfh->cfd(); assert(cfd != nullptr); ReadCallback* read_callback = nullptr; // No read callback provided. + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + CleanupSuperVersion(sv); + return NewErrorIterator(s); + } + } if (read_options.tailing) { - SuperVersion* sv = cfd->GetReferencedSuperVersion(this); auto iter = new ForwardIterator(this, read_options, cfd, sv, /* allow_unprepared_value */ true); result = NewDBIterator( @@ -3499,7 +3569,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, // Note: no need to consider the special case of // last_seq_same_as_publish_seq_==false since NewIterator is overridden in // WritePreparedTxnDB - result = NewIteratorImpl(read_options, cfd, + result = NewIteratorImpl(read_options, cfd, sv, (read_options.snapshot != nullptr) ? read_options.snapshot->GetSequenceNumber() : kMaxSequenceNumber, @@ -3508,14 +3578,10 @@ Iterator* DBImpl::NewIterator(const ReadOptions& _read_options, return result; } -ArenaWrappedDBIter* DBImpl::NewIteratorImpl(const ReadOptions& read_options, - ColumnFamilyData* cfd, - SequenceNumber snapshot, - ReadCallback* read_callback, - bool expose_blob_index, - bool allow_refresh) { - SuperVersion* sv = cfd->GetReferencedSuperVersion(this); - +ArenaWrappedDBIter* DBImpl::NewIteratorImpl( + const ReadOptions& read_options, ColumnFamilyData* cfd, SuperVersion* sv, + SequenceNumber snapshot, ReadCallback* read_callback, + bool expose_blob_index, bool allow_refresh) { TEST_SYNC_POINT("DBImpl::NewIterator:1"); TEST_SYNC_POINT("DBImpl::NewIterator:2"); @@ -3615,8 +3681,7 @@ Status DBImpl::NewIterators( if (read_options.timestamp) { for (auto* cf : column_families) { assert(cf); - const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp), - /*ts_for_read=*/true); + const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -3634,10 +3699,27 @@ Status DBImpl::NewIterators( ReadCallback* read_callback = nullptr; // No read callback provided. 
iterators->clear(); iterators->reserve(column_families.size()); + autovector> cfd_to_sv; + const bool check_read_ts = + read_options.timestamp && read_options.timestamp->size() > 0; + for (auto cfh : column_families) { + auto cfd = static_cast_with_check(cfh)->cfd(); + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + cfd_to_sv.emplace_back(cfd, sv); + if (check_read_ts) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + for (auto prev_entry : cfd_to_sv) { + CleanupSuperVersion(std::get<1>(prev_entry)); + } + return s; + } + } + } + assert(cfd_to_sv.size() == column_families.size()); if (read_options.tailing) { - for (auto cfh : column_families) { - auto cfd = static_cast_with_check(cfh)->cfd(); - SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + for (auto [cfd, sv] : cfd_to_sv) { auto iter = new ForwardIterator(this, read_options, cfd, sv, /* allow_unprepared_value */ true); iterators->push_back(NewDBIterator( @@ -3653,12 +3735,9 @@ Status DBImpl::NewIterators( auto snapshot = read_options.snapshot != nullptr ? read_options.snapshot->GetSequenceNumber() : versions_->LastSequence(); - for (size_t i = 0; i < column_families.size(); ++i) { - auto* cfd = - static_cast_with_check(column_families[i]) - ->cfd(); + for (auto [cfd, sv] : cfd_to_sv) { iterators->push_back( - NewIteratorImpl(read_options, cfd, snapshot, read_callback)); + NewIteratorImpl(read_options, cfd, sv, snapshot, read_callback)); } } diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 50aec5804..e33b9a0d7 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -648,7 +648,7 @@ class DBImpl : public DB { // If `snapshot` == kMaxSequenceNumber, set a recent one inside the file. ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& options, - ColumnFamilyData* cfd, + ColumnFamilyData* cfd, SuperVersion* sv, SequenceNumber snapshot, ReadCallback* read_callback, bool expose_blob_index = false, @@ -1543,8 +1543,18 @@ class DBImpl : public DB { void SetDbSessionId(); Status FailIfCfHasTs(const ColumnFamilyHandle* column_family) const; - Status FailIfTsMismatchCf(ColumnFamilyHandle* column_family, const Slice& ts, - bool ts_for_read) const; + Status FailIfTsMismatchCf(ColumnFamilyHandle* column_family, + const Slice& ts) const; + + // Check that the read timestamp `ts` is at or above the `full_history_ts_low` + // timestamp in a `SuperVersion`. It's necessary to do this check after + // grabbing the SuperVersion. If the check passed, the referenced SuperVersion + // this read holds on to can ensure the read won't be affected if + // `full_history_ts_low` is increased concurrently, and this achieves that + // without explicitly locking by piggybacking the SuperVersion. + Status FailIfReadCollapsedHistory(const ColumnFamilyData* cfd, + const SuperVersion* sv, + const Slice& ts) const; // recovery_ctx stores the context about version edits and // LogAndApplyForRecovery persist all those edits to new Manifest after @@ -2312,15 +2322,18 @@ class DBImpl : public DB { // If callback is non-null, the callback is refreshed with the snapshot // sequence number // - // A return value of true indicates that the SuperVersions were obtained - // from the ColumnFamilyData, whereas false indicates they are thread - // local + // `unref_only` being set to true indicates that the SuperVersions were + // obtained from the ColumnFamilyData, whereas false indicates they are thread + // local. 
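To illustrate the user-visible behavior this check enforces, here is a standalone sketch (not part of the patch) using a column family with 64-bit user-defined timestamps. The DB path is a placeholder, and the hand-rolled fixed64 timestamp encoding assumes a little-endian host to match `BytewiseComparatorWithU64Ts()`:

```
// Sketch: after full_history_ts_low is raised and a new SuperVersion is
// installed (here via Flush), a read below that timestamp is expected to be
// rejected with InvalidArgument by the per-SuperVersion sanity check.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"

namespace {
std::string EncodeU64Ts(uint64_t ts) {
  // Fixed64, little-endian; assumes a little-endian host for brevity.
  std::string buf(sizeof(ts), '\0');
  std::memcpy(&buf[0], &ts, sizeof(ts));
  return buf;
}
}  // namespace

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.comparator = rocksdb::BytewiseComparatorWithU64Ts();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/udt_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  const std::string ts1 = EncodeU64Ts(1);
  const std::string ts5 = EncodeU64Ts(5);
  s = db->Put(rocksdb::WriteOptions(), db->DefaultColumnFamily(), "key", ts1,
              "value");

  // Raise the effective cutoff, then flush so a new SuperVersion carrying
  // full_history_ts_low == 5 gets installed.
  s = db->IncreaseFullHistoryTsLow(db->DefaultColumnFamily(), ts5);
  s = db->Flush(rocksdb::FlushOptions());

  // Reading at timestamp 1 (< full_history_ts_low) should now fail fast.
  std::string value;
  rocksdb::ReadOptions read_options;
  rocksdb::Slice read_ts(ts1);
  read_options.timestamp = &read_ts;
  s = db->Get(read_options, "key", &value);
  std::printf("Get at old timestamp: %s\n", s.ToString().c_str());

  delete db;
  return 0;
}
```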
+ // A non-OK status will be returned if for a column family that enables + // user-defined timestamp feature, the specified `ReadOptions.timestamp` + // attemps to read collapsed history. template - bool MultiCFSnapshot( + Status MultiCFSnapshot( const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, - T* cf_list, SequenceNumber* snapshot); + T* cf_list, SequenceNumber* snapshot, bool* unref_only); // The actual implementation of the batching MultiGet. The caller is expected // to have acquired the SuperVersion and pass in a snapshot sequence number @@ -2829,8 +2842,7 @@ inline Status DBImpl::FailIfCfHasTs( } inline Status DBImpl::FailIfTsMismatchCf(ColumnFamilyHandle* column_family, - const Slice& ts, - bool ts_for_read) const { + const Slice& ts) const { if (!column_family) { return Status::InvalidArgument("column family handle cannot be null"); } @@ -2850,20 +2862,28 @@ inline Status DBImpl::FailIfTsMismatchCf(ColumnFamilyHandle* column_family, << ts_sz << " given"; return Status::InvalidArgument(oss.str()); } - if (ts_for_read) { - auto cfh = static_cast_with_check(column_family); - auto cfd = cfh->cfd(); - std::string current_ts_low = cfd->GetFullHistoryTsLow(); - if (!current_ts_low.empty() && - ucmp->CompareTimestamp(ts, current_ts_low) < 0) { - std::stringstream oss; - oss << "Read timestamp: " << ts.ToString(true) - << " is smaller than full_history_ts_low: " - << Slice(current_ts_low).ToString(true) << std::endl; - return Status::InvalidArgument(oss.str()); - } - } return Status::OK(); } +inline Status DBImpl::FailIfReadCollapsedHistory(const ColumnFamilyData* cfd, + const SuperVersion* sv, + const Slice& ts) const { + // Reaching to this point means the timestamp size matching sanity check in + // `DBImpl::FailIfTsMismatchCf` already passed. So we skip that and assume + // column family has the same user-defined timestamp format as `ts`. 
+ const Comparator* const ucmp = cfd->user_comparator(); + assert(ucmp); + const std::string& full_history_ts_low = sv->full_history_ts_low; + assert(full_history_ts_low.empty() || + full_history_ts_low.size() == ts.size()); + if (!full_history_ts_low.empty() && + ucmp->CompareTimestamp(ts, full_history_ts_low) < 0) { + std::stringstream oss; + oss << "Read timestamp: " << ts.ToString(true) + << " is smaller than full_history_ts_low: " + << Slice(full_history_ts_low).ToString(true) << std::endl; + return Status::InvalidArgument(oss.str()); + } + return Status::OK(); +} } // namespace ROCKSDB_NAMESPACE diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 783d55082..69a8de398 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -58,8 +58,7 @@ Status DBImplReadOnly::Get(const ReadOptions& _read_options, assert(column_family); if (read_options.timestamp) { const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -92,6 +91,13 @@ Status DBImplReadOnly::Get(const ReadOptions& _read_options, } } SuperVersion* super_version = cfd->GetSuperVersion(); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + s = FailIfReadCollapsedHistory(cfd, super_version, + *(read_options.timestamp)); + if (!s.ok()) { + return s; + } + } MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; LookupKey lkey(key, snapshot, read_options.timestamp); @@ -137,8 +143,7 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& _read_options, assert(column_family); if (read_options.timestamp) { const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return NewErrorIterator(s); } @@ -151,6 +156,14 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& _read_options, auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); SuperVersion* super_version = cfd->GetSuperVersion()->Ref(); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + const Status s = FailIfReadCollapsedHistory(cfd, super_version, + *(read_options.timestamp)); + if (!s.ok()) { + cfd->GetSuperVersion()->Unref(); + return NewErrorIterator(s); + } + } SequenceNumber latest_snapshot = versions_->LastSequence(); SequenceNumber read_seq = read_options.snapshot != nullptr @@ -177,8 +190,7 @@ Status DBImplReadOnly::NewIterators( if (read_options.timestamp) { for (auto* cf : column_families) { assert(cf); - const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp), - /*ts_for_read=*/true); + const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -206,9 +218,27 @@ Status DBImplReadOnly::NewIterators( ->number_ : latest_snapshot; + autovector> cfd_to_sv; + + const bool check_read_ts = + read_options.timestamp && read_options.timestamp->size() > 0; for (auto cfh : column_families) { auto* cfd = static_cast_with_check(cfh)->cfd(); auto* sv = cfd->GetSuperVersion()->Ref(); + cfd_to_sv.emplace_back(cfd, sv); + if (check_read_ts) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + for (auto prev_entry : cfd_to_sv) { + std::get<1>(prev_entry)->Unref(); + } + return s; + } + } + } + assert(cfd_to_sv.size() == column_families.size()); + for (auto [cfd, sv] : cfd_to_sv) { 
auto* db_iter = NewArenaWrappedDbIterator( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, sv->current, read_seq, diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index ad8783042..3e5997b0d 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -384,8 +384,8 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, assert(column_family); if (read_options.timestamp) { - const Status s = FailIfTsMismatchCf( - column_family, *(read_options.timestamp), /*ts_for_read=*/true); + const Status s = + FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -412,6 +412,14 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, } // Acquire SuperVersion SuperVersion* super_version = GetAndRefSuperVersion(cfd); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + const Status s = FailIfReadCollapsedHistory(cfd, super_version, + *(read_options.timestamp)); + if (!s.ok()) { + ReturnAndCleanupSuperVersion(cfd, super_version); + return s; + } + } SequenceNumber snapshot = versions_->LastSequence(); GetWithTimestampReadCallback read_cb(snapshot); MergeContext merge_context; @@ -491,8 +499,7 @@ Iterator* DBImplSecondary::NewIterator(const ReadOptions& _read_options, assert(column_family); if (read_options.timestamp) { const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp), - /*ts_for_read=*/true); + FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return NewErrorIterator(s); } @@ -516,17 +523,25 @@ Iterator* DBImplSecondary::NewIterator(const ReadOptions& _read_options, Status::NotSupported("snapshot not supported in secondary mode")); } else { SequenceNumber snapshot(kMaxSequenceNumber); - result = NewIteratorImpl(read_options, cfd, snapshot, read_callback); + SuperVersion* sv = cfd->GetReferencedSuperVersion(this); + if (read_options.timestamp && read_options.timestamp->size() > 0) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + CleanupSuperVersion(sv); + return NewErrorIterator(s); + } + } + result = NewIteratorImpl(read_options, cfd, sv, snapshot, read_callback); } return result; } ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl( const ReadOptions& read_options, ColumnFamilyData* cfd, - SequenceNumber snapshot, ReadCallback* read_callback, - bool expose_blob_index, bool allow_refresh) { + SuperVersion* super_version, SequenceNumber snapshot, + ReadCallback* read_callback, bool expose_blob_index, bool allow_refresh) { assert(nullptr != cfd); - SuperVersion* super_version = cfd->GetReferencedSuperVersion(this); assert(snapshot == kMaxSequenceNumber); snapshot = versions_->LastSequence(); assert(snapshot != kMaxSequenceNumber); @@ -572,8 +587,7 @@ Status DBImplSecondary::NewIterators( if (read_options.timestamp) { for (auto* cf : column_families) { assert(cf); - const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp), - /*ts_for_read=*/true); + const Status s = FailIfTsMismatchCf(cf, *(read_options.timestamp)); if (!s.ok()) { return s; } @@ -597,10 +611,28 @@ Status DBImplSecondary::NewIterators( return Status::NotSupported("snapshot not supported in secondary mode"); } else { SequenceNumber read_seq(kMaxSequenceNumber); + autovector> cfd_to_sv; + const bool check_read_ts = + read_options.timestamp && read_options.timestamp->size() > 0; for (auto cfh : column_families) { ColumnFamilyData* cfd = static_cast(cfh)->cfd(); + SuperVersion* 
sv = cfd->GetReferencedSuperVersion(this); + cfd_to_sv.emplace_back(cfd, sv); + if (check_read_ts) { + const Status s = + FailIfReadCollapsedHistory(cfd, sv, *(read_options.timestamp)); + if (!s.ok()) { + for (auto prev_entry : cfd_to_sv) { + CleanupSuperVersion(std::get<1>(prev_entry)); + } + return s; + } + } + } + assert(cfd_to_sv.size() == column_families.size()); + for (auto [cfd, sv] : cfd_to_sv) { iterators->push_back( - NewIteratorImpl(read_options, cfd, read_seq, read_callback)); + NewIteratorImpl(read_options, cfd, sv, read_seq, read_callback)); } } return Status::OK(); diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index 50edc4ec9..a6ff4a66d 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -122,7 +122,7 @@ class DBImplSecondary : public DBImpl { ColumnFamilyHandle* column_family) override; ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& read_options, - ColumnFamilyData* cfd, + ColumnFamilyData* cfd, SuperVersion* sv, SequenceNumber snapshot, ReadCallback* read_callback, bool expose_blob_index = false, diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 8a1a6ce31..bc260c5a8 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -30,7 +30,7 @@ Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family, Status DBImpl::Put(const WriteOptions& o, ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& val) { - const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + const Status s = FailIfTsMismatchCf(column_family, ts); if (!s.ok()) { return s; } @@ -64,7 +64,7 @@ Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& val) { - const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + const Status s = FailIfTsMismatchCf(column_family, ts); if (!s.ok()) { return s; } @@ -83,7 +83,7 @@ Status DBImpl::Delete(const WriteOptions& write_options, Status DBImpl::Delete(const WriteOptions& write_options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) { - const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + const Status s = FailIfTsMismatchCf(column_family, ts); if (!s.ok()) { return s; } @@ -103,7 +103,7 @@ Status DBImpl::SingleDelete(const WriteOptions& write_options, Status DBImpl::SingleDelete(const WriteOptions& write_options, ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts) { - const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + const Status s = FailIfTsMismatchCf(column_family, ts); if (!s.ok()) { return s; } @@ -124,7 +124,7 @@ Status DBImpl::DeleteRange(const WriteOptions& write_options, ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key, const Slice& ts) { - const Status s = FailIfTsMismatchCf(column_family, ts, /*ts_for_read=*/false); + const Status s = FailIfTsMismatchCf(column_family, ts); if (!s.ok()) { return s; } diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index 7cafadf0e..c982fcff1 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -107,7 +107,10 @@ class DBIteratorTest : public DBIteratorBaseTest, read_callbacks_.push_back( std::unique_ptr(read_callback)); } - return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback); + DBImpl* db_impl = 
dbfull(); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(db_impl); + return db_impl->NewIteratorImpl(read_options, cfd, super_version, seq, + read_callback); } private: @@ -3130,8 +3133,10 @@ TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) { static_cast_with_check(db_->DefaultColumnFamily()) ->cfd(); // The iterator are suppose to see data before seq1. - Iterator* iter = - dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1); + DBImpl* db_impl = dbfull(); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(db_impl); + Iterator* iter = db_impl->NewIteratorImpl(ReadOptions(), cfd, super_version, + seq2, &callback1); // Seek // The latest value of "foo" before seq1 is "v3" @@ -3209,7 +3214,9 @@ TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) { SequenceNumber seq4 = db_->GetLatestSequenceNumber(); // The iterator is suppose to see data before seq3. - iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2); + super_version = cfd->GetReferencedSuperVersion(db_impl); + iter = db_impl->NewIteratorImpl(ReadOptions(), cfd, super_version, seq4, + &callback2); // Seek to "z", which is visible. iter->Seek("z"); ASSERT_TRUE(iter->Valid()); diff --git a/db/db_readonly_with_timestamp_test.cc b/db/db_readonly_with_timestamp_test.cc index 675e4943b..8e8a5d278 100644 --- a/db/db_readonly_with_timestamp_test.cc +++ b/db/db_readonly_with_timestamp_test.cc @@ -336,6 +336,53 @@ TEST_F(DBReadOnlyTestWithTimestamp, Iterators) { Close(); } +TEST_F(DBReadOnlyTestWithTimestamp, FullHistoryTsLowSanityCheckFail) { + Options options = CurrentOptions(); + options.env = env_; + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + // Use UDT in memtable only feature for this test, so we can control that + // newly set `full_history_ts_low` collapse history when Flush happens. + options.persist_user_defined_timestamps = false; + options.allow_concurrent_memtable_write = false; + DestroyAndReopen(options); + + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val1")); + + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 3); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), + full_history_ts_low)); + ASSERT_OK(Flush(0)); + + // Reopen the database in read only mode to test its timestamp support. + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + + // Reading below full_history_ts_low fails a sanity check. 
+ std::string read_ts; + PutFixed64(&read_ts, 2); + Slice read_ts_slice = read_ts; + ReadOptions read_opts; + read_opts.timestamp = &read_ts_slice; + + // Get() + std::string value; + ASSERT_TRUE(db_->Get(read_opts, "foo", &value).IsInvalidArgument()); + // NewIterator() + std::unique_ptr iter( + db_->NewIterator(read_opts, db_->DefaultColumnFamily())); + ASSERT_TRUE(iter->status().IsInvalidArgument()); + + // NewIterators() + std::vector cfhs = {db_->DefaultColumnFamily()}; + std::vector iterators; + ASSERT_TRUE( + db_->NewIterators(read_opts, cfhs, &iterators).IsInvalidArgument()); + Close(); +} + TEST_F(DBReadOnlyTestWithTimestamp, IteratorsReadTimestampSizeMismatch) { const int kNumKeysPerFile = 128; const uint64_t kMaxKey = 1024; diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc index f3f0a8d05..19cbdfb95 100644 --- a/db/db_secondary_test.cc +++ b/db/db_secondary_test.cc @@ -1561,6 +1561,55 @@ TEST_F(DBSecondaryTestWithTimestamp, IteratorsReadTimestampSizeMismatch) { Close(); } +TEST_F(DBSecondaryTestWithTimestamp, FullHistoryTsLowSanityCheckFail) { + Options options = CurrentOptions(); + options.env = env_; + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + // Use UDT in memtable only feature for this test, so we can control that + // newly set `full_history_ts_low` collapse history when Flush happens. + options.persist_user_defined_timestamps = false; + options.allow_concurrent_memtable_write = false; + DestroyAndReopen(options); + + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val1")); + + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 3); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), + full_history_ts_low)); + ASSERT_OK(Flush(0)); + + // Reopen the database as secondary instance to test its timestamp support. + Close(); + options.max_open_files = -1; + ASSERT_OK(ReopenAsSecondary(options)); + + // Reading below full_history_ts_low fails a sanity check. + std::string read_ts; + PutFixed64(&read_ts, 2); + Slice read_ts_slice = read_ts; + ReadOptions read_opts; + read_opts.timestamp = &read_ts_slice; + + // Get() + std::string value; + ASSERT_TRUE(db_->Get(read_opts, "foo", &value).IsInvalidArgument()); + + // NewIterator() + std::unique_ptr iter( + db_->NewIterator(read_opts, db_->DefaultColumnFamily())); + ASSERT_TRUE(iter->status().IsInvalidArgument()); + + // NewIterators() + std::vector cfhs = {db_->DefaultColumnFamily()}; + std::vector iterators; + ASSERT_TRUE( + db_->NewIterators(read_opts, cfhs, &iterators).IsInvalidArgument()); + Close(); +} + TEST_F(DBSecondaryTestWithTimestamp, IteratorsReadTimestampSpecifiedWithoutWriteTimestamp) { const int kNumKeysPerFile = 128; diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 2572bb892..8d632d810 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -3416,6 +3416,225 @@ TEST_F(DBBasicTestWithTimestamp, EnableDisableUDT) { Close(); } +// Tests that as long as the +// `ReadOptions.timestamp >= SuperVersion.full_history_ts_low` sanity check +// passes. The read will be consistent even if the column family's +// full_history_ts_low is concurrently increased and collapsed some history +// above `ReadOptions.timestamp`. 
+TEST_F(DBBasicTestWithTimestamp, + FullHistoryTsLowSanityCheckPassReadIsConsistent) { + Options options = CurrentOptions(); + options.env = env_; + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + // Use UDT in memtable only feature for this test, so we can control that + // newly set `full_history_ts_low` collapse history when Flush happens. + options.persist_user_defined_timestamps = false; + options.allow_concurrent_memtable_write = false; + DestroyAndReopen(options); + std::string min_ts; + PutFixed64(&min_ts, 0); + + // Write two versions of the key (1, v1), (3, v3), and always read with + // timestamp 2. + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val1")); + + std::string read_ts; + PutFixed64(&read_ts, 2); + Slice read_ts_slice = read_ts; + ReadOptions read_opts; + read_opts.timestamp = &read_ts_slice; + + // First read, no full_history_ts_low set, sanity check pass. + std::string value; + std::string timestamp; + ASSERT_OK(db_->Get(read_opts, "foo", &value, ×tamp)); + ASSERT_EQ("val1", value); + ASSERT_EQ(write_ts, timestamp); + + std::string full_history_ts_low; + std::string marked_ts_low; + PutFixed64(&full_history_ts_low, 2); + marked_ts_low = full_history_ts_low; + ASSERT_OK(db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), + full_history_ts_low)); + ASSERT_OK(Flush(0)); + + // Write the (3, v3) entry after flush, otherwise with UDT in memtable only + // the previous Flush(0) with full_history_ts_low = 2 will be postponed + // waiting for (3, v3) to expire too. + write_ts.clear(); + PutFixed64(&write_ts, 3); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val3")); + + // Second read: + // ReadOptions.timestamp(2) >= SuperVersion.full_history_ts_low(2), + // and ReadOptions.timestamp(2) >= ColumnFamilyData.full_history_ts_low(2). + // history below 2 is collapsed. Reading at 2 or above 2 is ok. + // Sanity check pass. Read return consistent value, but timestamp is already + // collapsed. + ASSERT_OK(db_->Get(read_opts, "foo", &value, ×tamp)); + ASSERT_EQ("val1", value); + ASSERT_EQ(min_ts, timestamp); + + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::GetImpl:AfterAcquireSv", [&](void* /*arg*/) { + // Concurrently increasing full_history_ts_low and flush to create a + // new SuperVersion + std::string current_ts_low; + ASSERT_OK(db_->GetFullHistoryTsLow(db_->DefaultColumnFamily(), + ¤t_ts_low)); + if (current_ts_low.empty() || current_ts_low != marked_ts_low) { + return; + } + full_history_ts_low.clear(); + PutFixed64(&full_history_ts_low, 4); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), + full_history_ts_low)); + ASSERT_OK(Flush(0)); + }); + + SyncPoint::GetInstance()->EnableProcessing(); + + // Third read: + // ReadOptions.timestamp(2) >= SuperVersion.full_history_ts_low(2), + // but ReadOptions.timestamp(2) < ColumnFamilyData.full_history_ts_low(4). + // History below 4 is collapsed in the newly installed SuperVersion. But the + // SuperVersion attached to this read still has the history below 4 available. + // Sanity check pass. Read return consistent value, timestamp is collapsed. + ASSERT_OK(db_->Get(read_opts, "foo", &value, ×tamp)); + ASSERT_EQ("val1", value); + ASSERT_EQ(min_ts, timestamp); + + // Fourth read: + // ReadOptions.timestamp(2) < SuperVersion.full_history_ts_low(4). + // Sanity check fails. Had it succeeded, the read would return "v3", + // which is inconsistent. 
+ ASSERT_TRUE( + db_->Get(read_opts, "foo", &value, ×tamp).IsInvalidArgument()); + Close(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + +// Tests that in cases when +// `ReadOptions.timestamp >= SuperVersion.full_history_ts_low` sanity check +// fails. The referenced SuperVersion is dereferenced and cleaned up properly +// for all read APIs that involves this sanity check. +TEST_F(DBBasicTestWithTimestamp, FullHistoryTsLowSanityCheckFail) { + Options options = CurrentOptions(); + options.env = env_; + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + // Use UDT in memtable only feature for this test, so we can control that + // newly set `full_history_ts_low` collapse history when Flush happens. + options.persist_user_defined_timestamps = false; + options.allow_concurrent_memtable_write = false; + DestroyAndReopen(options); + + ColumnFamilyHandle* handle2 = nullptr; + Status s = db_->CreateColumnFamily(options, "data", &handle2); + ASSERT_OK(s); + + std::string write_ts; + PutFixed64(&write_ts, 1); + ASSERT_OK(db_->Put(WriteOptions(), "foo", write_ts, "val1")); + ASSERT_OK(db_->Put(WriteOptions(), handle2, "foo", write_ts, "val1")); + + std::string full_history_ts_low; + PutFixed64(&full_history_ts_low, 3); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(db_->DefaultColumnFamily(), + full_history_ts_low)); + ASSERT_OK(db_->IncreaseFullHistoryTsLow(handle2, full_history_ts_low)); + ASSERT_OK(Flush(0)); + ASSERT_OK(db_->Flush(FlushOptions(), handle2)); + + std::string read_ts; + PutFixed64(&read_ts, 2); + Slice read_ts_slice = read_ts; + ReadOptions read_opts; + read_opts.timestamp = &read_ts_slice; + + // Get() + std::string value; + ASSERT_TRUE(db_->Get(read_opts, "foo", &value).IsInvalidArgument()); + + // MultiGet() + std::vector cfhs = {db_->DefaultColumnFamily(), handle2}; + { + std::vector key_vals = {"foo", "foo"}; + std::vector keys; + std::vector values; + for (size_t j = 0; j < 2; ++j) { + keys.push_back(key_vals[j]); + } + + std::vector statuses = + db_->MultiGet(read_opts, cfhs, keys, &values); + for (auto status : statuses) { + ASSERT_TRUE(status.IsInvalidArgument()); + } + } + + // MultiGet with only one column family + { + std::vector one_cfh = {db_->DefaultColumnFamily()}; + std::vector key_vals = {"foo"}; + std::vector keys; + std::vector values; + for (size_t j = 0; j < 1; ++j) { + keys.push_back(key_vals[j]); + } + + std::vector statuses = + db_->MultiGet(read_opts, one_cfh, keys, &values); + for (auto status : statuses) { + ASSERT_TRUE(status.IsInvalidArgument()); + } + } + + // Overloaded version of MultiGet + ColumnFamilyHandle* column_families[] = {db_->DefaultColumnFamily(), handle2}; + { + Slice keys[] = {"foo", "foo"}; + PinnableSlice values[] = {PinnableSlice(), PinnableSlice()}; + Status statuses[] = {Status::OK(), Status::OK()}; + db_->MultiGet(read_opts, /*num_keys=*/2, &column_families[0], &keys[0], + &values[0], &statuses[0], /*sorted_input=*/false); + for (auto status : statuses) { + ASSERT_TRUE(status.IsInvalidArgument()); + } + } + + // Overloaded versions of MultiGet with one column family + { + ColumnFamilyHandle* one_column_family[] = {db_->DefaultColumnFamily()}; + Slice keys[] = {"foo"}; + PinnableSlice values[] = {PinnableSlice()}; + Status statuses[] = {Status::OK()}; + db_->MultiGet(read_opts, /*num_keys=*/1, &one_column_family[0], &keys[0], + &values[0], &statuses[0], /*sorted_input=*/false); + for (auto status : statuses) { + ASSERT_TRUE(status.IsInvalidArgument()); + } + } + 
+ // NewIterator() + std::unique_ptr iter( + db_->NewIterator(read_opts, db_->DefaultColumnFamily())); + ASSERT_TRUE(iter->status().IsInvalidArgument()); + std::unique_ptr iter2(db_->NewIterator(read_opts, handle2)); + ASSERT_TRUE(iter2->status().IsInvalidArgument()); + + // NewIterators() + std::vector iterators; + ASSERT_TRUE( + db_->NewIterators(read_opts, cfhs, &iterators).IsInvalidArgument()); + delete handle2; + Close(); +} + TEST_F(DBBasicTestWithTimestamp, GCPreserveRangeTombstoneWhenNoOrSmallFullHistoryLow) { Options options = CurrentOptions(); diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 034701136..2fa7ae898 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -2101,8 +2101,9 @@ Iterator* BlobDBImpl::NewIterator(const ReadOptions& _read_options) { own_snapshot = new ManagedSnapshot(db_); snapshot = own_snapshot->snapshot(); } + SuperVersion* sv = cfd->GetReferencedSuperVersion(db_impl_); auto* iter = db_impl_->NewIteratorImpl( - read_options, cfd, snapshot->GetSequenceNumber(), + read_options, cfd, sv, snapshot->GetSequenceNumber(), nullptr /*read_callback*/, true /*expose_blob_index*/); return new BlobDBIterator(own_snapshot, iter, this, clock_, statistics_); } diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index 2986fae8d..d0ee86540 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -183,8 +183,8 @@ inline Status WriteCommittedTxn::GetForUpdateImpl( value, exclusive, do_validate); } } else { - Status s = db_impl_->FailIfTsMismatchCf( - column_family, *(read_options.timestamp), /*ts_for_read=*/true); + Status s = + db_impl_->FailIfTsMismatchCf(column_family, *(read_options.timestamp)); if (!s.ok()) { return s; } diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index 5f17247e4..91a81d158 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -413,9 +413,10 @@ Iterator* WritePreparedTxnDB::NewIterator(const ReadOptions& _read_options, static_cast_with_check(column_family)->cfd(); auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted); - auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, snapshot_seq, - &state->callback, expose_blob_index, - allow_refresh); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(db_impl_); + auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, super_version, + snapshot_seq, &state->callback, + expose_blob_index, allow_refresh); db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr); return db_iter; } @@ -461,8 +462,9 @@ Status WritePreparedTxnDB::NewIterators( static_cast_with_check(column_family)->cfd(); auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted); - auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, snapshot_seq, - &state->callback, + SuperVersion* super_version = cfd->GetReferencedSuperVersion(db_impl_); + auto* db_iter = db_impl_->NewIteratorImpl(read_options, cfd, super_version, + snapshot_seq, &state->callback, expose_blob_index, allow_refresh); db_iter->RegisterCleanup(CleanupWritePreparedTxnDBIterator, state, nullptr); iterators->push_back(db_iter); diff --git a/utilities/transactions/write_unprepared_txn_db.cc b/utilities/transactions/write_unprepared_txn_db.cc index 
973aa49fd..1d75dd449 100644 --- a/utilities/transactions/write_unprepared_txn_db.cc +++ b/utilities/transactions/write_unprepared_txn_db.cc @@ -472,9 +472,10 @@ Iterator* WriteUnpreparedTxnDB::NewIterator(const ReadOptions& _read_options, static_cast_with_check(column_family)->cfd(); auto* state = new IteratorState(this, snapshot_seq, own_snapshot, min_uncommitted, txn); + SuperVersion* super_version = cfd->GetReferencedSuperVersion(db_impl_); auto* db_iter = db_impl_->NewIteratorImpl( - read_options, cfd, state->MaxVisibleSeq(), &state->callback, - expose_blob_index, allow_refresh); + read_options, cfd, super_version, state->MaxVisibleSeq(), + &state->callback, expose_blob_index, allow_refresh); db_iter->RegisterCleanup(CleanupWriteUnpreparedTxnDBIterator, state, nullptr); return db_iter; } From 84d335b61953b885b113764ab096b26a135f5dbb Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Thu, 14 Sep 2023 11:25:33 -0700 Subject: [PATCH 108/386] Remove an unused variable: `last_stats_dump_time_microsec_` (#11824) Summary: `last_stats_dump_time_microsec_` is not used after initialization. I guess that it was previously used to implement periodically dumping stats, but this functionality has now been delegated to the `PeriodicTaskScheduler`. https://github.com/facebook/rocksdb/blob/4b79e8c0039389fc731f8bc9bd6109257c21d9d1/db/db_impl/db_impl.cc#L770-L778 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11824 Reviewed By: cbi42 Differential Revision: D49278311 Pulled By: jowlyzhang fbshipit-source-id: 5856245580afc026e6b490755a45c5436a2375c9 --- db/db_impl/db_impl.cc | 1 - db/db_impl/db_impl.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 059902c11..0f92cc70e 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -211,7 +211,6 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, disable_delete_obsolete_files_(0), pending_purge_obsolete_files_(0), delete_obsolete_files_last_run_(immutable_db_options_.clock->NowMicros()), - last_stats_dump_time_microsec_(0), has_unpersisted_data_(false), unable_to_release_oldest_log_(false), num_running_ingest_file_(0), diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index e33b9a0d7..3a2606de8 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2633,9 +2633,6 @@ class DBImpl : public DB { // initialized with startup time. uint64_t delete_obsolete_files_last_run_; - // last time stats were dumped to LOG - std::atomic last_stats_dump_time_microsec_; - // The thread that wants to switch memtable, can wait on this cv until the // pending writes to memtable finishes. std::condition_variable switch_cv_; From 3c27f56d0b7e359defbc25bf90061214c889f40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Thu, 14 Sep 2023 13:36:39 -0700 Subject: [PATCH 109/386] Fix the problem that some keys of ClipColumnFamily may not be deleted (#11811) Summary: When executing ClipColumnFamily, if end_key is equal to largest_user_key in a file, this key will not be deleted. 
So we need to change less than to less than or equal to Pull Request resolved: https://github.com/facebook/rocksdb/pull/11811 Reviewed By: ajkr Differential Revision: D49206936 Pulled By: cbi42 fbshipit-source-id: 3e8bcb7b52040a9b4d1176de727616cc298d3445 --- db/db_impl/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0f92cc70e..35fbaa3f3 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -5920,7 +5920,7 @@ Status DBImpl::ClipColumnFamily(ColumnFamilyHandle* column_family, if (status.ok()) { // Delete [clip_end_key, largest_use_key] - if (ucmp->Compare(end_key, largest_user_key) < 0) { + if (ucmp->Compare(end_key, largest_user_key) <= 0) { status = DeleteRange(wo, column_family, end_key, largest_user_key); if (status.ok()) { status = Delete(wo, column_family, largest_user_key); From 47e023abbd2db5f715dde923af84b37b5b05c039 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 14 Sep 2023 16:07:59 -0700 Subject: [PATCH 110/386] Remove "rocksdb.file.read.db.open.micros" typo from 8.6 HISTORY (#11839) Summary: **Context/Summary:** `rocksdb.file.read.db.open.micros` is landed in 8.3 but not 8.6. It was included in the HISTORY due to an error. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11839 Test Plan: no code change; Will cherry pick this to 8.6 branch when landed. Reviewed By: anand1976 Differential Revision: D49294250 Pulled By: hx235 fbshipit-source-id: b2ac10758a15eadd5c129d80e93e1c3d0aa569cb --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 27a248b4e..fc955a1ae 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -12,7 +12,7 @@ * Add a column family option `memtable_max_range_deletions` that limits the number of range deletions in a memtable. RocksDB will try to do an automatic flush after the limit is reached. (#11358) * Add PutEntity API in sst_file_writer * Add `timeout` in microsecond option to `WaitForCompactOptions` to allow timely termination of prolonged waiting in scenarios like recurring recoverable errors, such as out-of-space situations and continuous write streams that sustain ongoing flush and compactions -* New statistics `rocksdb.file.read.{db.open|get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. +* New statistics `rocksdb.file.read.{get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. * Add close_db option to `WaitForCompactOptions` to call Close() after waiting is done. * Add a new compression option `CompressionOptions::checksum` for enabling ZSTD's checksum feature to detect corruption during decompression. From f2b623bcc1ad0d633164b3824b4b683b1b57ce28 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 15 Sep 2023 08:30:44 -0700 Subject: [PATCH 111/386] GetEntity Support for ReadOnlyDB and SecondaryDB (#11799) Summary: `GetEntity` API support for ReadOnly DB and Secondary DB. 
- Introduced `GetImpl()` with `GetImplOptions` in `db_impl_readonly` and refactored current `Get()` logic into `GetImpl()` so that look up logic can be reused for `GetEntity()` (Following the same pattern as `DBImpl::Get()` and `DBImpl::GetEntity()`) - Introduced `GetImpl()` with `GetImplOptions` in `db_impl_secondary` and refactored current `GetImpl()` logic. This is to make `DBImplSecondary::Get/GetEntity` consistent with `DBImpl::Get/GetEntity` and `DBImplReadOnly::Get/GetEntity` - `GetImpl()` in `db_impl` is now virtual. both `db_impl_readonly` and `db_impl_secondary`'s `Get()` override are no longer needed since all three dbs now have the same `Get()` which calls `GetImpl()` internally. - `GetImpl()` in `DBImplReadOnly` and `DBImplSecondary` now pass in `columns` instead of `nullptr` in lookup functions like `memtable->get()` - Introduced `GetEntity()` API in `DBImplReadOnly` and `DBImplSecondary` which simply calls `GetImpl()` with `columns` set in `GetImplOptions`. - Introduced `Env::IOActivity::kGetEntity` and set read_options.io_activity to `Env::IOActivity::kGetEntity` for `GetEntity()` operations (in db_impl) Pull Request resolved: https://github.com/facebook/rocksdb/pull/11799 Test Plan: **Unit Tests** - Added verification in `DBWideBasicTest::PutEntity` by Reopening DB as ReadOnly with the same setup. - Added verification in `DBSecondaryTest::ReopenAsSecondary` by calling `PutEntity()` and `GetEntity()` on top of existing `Put()` and `Get()` - `make -j64 check` **Crash Tests** - `python3 tools/db_crashtest.py blackbox --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 --duration=60 --inter val=10` - `python3 tools/db_crashtest.py blackbox --simple --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 ` - `python3 tools/db_crashtest.py blackbox --cf_consistency --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 --duration=60 --inter val=10` Reviewed By: ltamasi Differential Revision: D49037040 Pulled By: jaykorean fbshipit-source-id: a0648253ded6e91af7953de364ed3c6bf163626b --- db/db_impl/db_impl.cc | 14 +- db/db_impl/db_impl.h | 4 +- db/db_impl/db_impl_readonly.cc | 98 ++++++------- db/db_impl/db_impl_readonly.h | 10 +- db/db_impl/db_impl_secondary.cc | 131 ++++++++---------- db/db_impl/db_impl_secondary.h | 16 +-- db/db_secondary_test.cc | 15 +- db/wide/db_wide_basic_test.cc | 5 + db_stress_tool/db_stress_test_base.cc | 4 + include/rocksdb/env.h | 2 + include/rocksdb/thread_status.h | 2 + monitoring/thread_status_util_debug.cc | 4 + ...get_entity_in_secondary_and_readonly_db.md | 1 + util/thread_operation.h | 2 + 14 files changed, 158 insertions(+), 150 deletions(-) create mode 100644 unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 35fbaa3f3..a88e8f3b4 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1944,25 +1944,27 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, return s; } -Status DBImpl::GetEntity(const ReadOptions& read_options, +Status DBImpl::GetEntity(const ReadOptions& _read_options, ColumnFamilyHandle* column_family, const Slice& key, 
PinnableWideColumns* columns) { if (!column_family) { return Status::InvalidArgument( "Cannot call GetEntity without a column family handle"); } - if (!columns) { return Status::InvalidArgument( "Cannot call GetEntity without a PinnableWideColumns object"); } - - if (read_options.io_activity != Env::IOActivity::kUnknown) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGetEntity) { return Status::InvalidArgument( "Cannot call GetEntity with `ReadOptions::io_activity` != " - "`Env::IOActivity::kUnknown`"); + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGetEntity`"); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGetEntity; } - columns->Reset(); GetImplOptions get_impl_options; diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 3a2606de8..8092c7669 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -643,8 +643,8 @@ class DBImpl : public DB { // get_impl_options.key via get_impl_options.value // If get_impl_options.get_value = false get merge operands associated with // get_impl_options.key via get_impl_options.merge_operands - Status GetImpl(const ReadOptions& options, const Slice& key, - GetImplOptions& get_impl_options); + virtual Status GetImpl(const ReadOptions& options, const Slice& key, + GetImplOptions& get_impl_options); // If `snapshot` == kMaxSequenceNumber, set a recent one inside the file. ArenaWrappedDBIter* NewIteratorImpl(const ReadOptions& options, diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index 69a8de398..997a4e2ed 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -29,41 +29,23 @@ DBImplReadOnly::DBImplReadOnly(const DBOptions& db_options, DBImplReadOnly::~DBImplReadOnly() {} // Implementations of the DB interface -Status DBImplReadOnly::Get(const ReadOptions& read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* pinnable_val) { - return Get(read_options, column_family, key, pinnable_val, - /*timestamp*/ nullptr); -} +Status DBImplReadOnly::GetImpl(const ReadOptions& read_options, + const Slice& key, + GetImplOptions& get_impl_options) { + assert(get_impl_options.value != nullptr || + get_impl_options.columns != nullptr); + assert(get_impl_options.column_family); -Status DBImplReadOnly::Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* pinnable_val, - std::string* timestamp) { - if (_read_options.io_activity != Env::IOActivity::kUnknown && - _read_options.io_activity != Env::IOActivity::kGet) { - return Status::InvalidArgument( - "Can only call Get with `ReadOptions::io_activity` is " - "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); - } - ReadOptions read_options(_read_options); - if (read_options.io_activity == Env::IOActivity::kUnknown) { - read_options.io_activity = Env::IOActivity::kGet; - } - assert(pinnable_val != nullptr); - PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); - StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); - PERF_TIMER_GUARD(get_snapshot_time); + Status s; - assert(column_family); if (read_options.timestamp) { - const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp)); + s = FailIfTsMismatchCf(get_impl_options.column_family, + *(read_options.timestamp)); if (!s.ok()) { return s; } } else { - const Status s = 
FailIfCfHasTs(column_family); + s = FailIfCfHasTs(get_impl_options.column_family); if (!s.ok()) { return s; } @@ -71,25 +53,32 @@ Status DBImplReadOnly::Get(const ReadOptions& _read_options, // Clear the timestamps for returning results so that we can distinguish // between tombstone or key that has never been written - if (timestamp) { - timestamp->clear(); + if (get_impl_options.timestamp) { + get_impl_options.timestamp->clear(); } - const Comparator* ucmp = column_family->GetComparator(); - assert(ucmp); - std::string* ts = ucmp->timestamp_size() > 0 ? timestamp : nullptr; + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); + PERF_TIMER_GUARD(get_snapshot_time); - Status s; + const Comparator* ucmp = get_impl_options.column_family->GetComparator(); + assert(ucmp); + std::string* ts = + ucmp->timestamp_size() > 0 ? get_impl_options.timestamp : nullptr; SequenceNumber snapshot = versions_->LastSequence(); GetWithTimestampReadCallback read_cb(snapshot); - auto cfh = static_cast_with_check(column_family); + auto cfh = static_cast_with_check( + get_impl_options.column_family); auto cfd = cfh->cfd(); if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { - tracer_->Get(column_family, key); + tracer_->Get(get_impl_options.column_family, key); } } + + // In read-only mode Get(), no super version operation is needed (i.e. + // GetAndRefSuperVersion and ReturnAndCleanupSuperVersion) SuperVersion* super_version = cfd->GetSuperVersion(); if (read_options.timestamp && read_options.timestamp->size() > 0) { s = FailIfReadCollapsedHistory(cfd, super_version, @@ -102,29 +91,42 @@ Status DBImplReadOnly::Get(const ReadOptions& _read_options, SequenceNumber max_covering_tombstone_seq = 0; LookupKey lkey(key, snapshot, read_options.timestamp); PERF_TIMER_STOP(get_snapshot_time); - if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), - /*columns=*/nullptr, ts, &s, &merge_context, - &max_covering_tombstone_seq, read_options, - false /* immutable_memtable */, &read_cb)) { - pinnable_val->PinSelf(); + + // Look up starts here + if (super_version->mem->Get( + lkey, + get_impl_options.value ? 
get_impl_options.value->GetSelf() : nullptr, + get_impl_options.columns, ts, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, &read_cb)) { + if (get_impl_options.value) { + get_impl_options.value->PinSelf(); + } RecordTick(stats_, MEMTABLE_HIT); } else { PERF_TIMER_GUARD(get_from_output_files_time); PinnedIteratorsManager pinned_iters_mgr; super_version->current->Get( - read_options, lkey, pinnable_val, /*columns=*/nullptr, ts, &s, - &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, + read_options, lkey, get_impl_options.value, get_impl_options.columns, + ts, &s, &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, /*value_found*/ nullptr, /*key_exists*/ nullptr, /*seq*/ nullptr, &read_cb, /*is_blob*/ nullptr, /*do_merge*/ true); RecordTick(stats_, MEMTABLE_MISS); } - RecordTick(stats_, NUMBER_KEYS_READ); - size_t size = pinnable_val->size(); - RecordTick(stats_, BYTES_READ, size); - RecordInHistogram(stats_, BYTES_PER_READ, size); - PERF_COUNTER_ADD(get_read_bytes, size); + { + RecordTick(stats_, NUMBER_KEYS_READ); + size_t size = 0; + if (get_impl_options.value) { + size = get_impl_options.value->size(); + } else if (get_impl_options.columns) { + size = get_impl_options.columns->serialized_size(); + } + RecordTick(stats_, BYTES_READ, size); + RecordInHistogram(stats_, BYTES_PER_READ, size); + PERF_COUNTER_ADD(get_read_bytes, size); + } return s; } diff --git a/db/db_impl/db_impl_readonly.h b/db/db_impl/db_impl_readonly.h index 972e5531a..cc925ba50 100644 --- a/db/db_impl/db_impl_readonly.h +++ b/db/db_impl/db_impl_readonly.h @@ -24,13 +24,9 @@ class DBImplReadOnly : public DBImpl { virtual ~DBImplReadOnly(); // Implementations of the DB interface - using DB::Get; - virtual Status Get(const ReadOptions& options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value) override; - Status Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value, std::string* timestamp) override; + using DBImpl::GetImpl; + Status GetImpl(const ReadOptions& options, const Slice& key, + GetImplOptions& get_impl_options) override; // TODO: Implement ReadOnly MultiGet? 
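[Editor's illustration, not part of the diff] For orientation, a minimal usage sketch of the read path this change enables: opening a database read-only and fetching a wide-column entity via GetEntity(). The database path and key are hypothetical, and the sketch assumes the DB already contains wide-column data written with PutEntity().

    #include <iostream>

    #include "rocksdb/db.h"
    #include "rocksdb/wide_columns.h"

    namespace rdb = ROCKSDB_NAMESPACE;

    int main() {
      rdb::Options options;
      rdb::DB* db = nullptr;
      // Hypothetical path to an existing database.
      rdb::Status s = rdb::DB::OpenForReadOnly(options, "/tmp/example_db", &db);
      if (!s.ok()) {
        std::cerr << s.ToString() << std::endl;
        return 1;
      }

      // Look up the wide-column entity stored under a (hypothetical) key.
      rdb::PinnableWideColumns result;
      s = db->GetEntity(rdb::ReadOptions(), db->DefaultColumnFamily(), "baz",
                        &result);
      if (s.ok()) {
        // Print each column name/value pair of the entity.
        for (const auto& column : result.columns()) {
          std::cout << column.name().ToString() << " = "
                    << column.value().ToString() << std::endl;
        }
      }

      delete db;
      return 0;
    }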
diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 3e5997b0d..e6dd8e080 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -339,113 +339,93 @@ Status DBImplSecondary::RecoverLogFiles( return status; } -// Implementation of the DB interface -Status DBImplSecondary::Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value) { - if (_read_options.io_activity != Env::IOActivity::kUnknown && - _read_options.io_activity != Env::IOActivity::kGet) { - return Status::InvalidArgument( - "Can only call Get with `ReadOptions::io_activity` is " - "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); - } - ReadOptions read_options(_read_options); - if (read_options.io_activity == Env::IOActivity::kUnknown) { - read_options.io_activity = Env::IOActivity::kGet; - } - return GetImpl(read_options, column_family, key, value, - /*timestamp*/ nullptr); -} - -Status DBImplSecondary::Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value, std::string* timestamp) { - if (_read_options.io_activity != Env::IOActivity::kUnknown && - _read_options.io_activity != Env::IOActivity::kGet) { - return Status::InvalidArgument( - "Can only call Get with `ReadOptions::io_activity` is " - "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`"); - } - ReadOptions read_options(_read_options); - if (read_options.io_activity == Env::IOActivity::kUnknown) { - read_options.io_activity = Env::IOActivity::kGet; - } - return GetImpl(read_options, column_family, key, value, timestamp); -} - Status DBImplSecondary::GetImpl(const ReadOptions& read_options, - ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* pinnable_val, - std::string* timestamp) { - assert(pinnable_val != nullptr); - PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); - StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); - PERF_TIMER_GUARD(get_snapshot_time); + const Slice& key, + GetImplOptions& get_impl_options) { + assert(get_impl_options.value != nullptr || + get_impl_options.columns != nullptr); + assert(get_impl_options.column_family); + + Status s; - assert(column_family); if (read_options.timestamp) { - const Status s = - FailIfTsMismatchCf(column_family, *(read_options.timestamp)); + s = FailIfTsMismatchCf(get_impl_options.column_family, + *(read_options.timestamp)); if (!s.ok()) { return s; } } else { - const Status s = FailIfCfHasTs(column_family); + s = FailIfCfHasTs(get_impl_options.column_family); if (!s.ok()) { return s; } } - // Clear the timestamp for returning results so that we can distinguish - // between tombstone or key that has never been written later. - if (timestamp) { - timestamp->clear(); + // Clear the timestamps for returning results so that we can distinguish + // between tombstone or key that has never been written + if (get_impl_options.timestamp) { + get_impl_options.timestamp->clear(); } - auto cfh = static_cast(column_family); - ColumnFamilyData* cfd = cfh->cfd(); + PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock); + StopWatch sw(immutable_db_options_.clock, stats_, DB_GET); + PERF_TIMER_GUARD(get_snapshot_time); + + const Comparator* ucmp = get_impl_options.column_family->GetComparator(); + assert(ucmp); + std::string* ts = + ucmp->timestamp_size() > 0 ? 
get_impl_options.timestamp : nullptr; + SequenceNumber snapshot = versions_->LastSequence(); + GetWithTimestampReadCallback read_cb(snapshot); + auto cfh = static_cast_with_check( + get_impl_options.column_family); + auto cfd = cfh->cfd(); if (tracer_) { InstrumentedMutexLock lock(&trace_mutex_); if (tracer_) { - tracer_->Get(column_family, key); + tracer_->Get(get_impl_options.column_family, key); } } + // Acquire SuperVersion SuperVersion* super_version = GetAndRefSuperVersion(cfd); if (read_options.timestamp && read_options.timestamp->size() > 0) { - const Status s = FailIfReadCollapsedHistory(cfd, super_version, - *(read_options.timestamp)); + s = FailIfReadCollapsedHistory(cfd, super_version, + *(read_options.timestamp)); if (!s.ok()) { ReturnAndCleanupSuperVersion(cfd, super_version); return s; } } - SequenceNumber snapshot = versions_->LastSequence(); - GetWithTimestampReadCallback read_cb(snapshot); MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; - Status s; LookupKey lkey(key, snapshot, read_options.timestamp); PERF_TIMER_STOP(get_snapshot_time); - bool done = false; - const Comparator* ucmp = column_family->GetComparator(); - assert(ucmp); - std::string* ts = ucmp->timestamp_size() > 0 ? timestamp : nullptr; - if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), - /*columns=*/nullptr, ts, &s, &merge_context, - &max_covering_tombstone_seq, read_options, - false /* immutable_memtable */, &read_cb)) { + + // Look up starts here + if (super_version->mem->Get( + lkey, + get_impl_options.value ? get_impl_options.value->GetSelf() : nullptr, + get_impl_options.columns, ts, &s, &merge_context, + &max_covering_tombstone_seq, read_options, + false /* immutable_memtable */, &read_cb)) { done = true; - pinnable_val->PinSelf(); + if (get_impl_options.value) { + get_impl_options.value->PinSelf(); + } RecordTick(stats_, MEMTABLE_HIT); } else if ((s.ok() || s.IsMergeInProgress()) && super_version->imm->Get( - lkey, pinnable_val->GetSelf(), /*columns=*/nullptr, ts, &s, - &merge_context, &max_covering_tombstone_seq, read_options, - &read_cb)) { + lkey, + get_impl_options.value ? 
get_impl_options.value->GetSelf() + : nullptr, + get_impl_options.columns, ts, &s, &merge_context, + &max_covering_tombstone_seq, read_options, &read_cb)) { done = true; - pinnable_val->PinSelf(); + if (get_impl_options.value) { + get_impl_options.value->PinSelf(); + } RecordTick(stats_, MEMTABLE_HIT); } if (!done && !s.ok() && !s.IsMergeInProgress()) { @@ -456,8 +436,8 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, PERF_TIMER_GUARD(get_from_output_files_time); PinnedIteratorsManager pinned_iters_mgr; super_version->current->Get( - read_options, lkey, pinnable_val, /*columns=*/nullptr, ts, &s, - &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, + read_options, lkey, get_impl_options.value, get_impl_options.columns, + ts, &s, &merge_context, &max_covering_tombstone_seq, &pinned_iters_mgr, /*value_found*/ nullptr, /*key_exists*/ nullptr, /*seq*/ nullptr, &read_cb, /*is_blob*/ nullptr, /*do_merge*/ true); @@ -467,7 +447,12 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options, PERF_TIMER_GUARD(get_post_process_time); ReturnAndCleanupSuperVersion(cfd, super_version); RecordTick(stats_, NUMBER_KEYS_READ); - size_t size = pinnable_val->size(); + size_t size = 0; + if (get_impl_options.value) { + size = get_impl_options.value->size(); + } else if (get_impl_options.columns) { + size = get_impl_options.columns->serialized_size(); + } RecordTick(stats_, BYTES_READ, size); RecordTimeToHistogram(stats_, BYTES_PER_READ, size); PERF_COUNTER_ADD(get_read_bytes, size); diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index a6ff4a66d..8616b9ed4 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -85,8 +85,6 @@ class DBImplSecondary : public DBImpl { bool error_if_data_exists_in_wals, uint64_t* = nullptr, RecoveryContext* recovery_ctx = nullptr) override; - // Implementations of the DB interface. - using DB::Get; // Can return IOError due to files being deleted by the primary. To avoid // IOError in this case, application can coordinate between primary and // secondaries so that primary will not delete files that are currently being @@ -96,17 +94,9 @@ class DBImplSecondary : public DBImpl { // workaround, the secondaries can be opened with `max_open_files=-1` so that // it eagerly keeps all talbe files open and is able to access the contents of // deleted files via prior open fd. 
- Status Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value) override; - - Status Get(const ReadOptions& _read_options, - ColumnFamilyHandle* column_family, const Slice& key, - PinnableSlice* value, std::string* timestamp) override; - - Status GetImpl(const ReadOptions& options, ColumnFamilyHandle* column_family, - const Slice& key, PinnableSlice* value, - std::string* timestamp); + using DBImpl::GetImpl; + Status GetImpl(const ReadOptions& options, const Slice& key, + GetImplOptions& get_impl_options) override; using DBImpl::NewIterator; // Operations on the created iterators can return IOError due to files being diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc index 19cbdfb95..afceabe67 100644 --- a/db/db_secondary_test.cc +++ b/db/db_secondary_test.cc @@ -164,12 +164,22 @@ TEST_F(DBSecondaryTest, ReopenAsSecondary) { Reopen(options); ASSERT_OK(Put("foo", "foo_value")); ASSERT_OK(Put("bar", "bar_value")); + WideColumns columns{{kDefaultWideColumnName, "attr_default_val"}, + {"attr_name1", "attr_value_1"}, + {"attr_name2", "attr_value_2"}}; + ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), "baz", + columns)); ASSERT_OK(dbfull()->Flush(FlushOptions())); Close(); ASSERT_OK(ReopenAsSecondary(options)); ASSERT_EQ("foo_value", Get("foo")); ASSERT_EQ("bar_value", Get("bar")); + PinnableWideColumns result; + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), "baz", + &result)); + ASSERT_EQ(result.columns(), columns); + ReadOptions ropts; ropts.verify_checksums = true; auto db1 = static_cast(db_); @@ -182,13 +192,16 @@ TEST_F(DBSecondaryTest, ReopenAsSecondary) { ASSERT_EQ("bar", iter->key().ToString()); ASSERT_EQ("bar_value", iter->value().ToString()); } else if (1 == count) { + ASSERT_EQ("baz", iter->key().ToString()); + ASSERT_EQ(columns, iter->columns()); + } else if (2 == count) { ASSERT_EQ("foo", iter->key().ToString()); ASSERT_EQ("foo_value", iter->value().ToString()); } ++count; } delete iter; - ASSERT_EQ(2, count); + ASSERT_EQ(3, count); } TEST_F(DBSecondaryTest, SimpleInternalCompaction) { diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 536a543e6..686dddd89 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -208,6 +208,11 @@ TEST_F(DBWideBasicTest, PutEntity) { ASSERT_OK(Flush()); verify(); + + // Reopen as Readonly DB and verify + Close(); + ASSERT_OK(ReadOnlyReopen(options)); + verify(); } TEST_F(DBWideBasicTest, PutEntityColumnFamily) { diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 0195971c0..d54698435 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1060,7 +1060,11 @@ void StressTest::OperateDb(ThreadState* thread) { i += batch_size - 1; } else if (FLAGS_use_get_entity) { + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_GETENTITY); TestGetEntity(thread, read_opts, rand_column_families, rand_keys); + ThreadStatusUtil::ResetThreadStatus(); } else if (FLAGS_use_multiget) { // Leave room for one more iteration of the loop with a single key // batch. 
This is to ensure that each thread does exactly the same diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 7a09d17a5..63a161923 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -446,6 +446,8 @@ class Env : public Customizable { kDBIterator = 5, kVerifyDBChecksum = 6, kVerifyFileChecksums = 7, + kGetEntity = 8, + kMultiGetEntity = 9, kUnknown, // Keep last for easy array of non-unknowns }; diff --git a/include/rocksdb/thread_status.h b/include/rocksdb/thread_status.h index 8cfdf931f..5bc6eeb2b 100644 --- a/include/rocksdb/thread_status.h +++ b/include/rocksdb/thread_status.h @@ -62,6 +62,8 @@ struct ThreadStatus { OP_DBITERATOR, OP_VERIFY_DB_CHECKSUM, OP_VERIFY_FILE_CHECKSUMS, + OP_GETENTITY, + OP_MULTIGETENTITY, NUM_OP_TYPES }; diff --git a/monitoring/thread_status_util_debug.cc b/monitoring/thread_status_util_debug.cc index 6d3f9be08..24d269cbb 100644 --- a/monitoring/thread_status_util_debug.cc +++ b/monitoring/thread_status_util_debug.cc @@ -46,6 +46,10 @@ Env::IOActivity ThreadStatusUtil::TEST_GetExpectedIOActivity( return Env::IOActivity::kVerifyDBChecksum; case ThreadStatus::OperationType::OP_VERIFY_FILE_CHECKSUMS: return Env::IOActivity::kVerifyFileChecksums; + case ThreadStatus::OperationType::OP_GETENTITY: + return Env::IOActivity::kGetEntity; + case ThreadStatus::OperationType::OP_MULTIGETENTITY: + return Env::IOActivity::kMultiGetEntity; default: return Env::IOActivity::kUnknown; } diff --git a/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md b/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md new file mode 100644 index 000000000..b974fb08b --- /dev/null +++ b/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md @@ -0,0 +1 @@ +Add `GetEntity()` API for ReadOnly DB and Secondary DB. diff --git a/util/thread_operation.h b/util/thread_operation.h index c95b7c693..4c01782ca 100644 --- a/util/thread_operation.h +++ b/util/thread_operation.h @@ -45,6 +45,8 @@ static OperationInfo global_operation_table[] = { {ThreadStatus::OP_DBITERATOR, "DBIterator"}, {ThreadStatus::OP_VERIFY_DB_CHECKSUM, "VerifyDBChecksum"}, {ThreadStatus::OP_VERIFY_FILE_CHECKSUMS, "VerifyFileChecksums"}, + {ThreadStatus::OP_GETENTITY, "GetEntity"}, + {ThreadStatus::OP_MULTIGETENTITY, "MultiGetEntity"}, }; From e1fd348b92a0c82637c4def968a421150247c7c1 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 15 Sep 2023 09:50:39 -0700 Subject: [PATCH 112/386] Fix a bug in multiget for cleaning up SuperVersion (#11830) Summary: When `MultiGet` acquires `SuperVersion` via locking the db mutex and get the current `ColumnFamilyData::super_version_`, its corresponding cleanup logic is not correctly done. It's currently doing this: `MultiGetColumnFamilyData::cfd->GetSuperVersion().Unref()` This operates on the most recent `SuperVersion` without locking db mutex , which is not thread safe by itself. And this unref operation is intended for the originally acquired `SuperVersion` instead of the current one. Because a race condition could happen where a new `SuperVersion` is installed in between this `MultiGet`'s ref and unref. When this race condition does happen, it's not sufficient to just unref the `SuperVersion`, `DBImpl::CleanupSuperVersion` should be called instead to properly clean up the `SuperVersion` had this `MultiGet` call be its last reference holder. 
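[Editor's illustration, not RocksDB code] To make the race concrete, here is a standalone toy sketch (ToySuperVersion and the scenario are invented) of why releasing whatever SuperVersion happens to be current at cleanup time, instead of the one that was originally referenced, both strands the reader's reference and under-counts the newly installed SuperVersion:

    #include <cassert>

    // Simplified stand-in for a reference-counted SuperVersion.
    struct ToySuperVersion {
      int refs = 1;  // one reference held by the column family that installed it
      void Ref() { ++refs; }
      bool Unref() { return --refs == 0; }  // true when the last ref is dropped
    };

    int main() {
      ToySuperVersion old_sv;
      ToySuperVersion* current = &old_sv;

      // Reader: reference the SuperVersion that is current right now.
      ToySuperVersion* acquired = current;
      acquired->Ref();  // old_sv.refs == 2

      // Concurrent flush: install a new SuperVersion and drop the column
      // family's reference to the old one.
      ToySuperVersion new_sv;
      current = &new_sv;
      old_sv.Unref();  // old_sv.refs == 1; only the reader still holds it

      // Buggy cleanup: release whatever is current now (new_sv), which the
      // reader never referenced.
      current->Unref();          // new_sv.refs == 0 while it is still installed
      assert(old_sv.refs == 1);  // the reader's reference was never returned

      // Correct cleanup: release the reference that was actually taken, and
      // clean the object up if that was the last reference.
      if (acquired->Unref()) {
        // old_sv's resources would be freed here.
      }
      return 0;
    }

This is the reason the fix replaces the bare Unref() on cfd->GetSuperVersion() with CleanupSuperVersion() applied to the SuperVersion that was actually acquired.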
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11830 Test Plan: `make all check` Added a unit test that would originally fail Reviewed By: ltamasi Differential Revision: D49287715 Pulled By: jowlyzhang fbshipit-source-id: 8353636ee11b2e90d85c677a96a92360072644b0 --- db/db_basic_test.cc | 27 +++++++++-- db/db_impl/db_impl.cc | 45 ++++++++++--------- db/db_impl/db_impl.h | 6 +-- .../bug_fixes/fix_multiget_sv_cleanup.md | 1 + 4 files changed, 51 insertions(+), 28 deletions(-) create mode 100644 unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 15cc5d3f5..1cb78e62b 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1427,10 +1427,7 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) { int retries = 0; bool last_try = false; ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( - "DBImpl::MultiGet::LastTry", [&](void* /*arg*/) { - last_try = true; - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - }); + "DBImpl::MultiGet::LastTry", [&](void* /*arg*/) { last_try = true; }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "DBImpl::MultiGet::AfterRefSV", [&](void* /*arg*/) { if (last_try) { @@ -1447,8 +1444,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) { } } }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ + {"DBImpl::MultiGet::AfterLastTryRefSV", + "DBMultiGetTestWithParam::MultiGetMultiCFMutex:BeforeCreateSV"}, + {"DBMultiGetTestWithParam::MultiGetMultiCFMutex:AfterCreateSV", + "DBImpl::MultiGet::BeforeLastTryUnRefSV"}, + }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + port::Thread create_sv_thread([this]() { + TEST_SYNC_POINT( + "DBMultiGetTestWithParam::MultiGetMultiCFMutex:BeforeCreateSV"); + // Create a new SuperVersion for each column family after last_try + // of MultiGet ref SuperVersion and before unref it. 
+ for (int i = 0; i < 8; ++i) { + ASSERT_OK(Put(i, "cf" + std::to_string(i) + "_key", + "cf" + std::to_string(i) + "_val_after_last_try")); + ASSERT_OK(Flush(i)); + } + TEST_SYNC_POINT( + "DBMultiGetTestWithParam::MultiGetMultiCFMutex:AfterCreateSV"); + }); + std::vector cfs; std::vector keys; std::vector values; @@ -1460,6 +1477,7 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) { values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()), std::get<1>(GetParam())); + create_sv_thread.join(); ASSERT_TRUE(last_try); ASSERT_EQ(values.size(), 8); for (unsigned int j = 0; j < values.size(); ++j) { @@ -1473,6 +1491,7 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) { ->cfd(); ASSERT_NE(cfd->TEST_GetLocalSV()->Get(), SuperVersion::kSVInUse); } + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a88e8f3b4..0df899142 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2395,11 +2395,11 @@ std::vector DBImpl::MultiGet( cf_iter) { return &cf_iter->second; }; SequenceNumber consistent_seqnum; - bool unref_only; + bool sv_from_thread_local; Status status = MultiCFSnapshot>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum, &unref_only); + &consistent_seqnum, &sv_from_thread_local); if (!status.ok()) { for (auto& s : stat_list) { @@ -2521,10 +2521,11 @@ std::vector DBImpl::MultiGet( for (auto mgd_iter : multiget_cf_data) { auto mgd = mgd_iter.second; - if (!unref_only) { + if (sv_from_thread_local) { ReturnAndCleanupSuperVersion(mgd.cfd, mgd.super_version); } else { - mgd.cfd->GetSuperVersion()->Unref(); + TEST_SYNC_POINT("DBImpl::MultiGet::BeforeLastTryUnRefSV"); + CleanupSuperVersion(mgd.super_version); } } RecordTick(stats_, NUMBER_MULTIGET_CALLS); @@ -2543,16 +2544,16 @@ Status DBImpl::MultiCFSnapshot( const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, - T* cf_list, SequenceNumber* snapshot, bool* unref_only) { + T* cf_list, SequenceNumber* snapshot, bool* sv_from_thread_local) { PERF_TIMER_GUARD(get_snapshot_time); - assert(unref_only); - *unref_only = false; + assert(sv_from_thread_local); + *sv_from_thread_local = true; Status s = Status::OK(); const bool check_read_ts = read_options.timestamp && read_options.timestamp->size() > 0; - // unref_only set to true means the SuperVersion to be cleaned up is acquired - // directly via ColumnFamilyData instead of thread local. + // sv_from_thread_local set to false means the SuperVersion to be cleaned up + // is acquired directly via ColumnFamilyData instead of thread local. 
const auto sv_cleanup_func = [&]() -> void { for (auto cf_iter = cf_list->begin(); cf_iter != cf_list->end(); ++cf_iter) { @@ -2560,10 +2561,10 @@ Status DBImpl::MultiCFSnapshot( SuperVersion* super_version = node->super_version; ColumnFamilyData* cfd = node->cfd; if (super_version != nullptr) { - if (*unref_only) { - super_version->Unref(); - } else { + if (*sv_from_thread_local) { ReturnAndCleanupSuperVersion(cfd, super_version); + } else { + CleanupSuperVersion(super_version); } } node->super_version = nullptr; @@ -2679,6 +2680,7 @@ Status DBImpl::MultiCFSnapshot( if (!retry) { if (last_try) { mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::MultiGet::AfterLastTryRefSV"); } break; } @@ -2687,7 +2689,7 @@ Status DBImpl::MultiCFSnapshot( // Keep track of bytes that we read for statistics-recording later PERF_TIMER_STOP(get_snapshot_time); - *unref_only = last_try; + *sv_from_thread_local = !last_try; if (!s.ok()) { sv_cleanup_func(); } @@ -2824,11 +2826,11 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, }; SequenceNumber consistent_seqnum; - bool unref_only; + bool sv_from_thread_local; Status s = MultiCFSnapshot< autovector>( read_options, nullptr, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum, &unref_only); + &consistent_seqnum, &sv_from_thread_local); if (!s.ok()) { for (size_t i = 0; i < num_keys; ++i) { @@ -2866,10 +2868,11 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, } for (const auto& iter : multiget_cf_data) { - if (!unref_only) { + if (sv_from_thread_local) { ReturnAndCleanupSuperVersion(iter.cfd, iter.super_version); } else { - iter.cfd->GetSuperVersion()->Unref(); + TEST_SYNC_POINT("DBImpl::MultiGet::BeforeLastTryUnRefSV"); + CleanupSuperVersion(iter.super_version); } } } @@ -3021,18 +3024,18 @@ void DBImpl::MultiGetWithCallback( size_t num_keys = sorted_keys->size(); SequenceNumber consistent_seqnum; - bool unref_only; + bool sv_from_thread_local; Status s = MultiCFSnapshot>( read_options, callback, iter_deref_lambda, &multiget_cf_data, - &consistent_seqnum, &unref_only); + &consistent_seqnum, &sv_from_thread_local); if (!s.ok()) { return; } #ifndef NDEBUG - assert(!unref_only); + assert(sv_from_thread_local); #else // Silence unused variable warning - (void)unref_only; + (void)sv_from_thread_local; #endif // NDEBUG if (callback && read_options.snapshot == nullptr) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 8092c7669..3adc0b4ab 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2322,8 +2322,8 @@ class DBImpl : public DB { // If callback is non-null, the callback is refreshed with the snapshot // sequence number // - // `unref_only` being set to true indicates that the SuperVersions were - // obtained from the ColumnFamilyData, whereas false indicates they are thread + // `sv_from_thread_local` being set to false indicates that the SuperVersion + // obtained from the ColumnFamilyData, whereas true indicates they are thread // local. // A non-OK status will be returned if for a column family that enables // user-defined timestamp feature, the specified `ReadOptions.timestamp` @@ -2333,7 +2333,7 @@ class DBImpl : public DB { const ReadOptions& read_options, ReadCallback* callback, std::function& iter_deref_func, - T* cf_list, SequenceNumber* snapshot, bool* unref_only); + T* cf_list, SequenceNumber* snapshot, bool* sv_from_thread_local); // The actual implementation of the batching MultiGet. 
The caller is expected // to have acquired the SuperVersion and pass in a snapshot sequence number diff --git a/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md b/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md new file mode 100644 index 000000000..f9e8db661 --- /dev/null +++ b/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md @@ -0,0 +1 @@ +Fixed a bug in `MultiGet` for cleaning up SuperVersion acquired with locking db mutex. From 1e2fd343bb10f519f5fbf5218e6768d841a6c48f Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Fri, 15 Sep 2023 10:05:56 -0700 Subject: [PATCH 113/386] Update upper_bound_offset when reseek changes iterate_upper_bound dynamically (#11775) Summary: Update the logic in FilePrefetchBuffer to update `upper_bound_offset_` during reseek. During Reseek, `iterate_upper_bound` can be changed dynamically. So added an API to update that in FilePrefetchBuffer. Added unit test to confirm the behavior. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11775 Test Plan: - Check stress tests in case there is any failure after this diff. - make crash_test -j32 with auto_readahead_size=1 passed locally Reviewed By: anand1976 Differential Revision: D48815177 Pulled By: akankshamahajan15 fbshipit-source-id: 5f44fbb3af06c86a1c38f139c5fa4543891837f4 --- file/file_prefetch_buffer.h | 6 +- file/prefetch_test.cc | 87 ++++++++++++++----- table/block_based/block_prefetcher.h | 5 ++ .../bug_fixes/upper_bound_autoreadahead.md | 1 + 4 files changed, 78 insertions(+), 21 deletions(-) create mode 100644 unreleased_history/bug_fixes/upper_bound_autoreadahead.md diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index e2eac5e8d..a999ea762 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -279,6 +279,11 @@ class FilePrefetchBuffer { // Callback function passed to underlying FS in case of asynchronous reads. void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg); + void ResetUpperBoundOffset(uint64_t upper_bound_offset) { + upper_bound_offset_ = upper_bound_offset; + readahead_size_ = initial_auto_readahead_size_; + } + private: // Calculates roundoff offset and length to be prefetched based on alignment // and data present in buffer_. It also allocates new buffer or refit tail if @@ -321,7 +326,6 @@ class FilePrefetchBuffer { void ResetValues() { num_file_reads_ = 1; readahead_size_ = initial_auto_readahead_size_; - upper_bound_offset_ = 0; } // Called in case of implicit auto prefetching. diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 85a06151a..909b0eba6 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -2082,6 +2082,7 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { int buff_count_with_tuning = 0, buff_count_without_tuning = 0; int keys_with_tuning = 0, keys_without_tuning = 0; + int reseek_keys_with_tuning = 0, reseek_keys_without_tuning = 0; buff_prefetch_count = 0; SyncPoint::GetInstance()->SetCallBack( @@ -2102,48 +2103,92 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { ropts.async_io = true; } - Slice ub = Slice("my_key_uuu"); - ropts.iterate_upper_bound = &ub; - Slice seek_key = Slice("my_key_aaa"); - // With tuning readahead_size. { ASSERT_OK(options.statistics->Reset()); + Slice ub = Slice("my_key_uuu"); + Slice* ub_ptr = &ub; + ropts.iterate_upper_bound = ub_ptr; ropts.auto_readahead_size = true; auto iter = std::unique_ptr(db_->NewIterator(ropts)); - iter->Seek(seek_key); + // Seek. 
+ { + Slice seek_key = Slice("my_key_aaa"); + iter->Seek(seek_key); - while (iter->Valid()) { - keys_with_tuning++; - iter->Next(); + while (iter->Valid()) { + keys_with_tuning++; + iter->Next(); + } + + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_GT(readahead_trimmed, 0); + buff_count_with_tuning = buff_prefetch_count; } - uint64_t readhahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_GT(readhahead_trimmed, 0); - buff_count_with_tuning = buff_prefetch_count; + // Reseek with new upper_bound_iterator. + { + ub = Slice("my_key_y"); + Slice reseek_key = Slice("my_key_v"); + iter->Seek(reseek_key); + + while (iter->Valid()) { + iter->Next(); + reseek_keys_with_tuning++; + } + + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_GT(readahead_trimmed, 0); + ASSERT_GT(reseek_keys_with_tuning, 0); + } } // Without tuning readahead_size { + Slice ub = Slice("my_key_uuu"); + Slice* ub_ptr = &ub; + ropts.iterate_upper_bound = ub_ptr; buff_prefetch_count = 0; ASSERT_OK(options.statistics->Reset()); ropts.auto_readahead_size = false; auto iter = std::unique_ptr(db_->NewIterator(ropts)); - iter->Seek(seek_key); + // Seek. + { + Slice seek_key = Slice("my_key_aaa"); + iter->Seek(seek_key); - while (iter->Valid()) { - keys_without_tuning++; - iter->Next(); + while (iter->Valid()) { + keys_without_tuning++; + iter->Next(); + } + buff_count_without_tuning = buff_prefetch_count; + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_EQ(readahead_trimmed, 0); + } + + // Reseek with new upper_bound_iterator. + { + ub = Slice("my_key_y"); + Slice reseek_key = Slice("my_key_v"); + iter->Seek(reseek_key); + + while (iter->Valid()) { + iter->Next(); + reseek_keys_without_tuning++; + } + + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_EQ(readahead_trimmed, 0); + ASSERT_GT(reseek_keys_without_tuning, 0); } - buff_count_without_tuning = buff_prefetch_count; - uint64_t readhahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_EQ(readhahead_trimmed, 0); } { @@ -2159,6 +2204,8 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { ASSERT_GT(buff_count_with_tuning, 0); // No of keys should be equal. ASSERT_EQ(keys_without_tuning, keys_with_tuning); + // No of keys after reseek with new upper bound should be equal. + ASSERT_EQ(reseek_keys_without_tuning, reseek_keys_with_tuning); } Close(); } diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index eecb12f40..859a85f66 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -55,6 +55,11 @@ class BlockPrefetcher { void SetUpperBoundOffset(uint64_t upper_bound_offset) { upper_bound_offset_ = upper_bound_offset; + if (prefetch_buffer() != nullptr) { + // Upper bound can be changed on reseek. So update that in + // FilePrefetchBuffer. 
+ prefetch_buffer()->ResetUpperBoundOffset(upper_bound_offset); + } } private: diff --git a/unreleased_history/bug_fixes/upper_bound_autoreadahead.md b/unreleased_history/bug_fixes/upper_bound_autoreadahead.md new file mode 100644 index 000000000..5ad9ddbc4 --- /dev/null +++ b/unreleased_history/bug_fixes/upper_bound_autoreadahead.md @@ -0,0 +1 @@ +* When auto_readahead_size is enabled, update readahead upper bound during readahead trimming when reseek changes iterate_upper_bound dynamically. From c4a19ed399f3f8435b6a16e2c84b1667a7ae3d8c Mon Sep 17 00:00:00 2001 From: Sarang Masti Date: Fri, 15 Sep 2023 10:25:57 -0700 Subject: [PATCH 114/386] Add Transaction::CollapseKey to collapse merge op chains ondemand (#11815) Summary: Application using rocksdb today dont have much control over the cost of reads when merge-ops are enabled, expect for waiting for compactions to kick in or using max_successive_merges hint, which only applies to memtable. This change adds Transaction::CollapseKey api giving applications the ability to request merge chain collapse on-demand, when they detect high read costs due to merges. Currently, this only supported on PessimisticTransactions. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11815 Test Plan: Add a unit test Reviewed By: ajkr Differential Revision: D49309387 Pulled By: sarangbh fbshipit-source-id: a1eb5cc9e3bd4b3206a36150aacead770318e3e1 --- include/rocksdb/utilities/transaction.h | 9 ++++ .../transactions/pessimistic_transaction.cc | 12 +++++ .../transactions/pessimistic_transaction.h | 4 ++ utilities/transactions/transaction_test.cc | 49 +++++++++++++++++++ utilities/transactions/transaction_test.h | 6 +++ 5 files changed, 80 insertions(+) diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h index 510ff1dd9..e6452056a 100644 --- a/include/rocksdb/utilities/transaction.h +++ b/include/rocksdb/utilities/transaction.h @@ -527,6 +527,15 @@ class Transaction { virtual Status SingleDeleteUntracked(const Slice& key) = 0; + // Collpase the merge chain for the given key. This is can be used by the + // application to trigger an on-demand collpase to a key that has a long + // merge chain to reduce read amplification, without waiting for compaction + // to kick in. + virtual Status CollapseKey(const ReadOptions&, const Slice&, + ColumnFamilyHandle* = nullptr) { + return Status::NotSupported("collpase not supported"); + } + // Similar to WriteBatch::PutLogData virtual void PutLogData(const Slice& blob) = 0; diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index d0ee86540..1e870190e 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -1168,4 +1168,16 @@ Status PessimisticTransaction::SetName(const TransactionName& name) { return s; } +Status PessimisticTransaction::CollapseKey(const ReadOptions& options, + const Slice& key, + ColumnFamilyHandle* column_family) { + auto* cfh = column_family ? 
column_family : db_impl_->DefaultColumnFamily(); + std::string value; + const auto status = GetForUpdate(options, cfh, key, &value, true, true); + if (!status.ok()) { + return status; + } + return Put(column_family, key, value); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/transactions/pessimistic_transaction.h b/utilities/transactions/pessimistic_transaction.h index dfec50d00..bb12266ec 100644 --- a/utilities/transactions/pessimistic_transaction.h +++ b/utilities/transactions/pessimistic_transaction.h @@ -119,6 +119,10 @@ class PessimisticTransaction : public TransactionBaseImpl { const Endpoint& start_key, const Endpoint& end_key) override; + virtual Status CollapseKey( + const ReadOptions& options, const Slice& key, + ColumnFamilyHandle* column_family = nullptr) override; + protected: // Refer to // TransactionOptions::use_only_the_last_commit_time_batch_for_recovery diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 4552835aa..a122a7dd7 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -6759,6 +6759,55 @@ TEST_P(TransactionTest, UnlockWALStallCleared) { } } +TEST_F(TransactionDBTest, CollapseKey) { + ASSERT_OK(ReOpen()); + ASSERT_OK(db->Put({}, "hello", "world")); + ASSERT_OK(db->Flush({})); + ASSERT_OK(db->Merge({}, "hello", "world")); + ASSERT_OK(db->Flush({})); + ASSERT_OK(db->Merge({}, "hello", "world")); + ASSERT_OK(db->Flush({})); + + std::string value; + ASSERT_OK(db->Get({}, "hello", &value)); + ASSERT_EQ("world,world,world", value); + + // get merge op info + std::vector operands(3); + rocksdb::GetMergeOperandsOptions mergeOperandOptions; + mergeOperandOptions.expected_max_number_of_operands = 3; + int numOperands; + ASSERT_OK(db->GetMergeOperands({}, db->DefaultColumnFamily(), "hello", + operands.data(), &mergeOperandOptions, + &numOperands)); + ASSERT_EQ(3, numOperands); + + // collapse key + { + std::unique_ptr txn0{ + db->BeginTransaction(WriteOptions{}, TransactionOptions{})}; + ASSERT_OK(txn0->CollapseKey(ReadOptions{}, "hello")); + ASSERT_OK(txn0->Commit()); + } + + // merge operands should be 1 + ASSERT_OK(db->GetMergeOperands({}, db->DefaultColumnFamily(), "hello", + operands.data(), &mergeOperandOptions, + &numOperands)); + ASSERT_EQ(1, numOperands); + + // get again after collapse + ASSERT_OK(db->Get({}, "hello", &value)); + ASSERT_EQ("world,world,world", value); + + // collapse of non-existent key + { + std::unique_ptr txn1{ + db->BeginTransaction(WriteOptions{}, TransactionOptions{})}; + ASSERT_TRUE(txn1->CollapseKey(ReadOptions{}, "dummy").IsNotFound()); + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/utilities/transactions/transaction_test.h b/utilities/transactions/transaction_test.h index 3b72f21c9..60c5c8a4b 100644 --- a/utilities/transactions/transaction_test.h +++ b/utilities/transactions/transaction_test.h @@ -488,6 +488,12 @@ class TransactionTest std::get<2>(GetParam()), std::get<3>(GetParam())){}; }; +class TransactionDBTest : public TransactionTestBase { + public: + TransactionDBTest() + : TransactionTestBase(false, false, WRITE_COMMITTED, kOrderedWrite) {} +}; + class TransactionStressTest : public TransactionTest {}; class MySQLStyleTransactionTest From ed913513bd1d8a57a92ac8cc2a78407e53e26fe3 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 15 Sep 2023 10:36:14 -0700 Subject: [PATCH 115/386] Fix a bug of rocksdb.file.read.verify.file.checksums.micros not being populated (#11836) 
Summary: **Context/Summary:** `rocksdb.file.read.verify.file.checksums.micros ` was added in https://github.com/facebook/rocksdb/pull/11444 but the related path was not populated with statistics and clock object correctly so the actual statistics collection didn't happen. This PR fixed it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11836 Test Plan: Setup: ``` ./db_bench --benchmarks="fillrandom" --file_checksum=1 --num=100 --db=/dev/shm/rocksdb ``` Run: ``` ./db_bench --use_existing_db=1 --benchmarks="verifyfilechecksums" --file_checksum=1 --num=100 --db=/dev/shm/rocksdb --statistics=1 --stats_level=4 ``` Post-PR ``` rocksdb.file.read.verify.file.checksums.micros P50 : 9.000000 P95 : 9.000000 P99 : 9.000000 P100 : 9.000000 COUNT : 1 SUM : 9 ``` Pre-PR ``` rocksdb.file.read.verify.file.checksums.micros P50 : 0.000000 P95 : 0.000000 P99 : 0.000000 P100 : 0.000000 COUNT : 0 SUM : 0 ``` Reviewed By: ajkr Differential Revision: D49293378 Pulled By: hx235 fbshipit-source-id: 1acd8b828c28e088d0c5d63897f53cd180b82f42 --- db/db_impl/db_impl.cc | 3 ++- db/external_sst_file_ingestion_job.cc | 5 +++-- file/file_util.cc | 7 ++++--- file/file_util.h | 4 +++- .../bug_fixes/verify_file_checksum_stat_bug.md | 1 + 5 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0df899142..52dba4e56 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -6115,7 +6115,8 @@ Status DBImpl::VerifyFullFileChecksum(const std::string& file_checksum_expected, fs_.get(), fname, immutable_db_options_.file_checksum_gen_factory.get(), func_name_expected, &file_checksum, &func_name, read_options.readahead_size, immutable_db_options_.allow_mmap_reads, - io_tracer_, immutable_db_options_.rate_limiter.get(), read_options); + io_tracer_, immutable_db_options_.rate_limiter.get(), read_options, + immutable_db_options_.stats, immutable_db_options_.clock); if (s.ok()) { assert(func_name_expected == func_name); if (file_checksum != file_checksum_expected) { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 9756f47aa..1e461b3a7 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -226,7 +226,8 @@ Status ExternalSstFileIngestionJob::Prepare( &generated_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads, io_tracer_, - db_options_.rate_limiter.get(), ro); + db_options_.rate_limiter.get(), ro, db_options_.stats, + db_options_.clock); if (!io_s.ok()) { status = io_s; ROCKS_LOG_WARN(db_options_.info_log, @@ -1067,7 +1068,7 @@ IOStatus ExternalSstFileIngestionJob::GenerateChecksumForIngestedFile( &file_checksum, &file_checksum_func_name, ingestion_options_.verify_checksums_readahead_size, db_options_.allow_mmap_reads, io_tracer_, db_options_.rate_limiter.get(), - ro); + ro, db_options_.stats, db_options_.clock); if (!io_s.ok()) { return io_s; } diff --git a/file/file_util.cc b/file/file_util.cc index c5bb22e48..9eee10637 100644 --- a/file/file_util.cc +++ b/file/file_util.cc @@ -13,6 +13,7 @@ #include "file/sst_file_manager_impl.h" #include "file/writable_file_writer.h" #include "rocksdb/env.h" +#include "rocksdb/statistics.h" namespace ROCKSDB_NAMESPACE { @@ -137,7 +138,7 @@ IOStatus GenerateOneFileChecksum( std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool /*allow_mmap_reads*/, std::shared_ptr& 
io_tracer, RateLimiter* rate_limiter, - const ReadOptions& read_options) { + const ReadOptions& read_options, Statistics* stats, SystemClock* clock) { if (checksum_factory == nullptr) { return IOStatus::InvalidArgument("Checksum factory is invalid"); } @@ -186,8 +187,8 @@ IOStatus GenerateOneFileChecksum( return io_s; } reader.reset(new RandomAccessFileReader( - std::move(r_file), file_path, nullptr /*Env*/, io_tracer, nullptr, - Histograms::HISTOGRAM_ENUM_MAX, nullptr, rate_limiter)); + std::move(r_file), file_path, clock, io_tracer, stats, + Histograms::SST_READ_MICROS, nullptr, rate_limiter)); } // Found that 256 KB readahead size provides the best performance, based on diff --git a/file/file_util.h b/file/file_util.h index 1ee297955..9c95478c7 100644 --- a/file/file_util.h +++ b/file/file_util.h @@ -11,6 +11,7 @@ #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/sst_file_writer.h" +#include "rocksdb/statistics.h" #include "rocksdb/status.h" #include "rocksdb/system_clock.h" #include "rocksdb/types.h" @@ -52,6 +53,7 @@ extern Status DeleteDBFile(const ImmutableDBOptions* db_options, const std::string& path_to_sync, const bool force_bg, const bool force_fg); +// TODO(hx235): pass the whole DBOptions intead of its individual fields extern IOStatus GenerateOneFileChecksum( FileSystem* fs, const std::string& file_path, FileChecksumGenFactory* checksum_factory, @@ -59,7 +61,7 @@ extern IOStatus GenerateOneFileChecksum( std::string* file_checksum_func_name, size_t verify_checksums_readahead_size, bool allow_mmap_reads, std::shared_ptr& io_tracer, RateLimiter* rate_limiter, - const ReadOptions& read_options); + const ReadOptions& read_options, Statistics* stats, SystemClock* clock); inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, SystemClock* clock, IOOptions& opts) { diff --git a/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md b/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md new file mode 100644 index 000000000..7c2f921fb --- /dev/null +++ b/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md @@ -0,0 +1 @@ +Fixed a bug where `rocksdb.file.read.verify.file.checksums.micros` is not populated From 3ebf10e0ac8b4c4522c43a0e56145bccab0ad010 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 15 Sep 2023 10:37:25 -0700 Subject: [PATCH 116/386] Info-log stats level on db open (#11840) Summary: **Context/Summary:** It is useful to ensure users set the stats level right for enable detailed timers like ``rocksdb.file.read.{get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11840 Test Plan: - Manually checking LOG with db bench ``` ./db_bench --benchmarks="fillrandom" --file_checksum=1 --num=100 --db=/dev/shm/rocksdb --statistics=0 --stats_level=2 2023/09/14-15:30:17.139022 2353133 Options.statistics: (nil) 2023/09/14-15:30:17.139025 2353133 Options.use_fsync: 0 ./db_bench --benchmarks="fillrandom" --file_checksum=1 --num=100 --db=/dev/shm/rocksdb --statistics=1 --stats_level=0 2023/09/14-15:30:44.390827 2355026 Options.statistics: 0x7f7c6d449290 2023/09/14-15:30:44.390830 2355026 Options.statistics stats level: 0 2023/09/14-15:30:44.390833 2355026 Options.use_fsync: 0 ./db_bench --benchmarks="fillrandom" --file_checksum=1 --num=100 --db=/dev/shm/rocksdb --statistics=1 --stats_level=4 2023/09/14-15:31:04.466116 2356374 Options.statistics: 0x7f84c8649290 2023/09/14-15:31:04.466119 2356374 Options.statistics stats level: 4 
2023/09/14-15:31:04.466122 2356374 Options.use_fsync: 0 ``` Reviewed By: ajkr Differential Revision: D49296354 Pulled By: hx235 fbshipit-source-id: b1b4b911544b6fa8c3fe1dbbd65c3bedfef4b50a --- options/db_options.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/options/db_options.cc b/options/db_options.cc index f009c1a59..b93e35f43 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -794,6 +794,11 @@ void ImmutableDBOptions::Dump(Logger* log) const { max_file_opening_threads); ROCKS_LOG_HEADER(log, " Options.statistics: %p", stats); + if (stats) { + ROCKS_LOG_HEADER( + log, " Options.statistics stats level: %u", + stats->get_stats_level()); + } ROCKS_LOG_HEADER(log, " Options.use_fsync: %d", use_fsync); ROCKS_LOG_HEADER( From b050751f762ad4a6744452a935f7c98c029c1b9e Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 15 Sep 2023 10:38:37 -0700 Subject: [PATCH 117/386] Use default value instead of hard-coded 0 for compaction_readhead_size in db bench (#11831) Summary: **Context/Summary:** It allows db bench reflect the default behavior of this option. For example, we recently changed its default value. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11831 Test Plan: No code change Reviewed By: cbi42 Differential Revision: D49253690 Pulled By: hx235 fbshipit-source-id: 445d4e54f62b4b538626e301a3014d2f00849d30 --- tools/db_bench_tool.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index fe9ba9b11..198f27341 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -718,7 +718,9 @@ DEFINE_int32(file_opening_threads, "If open_files is set to -1, this option set the number of " "threads that will be used to open files during DB::Open()"); -DEFINE_int32(compaction_readahead_size, 0, "Compaction readahead size"); +DEFINE_int32(compaction_readahead_size, + ROCKSDB_NAMESPACE::Options().compaction_readahead_size, + "Compaction readahead size"); DEFINE_int32(log_readahead_size, 0, "WAL and manifest readahead size"); From 68ce5d84f6a58d37e3725607385e1c68cb1254a4 Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 15 Sep 2023 10:44:43 -0700 Subject: [PATCH 118/386] Add new Iterator API Refresh(const snapshot*) (#10594) Summary: This PR resolves https://github.com/facebook/rocksdb/issues/10487 & https://github.com/facebook/rocksdb/issues/10536, user code needs to call Refresh() periodically. The main code change is to support range deletions. A range tombstone iterator uses a sequence number as upper bound to decide which range tombstones are effective. During Iterator refresh, this sequence number upper bound needs to be updated for all range tombstone iterators under DBIter and LevelIterator. LevelIterator may create new table iterators and range tombstone iterator during scanning, so it needs to be aware of iterator refresh. The code path that propagates this change is `db_iter_->set_sequence(read_seq) -> MergingIterator::SetRangeDelReadSeqno() -> TruncatedRangeDelIterator::SetRangeDelReadSeqno() and LevelIterator::SetRangeDelReadSeqno()`. This change also fixes an issue where range tombstone iterators created by LevelIterator may access ReadOptions::snapshot, even though we do not explicitly require users to keep a snapshot alive after creating an Iterator. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10594 Test Plan: * New unit tests. * Add Iterator::Refresh(snapshot) to stress test. 
Note that this change only adds tests for refreshing to the same snapshot since this is the main target use case. TODO in a following PR: * Stress test Iterator::Refresh() to different snapshots or no snapshot. Reviewed By: ajkr Differential Revision: D48456896 Pulled By: cbi42 fbshipit-source-id: 2e642c04e91235cc9542ef4cd37b3c20823bd779 --- db/arena_wrapped_db_iter.cc | 33 +++-- db/arena_wrapped_db_iter.h | 1 + db/db_impl/db_impl.cc | 2 +- db/db_impl/db_impl_secondary.cc | 2 +- db/db_iter.h | 1 + db/db_iterator_test.cc | 92 +++++++++++--- db/db_range_del_test.cc | 118 ++++++++++++++++++ db/range_del_aggregator.h | 4 + db/range_tombstone_fragmenter.h | 4 + db/table_cache.cc | 6 +- db/table_cache.h | 3 + db/version_set.cc | 57 +++++---- db_stress_tool/db_stress_test_base.cc | 8 ++ include/rocksdb/iterator.h | 14 ++- table/block_based/block_based_table_reader.cc | 10 ++ table/block_based/block_based_table_reader.h | 3 + table/internal_iterator.h | 11 ++ table/iterator_wrapper.h | 5 + table/merging_iterator.cc | 12 ++ table/table_reader.h | 6 + .../new_features/iterator-refresh-snapshot.md | 1 + 21 files changed, 335 insertions(+), 58 deletions(-) create mode 100644 unreleased_history/new_features/iterator-refresh-snapshot.md diff --git a/db/arena_wrapped_db_iter.cc b/db/arena_wrapped_db_iter.cc index b101fbbc7..865b1ad2e 100644 --- a/db/arena_wrapped_db_iter.cc +++ b/db/arena_wrapped_db_iter.cc @@ -19,6 +19,14 @@ namespace ROCKSDB_NAMESPACE { +inline static SequenceNumber GetSeqNum(const DBImpl* db, const Snapshot* s) { + if (s) { + return s->GetSequenceNumber(); + } else { + return db->GetLatestSequenceNumber(); + } +} + Status ArenaWrappedDBIter::GetProperty(std::string prop_name, std::string* prop) { if (prop_name == "rocksdb.iterator.super-version-number") { @@ -54,7 +62,9 @@ void ArenaWrappedDBIter::Init( } } -Status ArenaWrappedDBIter::Refresh() { +Status ArenaWrappedDBIter::Refresh() { return Refresh(nullptr); } + +Status ArenaWrappedDBIter::Refresh(const Snapshot* snapshot) { if (cfd_ == nullptr || db_impl_ == nullptr || !allow_refresh_) { return Status::NotSupported("Creating renew iterator is not allowed."); } @@ -63,6 +73,10 @@ Status ArenaWrappedDBIter::Refresh() { // correct behavior. Will be corrected automatically when we take a snapshot // here for the case of WritePreparedTxnDB. uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); + // If we recreate a new internal iterator below (NewInternalIterator()), + // we will pass in read_options_. We need to make sure it + // has the right snapshot. 
+ read_options_.snapshot = snapshot; TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1"); TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2"); auto reinit_internal_iter = [&]() { @@ -72,18 +86,18 @@ Status ArenaWrappedDBIter::Refresh() { new (&arena_) Arena(); SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); - SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + SequenceNumber read_seq = GetSeqNum(db_impl_, snapshot); if (read_callback_) { - read_callback_->Refresh(latest_seq); + read_callback_->Refresh(read_seq); } Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, - sv->current, latest_seq, + sv->current, read_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations, cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, allow_refresh_); InternalIterator* internal_iter = db_impl_->NewInternalIterator( - read_options_, cfd_, sv, &arena_, latest_seq, + read_options_, cfd_, sv, &arena_, read_seq, /* allow_unprepared_value */ true, /* db_iter */ this); SetIterUnderDBIter(internal_iter); }; @@ -92,13 +106,13 @@ Status ArenaWrappedDBIter::Refresh() { reinit_internal_iter(); break; } else { - SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber(); + SequenceNumber read_seq = GetSeqNum(db_impl_, snapshot); // Refresh range-tombstones in MemTable if (!read_options_.ignore_range_deletions) { SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_); TEST_SYNC_POINT_CALLBACK("ArenaWrappedDBIter::Refresh:SV", nullptr); auto t = sv->mem->NewRangeTombstoneIterator( - read_options_, latest_seq, false /* immutable_memtable */); + read_options_, read_seq, false /* immutable_memtable */); if (!t || t->empty()) { // If memtable_range_tombstone_iter_ points to a non-empty tombstone // iterator, then it means sv->mem is not the memtable that @@ -128,9 +142,6 @@ Status ArenaWrappedDBIter::Refresh() { } db_impl_->ReturnAndCleanupSuperVersion(cfd_, sv); } - // Refresh latest sequence number - db_iter_->set_sequence(latest_seq); - db_iter_->set_valid(false); // Check again if the latest super version number is changed uint64_t latest_sv_number = cfd_->GetSuperVersionNumber(); if (latest_sv_number != cur_sv_number) { @@ -139,6 +150,8 @@ Status ArenaWrappedDBIter::Refresh() { cur_sv_number = latest_sv_number; continue; } + db_iter_->set_sequence(read_seq); + db_iter_->set_valid(false); break; } } diff --git a/db/arena_wrapped_db_iter.h b/db/arena_wrapped_db_iter.h index f15be306d..678ea3e78 100644 --- a/db/arena_wrapped_db_iter.h +++ b/db/arena_wrapped_db_iter.h @@ -80,6 +80,7 @@ class ArenaWrappedDBIter : public Iterator { Status GetProperty(std::string prop_name, std::string* prop) override; Status Refresh() override; + Status Refresh(const Snapshot*) override; void Init(Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions, diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 52dba4e56..f546826a3 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -3650,7 +3650,7 @@ ArenaWrappedDBIter* DBImpl::NewIteratorImpl( env_, read_options, *cfd->ioptions(), sv->mutable_cf_options, sv->current, snapshot, sv->mutable_cf_options.max_sequential_skip_in_iterations, sv->version_number, read_callback, this, cfd, expose_blob_index, - read_options.snapshot != nullptr ? 
false : allow_refresh); + allow_refresh); InternalIterator* internal_iter = NewInternalIterator( db_iter->GetReadOptions(), cfd, sv, db_iter->GetArena(), snapshot, diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index e6dd8e080..10680ba1e 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -535,7 +535,7 @@ ArenaWrappedDBIter* DBImplSecondary::NewIteratorImpl( super_version->current, snapshot, super_version->mutable_cf_options.max_sequential_skip_in_iterations, super_version->version_number, read_callback, this, cfd, - expose_blob_index, read_options.snapshot ? false : allow_refresh); + expose_blob_index, allow_refresh); auto internal_iter = NewInternalIterator( db_iter->GetReadOptions(), cfd, super_version, db_iter->GetArena(), snapshot, /* allow_unprepared_value */ true, db_iter); diff --git a/db/db_iter.h b/db/db_iter.h index 163da3265..e45da9dd1 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -209,6 +209,7 @@ class DBIter final : public Iterator { if (read_callback_) { read_callback_->Refresh(s); } + iter_.SetRangeDelReadSeqno(s); } void set_valid(bool v) { valid_ = v; } diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index c982fcff1..4df3448f9 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -2421,32 +2421,92 @@ TEST_P(DBIteratorTest, Refresh) { } TEST_P(DBIteratorTest, RefreshWithSnapshot) { - ASSERT_OK(Put("x", "y")); + // L1 file, uses LevelIterator internally + ASSERT_OK(Put(Key(0), "val0")); + ASSERT_OK(Put(Key(5), "val5")); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + + // L0 file, uses table iterator internally + ASSERT_OK(Put(Key(1), "val1")); + ASSERT_OK(Put(Key(4), "val4")); + ASSERT_OK(Flush()); + + // Memtable + ASSERT_OK(Put(Key(2), "val2")); + ASSERT_OK(Put(Key(3), "val3")); const Snapshot* snapshot = db_->GetSnapshot(); + ASSERT_OK(Put(Key(2), "new val")); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(4), + Key(7))); + const Snapshot* snapshot2 = db_->GetSnapshot(); + + ASSERT_EQ(1, NumTableFilesAtLevel(1)); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + ReadOptions options; options.snapshot = snapshot; Iterator* iter = NewIterator(options); + ASSERT_OK(Put(Key(6), "val6")); ASSERT_OK(iter->status()); - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); + auto verify_iter = [&](int start, int end, bool new_key2 = false) { + for (int i = start; i < end; ++i) { + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(i)); + if (i == 2 && new_key2) { + ASSERT_EQ(iter->value(), "new val"); + } else { + ASSERT_EQ(iter->value(), "val" + std::to_string(i)); + } + iter->Next(); + } + }; - ASSERT_OK(Put("c", "d")); + for (int j = 0; j < 2; j++) { + iter->Seek(Key(1)); + verify_iter(1, 3); + // Refresh to same snapshot + ASSERT_OK(iter->Refresh(snapshot)); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + iter->Seek(Key(3)); + verify_iter(3, 6); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + + // Refresh to a newer snapshot + ASSERT_OK(iter->Refresh(snapshot2)); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + iter->SeekToFirst(); + verify_iter(0, 4, /*new_key2=*/true); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + + // Refresh to an older snapshot + ASSERT_OK(iter->Refresh(snapshot)); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + iter->Seek(Key(3)); + verify_iter(3, 6); + 
ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + + // Refresh to no snapshot + ASSERT_OK(iter->Refresh()); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + iter->Seek(Key(2)); + verify_iter(2, 4, /*new_key2=*/true); + verify_iter(6, 7); + ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + + // Change LSM shape, new SuperVersion is created. + ASSERT_OK(Flush()); - iter->Seek(Slice("a")); - ASSERT_TRUE(iter->Valid()); - ASSERT_EQ(iter->key().compare(Slice("x")), 0); - iter->Next(); - ASSERT_FALSE(iter->Valid()); + // Refresh back to original snapshot + ASSERT_OK(iter->Refresh(snapshot)); + } - ASSERT_OK(iter->status()); - Status s = iter->Refresh(); - ASSERT_TRUE(s.IsNotSupported()); - db_->ReleaseSnapshot(snapshot); delete iter; + db_->ReleaseSnapshot(snapshot); + db_->ReleaseSnapshot(snapshot2); + ASSERT_OK(db_->Close()); } TEST_P(DBIteratorTest, CreationFailure) { diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index a19912aa6..2e93f96d7 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -3643,6 +3643,124 @@ TEST_F(DBRangeDelTest, RangeDelReseekAfterFileReadError) { iter.reset(); } + +TEST_F(DBRangeDelTest, ReleaseSnapshotAfterIteratorCreation) { + // Test that range tombstone code path in LevelIterator + // does access ReadOptions::snapshot after Iterator creation. + // + // Put some data in L2 so that range tombstone in L1 will not be dropped. + ASSERT_OK(Put(Key(0), "v")); + ASSERT_OK(Put(Key(100), "v")); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + + // two L1 file with range del + ASSERT_OK(Put(Key(1), "v")); + ASSERT_OK(Put(Key(2), "v")); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), + Key(4))); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + + ASSERT_OK(Put(Key(5), "v")); + ASSERT_OK(Put(Key(6), "v")); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(5), + Key(6))); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + + ASSERT_EQ(2, NumTableFilesAtLevel(1)); + ASSERT_EQ(1, NumTableFilesAtLevel(2)); + + const Snapshot* snapshot = db_->GetSnapshot(); + ReadOptions ro; + ro.snapshot = snapshot; + + Iterator* iter = db_->NewIterator(ro); + db_->ReleaseSnapshot(snapshot); + + iter->Seek(Key(1)); + std::vector expected_keys{1, 2, 6, 100}; + for (int i : expected_keys) { + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(i)); + iter->Next(); + } + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); + + delete iter; +} + +TEST_F(DBRangeDelTest, RefreshWithSnapshot) { + ASSERT_OK(Put(Key(4), "4")); + ASSERT_OK(Put(Key(6), "6")); + const Snapshot* snapshot = db_->GetSnapshot(); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(3), + Key(5))); + + std::unique_ptr iter{db_->NewIterator(ReadOptions())}; + // Live Memtable + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(6)); + ASSERT_OK(iter->Refresh(snapshot)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(4)); + // Immutable Memtable + ASSERT_OK(dbfull()->TEST_SwitchMemtable()); + ASSERT_OK(iter->Refresh(nullptr)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(6)); + ASSERT_OK(iter->Refresh(snapshot)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(4)); + // L0 + ASSERT_OK(Flush()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + ASSERT_OK(iter->Refresh(nullptr)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), 
Key(6)); + ASSERT_OK(iter->Refresh(snapshot)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(4)); + // L1 + MoveFilesToLevel(1); + ASSERT_EQ(1, NumTableFilesAtLevel(1)); + ASSERT_OK(iter->Refresh(nullptr)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(6)); + ASSERT_OK(iter->Refresh(snapshot)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(4)); + // L1 with two file. + // Test that when LevelIterator enters a new file, + // it remembers which snapshot sequence number to use. + ASSERT_OK(Put(Key(2), "2")); + ASSERT_OK(Flush()); + MoveFilesToLevel(1); + ASSERT_EQ(2, NumTableFilesAtLevel(1)); + ASSERT_OK(iter->Refresh(nullptr)); + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + // LevelIterator is at the first file + ASSERT_EQ(iter->key(), Key(2)); + ASSERT_OK(iter->Refresh(snapshot)); + // Will enter the second file, and create a new range tombstone iterator. + // It should use the snapshot sequence number. + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), Key(4)); + iter.reset(); + db_->ReleaseSnapshot(snapshot); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/range_del_aggregator.h b/db/range_del_aggregator.h index dc1e73038..f7fa87af4 100644 --- a/db/range_del_aggregator.h +++ b/db/range_del_aggregator.h @@ -36,6 +36,10 @@ class TruncatedRangeDelIterator { const InternalKeyComparator* icmp, const InternalKey* smallest, const InternalKey* largest); + void SetRangeDelReadSeqno(SequenceNumber read_seqno) { + iter_->SetRangeDelReadSeqno(read_seqno); + } + bool Valid() const; void Next() { iter_->TopNext(); } diff --git a/db/range_tombstone_fragmenter.h b/db/range_tombstone_fragmenter.h index 8c7d98297..ce631d495 100644 --- a/db/range_tombstone_fragmenter.h +++ b/db/range_tombstone_fragmenter.h @@ -148,6 +148,10 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { const InternalKeyComparator& icmp, SequenceNumber upper_bound, const Slice* ts_upper_bound = nullptr, SequenceNumber lower_bound = 0); + void SetRangeDelReadSeqno(SequenceNumber read_seqno) override { + upper_bound_ = read_seqno; + } + void SeekToFirst() override; void SeekToLast() override; diff --git a/db/table_cache.cc b/db/table_cache.cc index 2a4f33505..8b3bc50df 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -224,7 +224,7 @@ InternalIterator* TableCache::NewIterator( size_t max_file_size_for_l0_meta_pin, const InternalKey* smallest_compaction_key, const InternalKey* largest_compaction_key, bool allow_unprepared_value, - uint8_t block_protection_bytes_per_key, + uint8_t block_protection_bytes_per_key, const SequenceNumber* read_seqno, TruncatedRangeDelIterator** range_del_iter) { PERF_TIMER_GUARD(new_table_iterator_nanos); @@ -273,7 +273,9 @@ InternalIterator* TableCache::NewIterator( if (s.ok() && !options.ignore_range_deletions) { if (range_del_iter != nullptr) { auto new_range_del_iter = - table_reader->NewRangeTombstoneIterator(options); + read_seqno ? 
table_reader->NewRangeTombstoneIterator( + *read_seqno, options.timestamp) + : table_reader->NewRangeTombstoneIterator(options); if (new_range_del_iter == nullptr || new_range_del_iter->empty()) { delete new_range_del_iter; *range_del_iter = nullptr; diff --git a/db/table_cache.h b/db/table_cache.h index 39e41cc6c..67d36d805 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -86,6 +86,8 @@ class TableCache { // not cached), depending on the CF options // @param skip_filters Disables loading/accessing the filter block // @param level The level this table is at, -1 for "not set / don't know" + // @param range_del_read_seqno If non-nullptr, will be used to create + // *range_del_iter. InternalIterator* NewIterator( const ReadOptions& options, const FileOptions& toptions, const InternalKeyComparator& internal_comparator, @@ -97,6 +99,7 @@ class TableCache { const InternalKey* smallest_compaction_key, const InternalKey* largest_compaction_key, bool allow_unprepared_value, uint8_t protection_bytes_per_key, + const SequenceNumber* range_del_read_seqno = nullptr, TruncatedRangeDelIterator** range_del_iter = nullptr); // If a seek to internal key "k" in specified file finds an entry, diff --git a/db/version_set.cc b/db/version_set.cc index 572da83e3..ef6d30944 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -957,18 +957,21 @@ class LevelIterator final : public InternalIterator { flevel_(flevel), prefix_extractor_(prefix_extractor), file_read_hist_(file_read_hist), - should_sample_(should_sample), caller_(caller), - skip_filters_(skip_filters), - allow_unprepared_value_(allow_unprepared_value), file_index_(flevel_->num_files), - level_(level), range_del_agg_(range_del_agg), pinned_iters_mgr_(nullptr), compaction_boundaries_(compaction_boundaries), - is_next_read_sequential_(false), - block_protection_bytes_per_key_(block_protection_bytes_per_key), range_tombstone_iter_(nullptr), + read_seq_(read_options.snapshot + ? read_options.snapshot->GetSequenceNumber() + : kMaxSequenceNumber), + level_(level), + block_protection_bytes_per_key_(block_protection_bytes_per_key), + should_sample_(should_sample), + skip_filters_(skip_filters), + allow_unprepared_value_(allow_unprepared_value), + is_next_read_sequential_(false), to_return_sentinel_(false) { // Empty level is not supported. assert(flevel_ != nullptr && flevel_->num_files > 0); @@ -1056,6 +1059,10 @@ class LevelIterator final : public InternalIterator { bool IsDeleteRangeSentinelKey() const override { return to_return_sentinel_; } + void SetRangeDelReadSeqno(SequenceNumber read_seq) override { + read_seq_ = read_seq; + } + private: // Return true if at least one invalid file is seen and skipped. bool SkipEmptyFileForward(); @@ -1112,7 +1119,7 @@ class LevelIterator final : public InternalIterator { /*arena=*/nullptr, skip_filters_, level_, /*max_file_size_for_l0_meta_pin=*/0, smallest_compaction_key, largest_compaction_key, allow_unprepared_value_, - block_protection_bytes_per_key_, range_tombstone_iter_); + block_protection_bytes_per_key_, &read_seq_, range_tombstone_iter_); } // Check if current file being fully within iterate_lower_bound. 
@@ -1142,13 +1149,8 @@ class LevelIterator final : public InternalIterator { const std::shared_ptr& prefix_extractor_; HistogramImpl* file_read_hist_; - bool should_sample_; TableReaderCaller caller_; - bool skip_filters_; - bool allow_unprepared_value_; - bool may_be_out_of_lower_bound_ = true; size_t file_index_; - int level_; RangeDelAggregator* range_del_agg_; IteratorWrapper file_iter_; // May be nullptr PinnedIteratorsManager* pinned_iters_mgr_; @@ -1157,10 +1159,6 @@ class LevelIterator final : public InternalIterator { // tombstones. const std::vector* compaction_boundaries_; - bool is_next_read_sequential_; - - uint8_t block_protection_bytes_per_key_; - // This is set when this level iterator is used under a merging iterator // that processes range tombstones. range_tombstone_iter_ points to where the // merging iterator stores the range tombstones iterator for this level. When @@ -1177,20 +1175,29 @@ class LevelIterator final : public InternalIterator { // *range_tombstone_iter_ points to range tombstones of the current SST file TruncatedRangeDelIterator** range_tombstone_iter_; - // Whether next/prev key is a sentinel key. - bool to_return_sentinel_ = false; // The sentinel key to be returned Slice sentinel_; - // Sets flags for if we should return the sentinel key next. - // The condition for returning sentinel is reaching the end of current - // file_iter_: !Valid() && status.().ok(). - void TrySetDeleteRangeSentinel(const Slice& boundary_key); - void ClearSentinel() { to_return_sentinel_ = false; } + SequenceNumber read_seq_; + int level_; + uint8_t block_protection_bytes_per_key_; + bool should_sample_; + bool skip_filters_; + bool allow_unprepared_value_; + bool may_be_out_of_lower_bound_ = true; + bool is_next_read_sequential_; // Set in Seek() when a prefix seek reaches end of the current file, // and the next file has a different prefix. SkipEmptyFileForward() // will not move to next file when this flag is set. bool prefix_exhausted_ = false; + // Whether next/prev key is a sentinel key. + bool to_return_sentinel_ = false; + + // Sets flags for if we should return the sentinel key next. + // The condition for returning sentinel is reaching the end of current + // file_iter_: !Valid() && status.().ok(). 
+ void TrySetDeleteRangeSentinel(const Slice& boundary_key); + void ClearSentinel() { to_return_sentinel_ = false; } }; void LevelIterator::TrySetDeleteRangeSentinel(const Slice& boundary_key) { @@ -2006,7 +2013,8 @@ void Version::AddIteratorsForLevel(const ReadOptions& read_options, /*skip_filters=*/false, /*level=*/0, max_file_size_for_l0_meta_pin_, /*smallest_compaction_key=*/nullptr, /*largest_compaction_key=*/nullptr, allow_unprepared_value, - mutable_cf_options_.block_protection_bytes_per_key, &tombstone_iter); + mutable_cf_options_.block_protection_bytes_per_key, + /*range_del_read_seqno=*/nullptr, &tombstone_iter); if (read_options.ignore_range_deletions) { merge_iter_builder->AddIterator(table_iter); } else { @@ -6956,6 +6964,7 @@ InternalIterator* VersionSet::MakeInputIterator( /*largest_compaction_key=*/nullptr, /*allow_unprepared_value=*/false, c->mutable_cf_options()->block_protection_bytes_per_key, + /*range_del_read_seqno=*/nullptr, /*range_del_iter=*/&range_tombstone_iter); range_tombstones.emplace_back(range_tombstone_iter, nullptr); } diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index d54698435..98de35b8d 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1345,6 +1345,14 @@ Status StressTest::TestIterate(ThreadState* thread, const bool support_seek_first_or_last = expect_total_order; + // Write-prepared and Write-unprepared do not support Refresh() yet. + if (!(FLAGS_use_txn && FLAGS_txn_write_policy != 0 /* write committed */) && + thread->rand.OneIn(4)) { + Status s = iter->Refresh(snapshot_guard.snapshot()); + assert(s.ok()); + op_logs += "Refresh "; + } + LastIterateOp last_op; if (support_seek_first_or_last && thread->rand.OneIn(100)) { iter->SeekToFirst(); diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index 9d4c9f73a..c50c825f3 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -107,10 +107,16 @@ class Iterator : public Cleanable { // satisfied without doing some IO, then this returns Status::Incomplete(). virtual Status status() const = 0; - // If supported, renew the iterator to represent the latest state. The - // iterator will be invalidated after the call. Not supported if - // ReadOptions.snapshot is given when creating the iterator. - virtual Status Refresh() { + // If supported, the DB state that the iterator reads from is updated to + // the latest state. The iterator will be invalidated after the call. + // Regardless of whether the iterator was created/refreshed previously + // with or without a snapshot, the iterator will be reading the + // latest DB state after this call. + virtual Status Refresh() { return Refresh(nullptr); } + + // Similar to Refresh() but the iterator will be reading the latest DB state + // under the given snapshot. 
+ virtual Status Refresh(const class Snapshot*) { return Status::NotSupported("Refresh() is not supported"); } diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 57d65d555..a454762e2 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -1913,6 +1913,16 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( snapshot, read_options.timestamp); } +FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( + SequenceNumber read_seqno, const Slice* timestamp) { + if (rep_->fragmented_range_dels == nullptr) { + return nullptr; + } + return new FragmentedRangeTombstoneIterator(rep_->fragmented_range_dels, + rep_->internal_comparator, + read_seqno, timestamp); +} + bool BlockBasedTable::FullFilterKeyMayMatch( FilterBlockReader* filter, const Slice& internal_key, const bool no_io, const SliceTransform* prefix_extractor, GetContext* get_context, diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 4ea4212ae..120907240 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -138,6 +138,9 @@ class BlockBasedTable : public TableReader { FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& read_options) override; + FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( + SequenceNumber read_seqno, const Slice* timestamp) override; + // @param skip_filters Disables loading/accessing the filter block Status Get(const ReadOptions& readOptions, const Slice& key, GetContext* get_context, const SliceTransform* prefix_extractor, diff --git a/table/internal_iterator.h b/table/internal_iterator.h index 8015ed635..060306003 100644 --- a/table/internal_iterator.h +++ b/table/internal_iterator.h @@ -43,6 +43,17 @@ class InternalIteratorBase : public Cleanable { virtual ~InternalIteratorBase() {} + // This iterator will only process range tombstones with sequence + // number <= `read_seqno`. + // Noop for most child classes. + // For range tombstone iterators (TruncatedRangeDelIterator, + // FragmentedRangeTombstoneIterator), will only return range tombstones with + // sequence number <= `read_seqno`. For LevelIterator, it may open new table + // files and create new range tombstone iterators during scanning. It will use + // `read_seqno` as the sequence number for creating new range tombstone + // iterators. + virtual void SetRangeDelReadSeqno(SequenceNumber /* read_seqno */) {} + // An iterator is either positioned at a key/value pair, or // not valid. This method returns true iff the iterator is valid. // Always returns false if !status().ok(). diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 17abef4ac..3e6f9c1ae 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -30,6 +30,11 @@ class IteratorWrapperBase { } ~IteratorWrapperBase() {} InternalIteratorBase* iter() const { return iter_; } + void SetRangeDelReadSeqno(SequenceNumber read_seqno) { + if (iter_) { + iter_->SetRangeDelReadSeqno(read_seqno); + } + } // Set the underlying Iterator to _iter and return // previous underlying Iterator. 
diff --git a/table/merging_iterator.cc b/table/merging_iterator.cc index 505cd76d3..247564fe7 100644 --- a/table/merging_iterator.cc +++ b/table/merging_iterator.cc @@ -135,6 +135,18 @@ class MergingIterator : public InternalIterator { status_.PermitUncheckedError(); } + void SetRangeDelReadSeqno(SequenceNumber read_seqno) override { + for (auto& child : children_) { + // This should only be needed for LevelIterator (iterators from L1+). + child.iter.SetRangeDelReadSeqno(read_seqno); + } + for (auto& child : range_tombstone_iters_) { + if (child) { + child->SetRangeDelReadSeqno(read_seqno); + } + } + } + bool Valid() const override { return current_ != nullptr && status_.ok(); } Status status() const override { return status_; } diff --git a/table/table_reader.h b/table/table_reader.h index 53c522052..87610f4fe 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -60,11 +60,17 @@ class TableReader { size_t compaction_readahead_size = 0, bool allow_unprepared_value = false) = 0; + // read_options.snapshot needs to outlive this call. virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( const ReadOptions& /*read_options*/) { return nullptr; } + virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator( + SequenceNumber /* read_seqno */, const Slice* /* timestamp */) { + return nullptr; + } + // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were // present in the file). The returned value is in terms of file diff --git a/unreleased_history/new_features/iterator-refresh-snapshot.md b/unreleased_history/new_features/iterator-refresh-snapshot.md new file mode 100644 index 000000000..f8a0e7b43 --- /dev/null +++ b/unreleased_history/new_features/iterator-refresh-snapshot.md @@ -0,0 +1 @@ +Add a new iterator API `Iterator::Refresh(const Snapshot *)` that allows iterator to be refreshed while using the input snapshot to read. \ No newline at end of file From 051cad3867efbfe1a589d4773fae43b9b74d1e76 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 15 Sep 2023 13:05:23 -0700 Subject: [PATCH 119/386] Fix CI failure due to transaction_test (#11843) Summary: Test ` build-linux-static_lib-alt_namespace-status_checked` has been failing in main branch. 
``` utilities/transactions/transaction_test.cc:6777:3: error: 'rocksdb' has not been declared 6777 | rocksdb::GetMergeOperandsOptions mergeOperandOptions; | ^~~~~~~ ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11843 Test Plan: `ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 J=24 transaction_test` Reviewed By: sarangbh Differential Revision: D49330210 Pulled By: cbi42 fbshipit-source-id: 85c99236eeca6a777af0101684fbab5a33cca1c9 --- utilities/transactions/transaction_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index a122a7dd7..0d2e7ccda 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -6774,7 +6774,7 @@ TEST_F(TransactionDBTest, CollapseKey) { // get merge op info std::vector operands(3); - rocksdb::GetMergeOperandsOptions mergeOperandOptions; + GetMergeOperandsOptions mergeOperandOptions; mergeOperandOptions.expected_max_number_of_operands = 3; int numOperands; ASSERT_OK(db->GetMergeOperands({}, db->DefaultColumnFamily(), "hello", From cff6490bc4a63a9162830a2595158ff86bee1d98 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 15 Sep 2023 15:34:04 -0700 Subject: [PATCH 120/386] Add IOActivity.kMultiGetEntity (#11842) Summary: - As a follow up from https://github.com/facebook/rocksdb/issues/11799, adding `Env::IOActivity::kMultiGetEntity` support to `DBImpl::MultiGetEntity()`. ## Minor Refactor - Because both `DBImpl::MultiGet()` and `DBImpl::MultiGetEntity()` call `DBImpl::MultiGetCommon()` which later calls `DBImpl::MultiGetWithCallback()` where we check `Env::IOActivity::kMultiGet`, minor refactor was needed so that we don't check `Env::IOActivity::kMultiGet` for `DBImpl::MultiGetEntity()`. - I still see more areas for refactoring to avoid duplicate code of checking IOActivity and setting it when Unknown, but this will be addressed separately. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11842 Test Plan: - Added the `ThreadStatus::OperationType::OP_MULTIGETENTITY` in `db_stress` to verify the pass-down IOActivity in a thread aligns with the actual activity the thread is doing. 
``` python3 tools/db_crashtest.py blackbox --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 --duration=60 --interval=10 python3 tools/db_crashtest.py blackbox --simple --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 --duration=60 --interval=10 python3 tools/db_crashtest.py blackbox --cf_consistency --max_key=25000000 --write_buffer_size=4194304 --max_bytes_for_level_base=2097152 --target_file_size_base=2097152 --periodic_compaction_seconds=0 --use_put_entity_one_in=10 --use_get_entity=1 --duration=60 --interval=10 ``` Reviewed By: ltamasi Differential Revision: D49329575 Pulled By: jaykorean fbshipit-source-id: 05198f1d3f92e6be3d42a3d184bacb3ab2ce6923 --- db/db_impl/db_impl.cc | 54 +++++++++++++++++++++++---- db/db_impl/db_impl.h | 5 +++ db_stress_tool/db_stress_test_base.cc | 12 ++---- 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f546826a3..9a947cf67 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2995,7 +2995,7 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, sorted_keys[i] = &key_context[i]; } PrepareMultiGetKeys(num_keys, sorted_input, &sorted_keys); - MultiGetWithCallback(read_options, column_family, nullptr, &sorted_keys); + MultiGetWithCallbackImpl(read_options, column_family, nullptr, &sorted_keys); } void DBImpl::MultiGetWithCallback( @@ -3012,7 +3012,13 @@ void DBImpl::MultiGetWithCallback( if (read_options.io_activity == Env::IOActivity::kUnknown) { read_options.io_activity = Env::IOActivity::kMultiGet; } + MultiGetWithCallbackImpl(read_options, column_family, callback, sorted_keys); +} +void DBImpl::MultiGetWithCallbackImpl( + const ReadOptions& read_options, ColumnFamilyHandle* column_family, + ReadCallback* callback, + autovector* sorted_keys) { std::array multiget_cf_data; multiget_cf_data[0] = MultiGetColumnFamilyData(column_family, nullptr); std::function* sorted_keys, SuperVersion* sv, SequenceNumber snap_seqnum, ReadCallback* callback); + void MultiGetWithCallbackImpl( + const ReadOptions& read_options, ColumnFamilyHandle* column_family, + ReadCallback* callback, + autovector* sorted_keys); + Status DisableFileDeletionsWithLock(); Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 98de35b8d..57a7aa39b 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1044,6 +1044,7 @@ void StressTest::OperateDb(ThreadState* thread) { if (prob_op >= 0 && prob_op < static_cast(FLAGS_readpercent)) { assert(0 <= prob_op); // OPERATION read + ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); if (FLAGS_use_multi_get_entity) { constexpr uint64_t max_batch_size = 64; const uint64_t batch_size = std::min( @@ -1054,17 +1055,15 @@ void StressTest::OperateDb(ThreadState* thread) { assert(i + batch_size <= ops_per_open); rand_keys = GenerateNKeys(thread, static_cast(batch_size), i); - + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_MULTIGETENTITY); TestMultiGetEntity(thread, read_opts, rand_column_families, rand_keys); - i += batch_size - 1; } else if (FLAGS_use_get_entity) { - 
ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); ThreadStatusUtil::SetThreadOperation( ThreadStatus::OperationType::OP_GETENTITY); TestGetEntity(thread, read_opts, rand_column_families, rand_keys); - ThreadStatusUtil::ResetThreadStatus(); } else if (FLAGS_use_multiget) { // Leave room for one more iteration of the loop with a single key // batch. This is to ensure that each thread does exactly the same @@ -1075,19 +1074,16 @@ void StressTest::OperateDb(ThreadState* thread) { // If its the last iteration, ensure that multiget_batch_size is 1 multiget_batch_size = std::max(multiget_batch_size, 1); rand_keys = GenerateNKeys(thread, multiget_batch_size, i); - ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); ThreadStatusUtil::SetThreadOperation( ThreadStatus::OperationType::OP_MULTIGET); TestMultiGet(thread, read_opts, rand_column_families, rand_keys); - ThreadStatusUtil::ResetThreadStatus(); i += multiget_batch_size - 1; } else { - ThreadStatusUtil::SetEnableTracking(FLAGS_enable_thread_tracking); ThreadStatusUtil::SetThreadOperation( ThreadStatus::OperationType::OP_GET); TestGet(thread, read_opts, rand_column_families, rand_keys); - ThreadStatusUtil::ResetThreadStatus(); } + ThreadStatusUtil::ResetThreadStatus(); } else if (prob_op < prefix_bound) { assert(static_cast(FLAGS_readpercent) <= prob_op); // OPERATION prefix scan From 1c6faf35871a236222bcbf0b69718ee43376a951 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 15 Sep 2023 15:46:10 -0700 Subject: [PATCH 121/386] Make RibbonFilterPolicy::bloom_before_level mutable (SetOptions()) (#11838) Summary: An internal user wants to be able to dynamically switch between Bloom and Ribbon filters, without a custom FilterPolicy. Making `filter_policy` mutable would actually make issue https://github.com/facebook/rocksdb/issues/10079 worse, because it would be a race on a pointer field, not just on scalars. As a reasonable compromise until that is fixed, I am enabling dynamic control over Bloom vs. Ribbon choice by making RibbonFilterPolicy::bloom_before_level mutable, and doing that safely by using an atomic. I've also slightly tweaked the interpretation of that field so that setting it to INT_MAX really means "always Bloom." Pull Request resolved: https://github.com/facebook/rocksdb/pull/11838 Test Plan: unit tests added/extended. crash test updated for SetOptions call and tested under TSAN with amplified probability (lower set_options_one_in). 
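For illustration, a minimal sketch of the intended usage, assuming a DB opened with a BlockBasedTable factory whose filter policy comes from `NewRibbonFilterPolicy()` (the option string below is the one exercised by the new unit test):

```cpp
#include "rocksdb/db.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

Options MakeOptions() {
  Options options;
  BlockBasedTableOptions table_options;
  // ~10 bits/key Bloom-equivalent; Ribbon on all levels except flush output.
  table_options.filter_policy.reset(
      NewRibbonFilterPolicy(10.0, /*bloom_before_level=*/0));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  return options;
}

// Later, switch the running DB to all-Bloom (INT_MAX) or back to all-Ribbon
// (-1) without reopening, via the now-mutable option:
Status SwitchToAllBloom(DB* db) {
  return db->SetOptions(
      {{"table_factory.filter_policy.bloom_before_level", "2147483647"}});
}
```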
Reviewed By: ajkr Differential Revision: D49296284 Pulled By: pdillinger fbshipit-source-id: e4251c077510df9a9c719876f482448c0d15402a --- db/db_bloom_filter_test.cc | 58 +++++++++++++++ db_stress_tool/db_stress_common.h | 2 +- db_stress_tool/db_stress_gflags.cc | 7 +- db_stress_tool/db_stress_test_base.cc | 13 +++- include/rocksdb/filter_policy.h | 11 ++- include/rocksdb/utilities/options_type.h | 3 +- options/options_helper.cc | 11 +++ table/block_based/filter_policy.cc | 70 ++++++++++++------- table/block_based/filter_policy_internal.h | 3 +- tools/db_crashtest.py | 2 +- .../new_features/mutable_bloom_before.md | 1 + util/bloom_test.cc | 43 +++++++++--- 12 files changed, 174 insertions(+), 50 deletions(-) create mode 100644 unreleased_history/new_features/mutable_bloom_before.md diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index bdeb5706d..9e9c75473 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1775,6 +1775,64 @@ TEST_F(DBBloomFilterTest, ContextCustomFilterPolicy) { } } +TEST_F(DBBloomFilterTest, MutatingRibbonFilterPolicy) { + // Test that RibbonFilterPolicy has a mutable bloom_before_level fields that + // can be updated through SetOptions + + Options options = CurrentOptions(); + options.statistics = CreateDBStatistics(); + auto& stats = *options.statistics; + BlockBasedTableOptions table_options; + // First config forces Bloom filter, to establish a baseline before + // SetOptions(). + table_options.filter_policy.reset(NewRibbonFilterPolicy(10, INT_MAX)); + double expected_bpk = 10.0; + // Other configs to try, with approx expected bits per key + std::vector> configs = {{"-1", 7.0}, + {"0", 10.0}}; + + table_options.cache_index_and_filter_blocks = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + ASSERT_OK(TryReopen(options)); + + char v[] = "a"; + + for (;; ++(v[0])) { + const int maxKey = 8000; + for (int i = 0; i < maxKey; i++) { + ASSERT_OK(Put(Key(i), v)); + } + ASSERT_OK(Flush()); + + for (int i = 0; i < maxKey; i++) { + ASSERT_EQ(Get(Key(i)), v); + } + + uint64_t filter_bytes = + stats.getAndResetTickerCount(BLOCK_CACHE_FILTER_BYTES_INSERT); + + EXPECT_NEAR(filter_bytes * 8.0 / maxKey, expected_bpk, 0.3); + + if (configs.empty()) { + break; + } + + ASSERT_OK( + db_->SetOptions({{"table_factory.filter_policy.bloom_before_level", + configs.back().first}})); + + // Ensure original object is mutated + std::string val; + ASSERT_OK( + table_options.filter_policy->GetOption({}, "bloom_before_level", &val)); + ASSERT_EQ(configs.back().first, val); + + expected_bpk = configs.back().second; + configs.pop_back(); + } +} + class SliceTransformLimitedDomain : public SliceTransform { const char* Name() const override { return "SliceTransformLimitedDomain"; } diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 4d011434f..93b5f32d2 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -159,7 +159,7 @@ DECLARE_double(experimental_mempurge_threshold); DECLARE_bool(enable_write_thread_adaptive_yield); DECLARE_int32(reopen); DECLARE_double(bloom_bits); -DECLARE_int32(ribbon_starting_level); +DECLARE_int32(bloom_before_level); DECLARE_bool(partition_filters); DECLARE_bool(optimize_filters_for_memory); DECLARE_bool(detect_filter_construct_corruption); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 776ef357a..e8cf07068 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ 
b/db_stress_tool/db_stress_gflags.cc @@ -526,10 +526,11 @@ DEFINE_double(bloom_bits, 10, "Negative means use default settings."); DEFINE_int32( - ribbon_starting_level, 999, + bloom_before_level, 999, "Use Bloom filter on levels below specified and Ribbon beginning on level " - "specified. Flush is considered level -1. 999 or more -> always Bloom. 0 " - "-> Ribbon except Bloom for flush. -1 -> always Ribbon."); + "specified. Flush is considered level -1. Setting -1 -> always Ribbon. " + "0 -> Ribbon except Bloom for flush. INT_MAX (typically 2147483647) -> " + "always Bloom."); DEFINE_bool(partition_filters, false, "use partitioned filters " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 57a7aa39b..e7df7cf0b 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -39,12 +39,12 @@ std::shared_ptr CreateFilterPolicy() { return BlockBasedTableOptions().filter_policy; } const FilterPolicy* new_policy; - if (FLAGS_ribbon_starting_level >= 999) { + if (FLAGS_bloom_before_level == INT_MAX) { // Use Bloom API new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false); } else { - new_policy = NewRibbonFilterPolicy( - FLAGS_bloom_bits, /* bloom_before_level */ FLAGS_ribbon_starting_level); + new_policy = + NewRibbonFilterPolicy(FLAGS_bloom_bits, FLAGS_bloom_before_level); } return std::shared_ptr(new_policy); } @@ -273,6 +273,13 @@ bool StressTest::BuildOptionsTable() { std::vector{"kDisable", "kFlushOnly"}); } + if (FLAGS_bloom_before_level != INT_MAX) { + // Can modify RibbonFilterPolicy field + options_tbl.emplace("table_factory.filter_policy.bloom_before_level", + std::vector{"-1", "0", "1", "2", + "2147483646", "2147483647"}); + } + options_table_ = std::move(options_tbl); for (const auto& iter : options_table_) { diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 954d15b4a..039b826de 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -162,7 +162,7 @@ class FilterPolicy : public Customizable { // ignores trailing spaces, it would be incorrect to use a // FilterPolicy (like NewBloomFilterPolicy) that does not ignore // trailing spaces in keys. -extern const FilterPolicy* NewBloomFilterPolicy( +const FilterPolicy* NewBloomFilterPolicy( double bits_per_key, bool IGNORED_use_block_based_builder = false); // A new Bloom alternative that saves about 30% space compared to @@ -184,6 +184,11 @@ extern const FilterPolicy* NewBloomFilterPolicy( // flushes under Level and Universal compaction styles. // bloom_before_level=-1 -> Always generate Ribbon filters (except in // some extreme or exceptional cases). +// bloom_before_level=INT_MAX -> Always generate Bloom filters. +// +// The bloom_before_level option is mutable in the Configurable interface +// and through the SetOptions() API, as in +// db->SetOptions({{"table_factory.filter_policy.bloom_before_level", "3"}}); // // Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier // versions reading the data will behave as if no filter was used @@ -200,7 +205,7 @@ extern const FilterPolicy* NewBloomFilterPolicy( // // Also consider using optimize_filters_for_memory to save filter // memory. 
-extern const FilterPolicy* NewRibbonFilterPolicy( - double bloom_equivalent_bits_per_key, int bloom_before_level = 0); +FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level = 0); } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/utilities/options_type.h b/include/rocksdb/utilities/options_type.h index cd340ed59..782b14e65 100644 --- a/include/rocksdb/utilities/options_type.h +++ b/include/rocksdb/utilities/options_type.h @@ -40,8 +40,9 @@ enum class OptionType { kUInt32T, kUInt64T, kSizeT, - kString, kDouble, + kAtomicInt, + kString, kCompactionStyle, kCompactionPri, kCompressionType, diff --git a/options/options_helper.cc b/options/options_helper.cc index 65467f765..8d32640c9 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -4,6 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). #include "options/options_helper.h" +#include #include #include #include @@ -432,6 +433,10 @@ static bool ParseOptionHelper(void* opt_address, const OptionType& opt_type, case OptionType::kSizeT: PutUnaligned(static_cast(opt_address), ParseSizeT(value)); break; + case OptionType::kAtomicInt: + static_cast*>(opt_address) + ->store(ParseInt(value), std::memory_order_release); + break; case OptionType::kString: *static_cast(opt_address) = value; break; @@ -521,6 +526,10 @@ bool SerializeSingleOptionHelper(const void* opt_address, case OptionType::kDouble: *value = std::to_string(*(static_cast(opt_address))); break; + case OptionType::kAtomicInt: + *value = std::to_string(static_cast*>(opt_address) + ->load(std::memory_order_acquire)); + break; case OptionType::kString: *value = EscapeOptionString(*(static_cast(opt_address))); @@ -1169,6 +1178,8 @@ static bool AreOptionsEqual(OptionType type, const void* this_offset, GetUnaligned(static_cast(that_offset), &v2); return (v1 == v2); } + case OptionType::kAtomicInt: + return IsOptionEqual>(this_offset, that_offset); case OptionType::kString: return IsOptionEqual(this_offset, that_offset); case OptionType::kDouble: diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 36f3b16d4..19b880a90 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -10,6 +10,7 @@ #include "rocksdb/filter_policy.h" #include +#include #include #include #include @@ -24,6 +25,7 @@ #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/slice.h" #include "rocksdb/utilities/object_registry.h" +#include "rocksdb/utilities/options_type.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/filter_policy_internal.h" #include "table/block_based/full_filter_block.h" @@ -1730,7 +1732,15 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key, RibbonFilterPolicy::RibbonFilterPolicy(double bloom_equivalent_bits_per_key, int bloom_before_level) : BloomLikeFilterPolicy(bloom_equivalent_bits_per_key), - bloom_before_level_(bloom_before_level) {} + bloom_before_level_(bloom_before_level) { + static const std::unordered_map type_info = { + {"bloom_before_level", + {offsetof(class RibbonFilterPolicy, bloom_before_level_), + OptionType::kAtomicInt, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, + }; + RegisterOptions(this, &type_info); +} FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { @@ -1738,31 +1748,38 @@ FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext( // "No filter" special case return nullptr; } - // Treat 
unknown same as bottommost - int levelish = INT_MAX; - - switch (context.compaction_style) { - case kCompactionStyleLevel: - case kCompactionStyleUniversal: { - if (context.reason == TableFileCreationReason::kFlush) { - // Treat flush as level -1 - assert(context.level_at_creation == 0); - levelish = -1; - } else if (context.level_at_creation == -1) { - // Unknown level - assert(levelish == INT_MAX); - } else { - levelish = context.level_at_creation; + // Treat unknown same as bottommost, INT_MAX - 1. + // INT_MAX is reserved for "always use Bloom". + int levelish = INT_MAX - 1; + + int bloom_before_level = bloom_before_level_.load(std::memory_order_relaxed); + if (bloom_before_level < INT_MAX) { + switch (context.compaction_style) { + case kCompactionStyleLevel: + case kCompactionStyleUniversal: { + if (context.reason == TableFileCreationReason::kFlush) { + // Treat flush as level -1 + assert(context.level_at_creation == 0); + levelish = -1; + } else if (context.level_at_creation == -1) { + // Unknown level + assert(levelish == INT_MAX - 1); + } else { + levelish = context.level_at_creation; + } + break; } - break; + case kCompactionStyleFIFO: + case kCompactionStyleNone: + // Treat as bottommost + assert(levelish == INT_MAX - 1); + break; } - case kCompactionStyleFIFO: - case kCompactionStyleNone: - // Treat as bottommost - assert(levelish == INT_MAX); - break; + } else { + // INT_MAX == always Bloom + assert(levelish < bloom_before_level); } - if (levelish < bloom_before_level_) { + if (levelish < bloom_before_level) { return GetFastLocalBloomBuilderWithContext(context); } else { return GetStandard128RibbonBuilderWithContext(context); @@ -1771,14 +1788,15 @@ FilterBitsBuilder* RibbonFilterPolicy::GetBuilderWithContext( const char* RibbonFilterPolicy::kClassName() { return "ribbonfilter"; } const char* RibbonFilterPolicy::kNickName() { return "rocksdb.RibbonFilter"; } +const char* RibbonFilterPolicy::kName() { return "RibbonFilterPolicy"; } std::string RibbonFilterPolicy::GetId() const { return BloomLikeFilterPolicy::GetId() + ":" + - std::to_string(bloom_before_level_); + std::to_string(bloom_before_level_.load(std::memory_order_acquire)); } -const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, - int bloom_before_level) { +FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key, + int bloom_before_level) { return new RibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level); } diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index 9bc3a2482..3919c8c6d 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -290,10 +290,11 @@ class RibbonFilterPolicy : public BloomLikeFilterPolicy { const char* Name() const override { return kClassName(); } static const char* kNickName(); const char* NickName() const override { return kNickName(); } + static const char* kName(); std::string GetId() const override; private: - const int bloom_before_level_; + std::atomic bloom_before_level_; }; // For testing only, but always constructable with internal names diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8f3219ee6..8b4b36c74 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -133,7 +133,7 @@ # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda "use_put_entity_one_in": random.choice([0] * 7 + [1, 5, 10]), # 999 -> use Bloom API - "ribbon_starting_level": lambda: 
random.choice([random.randint(-1, 10), 999]), + "bloom_before_level": lambda: random.choice([random.randint(-1, 2), random.randint(-1, 10), 0x7fffffff - 1, 0x7fffffff]), "value_size_mult": 32, "verification_only": 0, "verify_checksum": 1, diff --git a/unreleased_history/new_features/mutable_bloom_before.md b/unreleased_history/new_features/mutable_bloom_before.md new file mode 100644 index 000000000..c811b6aea --- /dev/null +++ b/unreleased_history/new_features/mutable_bloom_before.md @@ -0,0 +1 @@ +For `NewRibbonFilterPolicy()`, made the `bloom_before_level` option mutable through the Configurable interface and the SetOptions API, allowing dynamic switching between all-Bloom and all-Ribbon configurations, and configurations in between. See comments on `NewRibbonFilterPolicy()` diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 06dd1de06..b0a5cae56 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -23,6 +23,7 @@ int main() { #include "cache/cache_reservation_manager.h" #include "memory/arena.h" #include "port/jemalloc_helper.h" +#include "rocksdb/convenience.h" #include "rocksdb/filter_policy.h" #include "table/block_based/filter_policy_internal.h" #include "test_util/testharness.h" @@ -1109,12 +1110,16 @@ static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) { TEST(RibbonTest, RibbonTestLevelThreshold) { BlockBasedTableOptions opts; FilterBuildingContext ctx(opts); + + std::shared_ptr reused{NewRibbonFilterPolicy(10)}; + // A few settings for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal, kCompactionStyleFIFO, kCompactionStyleNone}) { ctx.compaction_style = cs; - for (int bloom_before_level : {-1, 0, 1, 10}) { - std::vector > policies; + for (int bloom_before_level : {-1, 0, 1, 10, INT_MAX - 1, INT_MAX}) { + SCOPED_TRACE("bloom_before_level=" + std::to_string(bloom_before_level)); + std::vector > policies; policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level)); if (bloom_before_level == 0) { @@ -1122,16 +1127,22 @@ TEST(RibbonTest, RibbonTestLevelThreshold) { policies.emplace_back(NewRibbonFilterPolicy(10)); } - for (std::unique_ptr& policy : policies) { - // Claim to be generating filter for this level - SetTestingLevel(bloom_before_level, &ctx); + ASSERT_OK(reused->ConfigureOption({}, "bloom_before_level", + std::to_string(bloom_before_level))); - std::unique_ptr builder{ - policy->GetBuilderWithContext(ctx)}; + policies.push_back(reused); - // Must be Ribbon (more space efficient than 10 bits per key) - ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + for (auto& policy : policies) { + std::unique_ptr builder; + if (bloom_before_level < INT_MAX) { + // Claim to be generating filter for this level + SetTestingLevel(bloom_before_level, &ctx); + + builder.reset(policy->GetBuilderWithContext(ctx)); + // Must be Ribbon (more space efficient than 10 bits per key) + ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + } if (bloom_before_level >= 0) { // Claim to be generating filter for previous level SetTestingLevel(bloom_before_level - 1, &ctx); @@ -1142,6 +1153,10 @@ TEST(RibbonTest, RibbonTestLevelThreshold) { // Level is considered. // Must be Bloom (~ 10 bits per key) ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9); + } else if (bloom_before_level == INT_MAX) { + // Force bloom option + // Must be Bloom (~ 10 bits per key) + ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9); } else { // Level is ignored under non-traditional compaction styles. 
// Must be Ribbon (more space efficient than 10 bits per key) @@ -1155,8 +1170,14 @@ TEST(RibbonTest, RibbonTestLevelThreshold) { builder.reset(policy->GetBuilderWithContext(ctx)); - // Must be Ribbon (more space efficient than 10 bits per key) - ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + if (bloom_before_level < INT_MAX) { + // Must be Ribbon (more space efficient than 10 bits per key) + ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8); + } else { + // Force bloom option + // Must be Bloom (~ 10 bits per key) + ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9); + } } } } From 99f8820054898d44e5cd5f67edd8c940ce1633bb Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 15 Sep 2023 22:50:49 -0700 Subject: [PATCH 122/386] Fix test on IOActivity check for MultiGetEntity (#11850) Summary: After https://github.com/facebook/rocksdb/issues/11842 merged, we started to see some crash_test failures. There is a flow inside `TestMultiGetEntity()` that it calls `GetEntity()` to compare the result between `MultiGetEntity()` and `GetEntity()` https://github.com/facebook/rocksdb/blob/1c6faf35871a236222bcbf0b69718ee43376a951/db_stress_tool/no_batched_ops_stress.cc#L1068-L1072 However, IOActivity check inside DbStressRandomAccessFileWrapper was expecting IOActivity::MultiGet when GetEntity() was called. We are fixing the test by setting expected operation to be GetEntity before calling GetEntity() Pull Request resolved: https://github.com/facebook/rocksdb/pull/11850 Test Plan: Error repro'ed by the following run before fix and no more error after the fix. ``` ./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=0 --allow_concurrent_memtable_write=0 --allow_data_in_errors=True --async_io=1 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 --block_protection_bytes_per_key=0 --block_size=16384 --bloom_before_level=1 --bloom_bits=9.880688060667444 --bottommost_compression_type=zstd --bottommost_file_compaction_delay=86400 --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=0 --charge_table_reader=1 --checkpoint_one_in=1000000 --checksum_type=kxxHash64 --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=3 --compaction_readahead_size=1048576 --compaction_ttl=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=1 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=0 --delpercent=4 --delrangepercent=1 --destroy_db_initially=0 --detect_filter_construct_corruption=1 --disable_wal=0 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=0 --fifo_allow_compaction=1 --file_checksum_impl=big --flush_one_in=1000000 --format_version=6 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=4 --index_type=0 --ingest_external_file_one_in=0 
--initial_auto_readahead_size=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=0 --lock_wal_one_in=1000000 --long_running_snapshots=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=0 --max_auto_readahead_size=524288 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16777216 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=2097152 --memtable_max_range_deletions=0 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=2 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=2 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=0 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=0 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=1 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=32 --readahead_size=16384 --readpercent=50 --recycle_log_file_num=1 --reopen=0 --secondary_cache_fault_one_in=0 --secondary_cache_uri=compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true --set_options_one_in=0 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=2 --sync=0 --sync_fault_injection=1 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=3 --unpartitioned_pinning=0 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=1 --use_multi_get_entity=1 --use_multiget=1 --use_put_entity_one_in=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --wal_compression=none --write_buffer_size=33554432 --write_dbid_to_manifest=0 --writepercent=35 ``` Reviewed By: cbi42 Differential Revision: D49344996 Pulled By: jaykorean fbshipit-source-id: 8059b8127c0e3cb8af96cf222f47398413c92c50 --- db_stress_tool/no_batched_ops_stress.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 9fc818b09..ebc252a63 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1067,7 +1067,8 @@ class NonBatchedOpsStressTest : public StressTest { is_consistent = false; } else if (check_get_entity && (s.ok() || s.IsNotFound())) { PinnableWideColumns cmp_result; - + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_GETENTITY); const Status cmp_s = db_->GetEntity(read_opts_copy, cfh, key_slices[i], &cmp_result); From 60de713e1555ad48779c1b2ccec4d1491e64e21a Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sat, 16 Sep 2023 12:08:55 -0700 Subject: [PATCH 123/386] Use uint64_t for `compaction_readahead_size` in stress test (#11849) Summary: Internal clang check complains: `tools/db_bench_tool.cc:722:43: error: implicit 
conversion loses integer precision: 'size_t' (aka 'unsigned long') to 'const gflags::int32' (aka 'const int') [-Werror,-Wshorten-64-to-32] ROCKSDB_NAMESPACE::Options().compaction_readahead_size,` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11849 Test Plan: `make -C internal_repo_rocksdb/repo -j64 USE_CLANG=1 J=40 check`, I can only repro when using on-demand devserver. Reviewed By: hx235 Differential Revision: D49344491 Pulled By: cbi42 fbshipit-source-id: 8c2c0bf2a075c3190b8b91f14f64e26ee252f20f --- tools/db_bench_tool.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 198f27341..f6662c09f 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -718,9 +718,9 @@ DEFINE_int32(file_opening_threads, "If open_files is set to -1, this option set the number of " "threads that will be used to open files during DB::Open()"); -DEFINE_int32(compaction_readahead_size, - ROCKSDB_NAMESPACE::Options().compaction_readahead_size, - "Compaction readahead size"); +DEFINE_uint64(compaction_readahead_size, + ROCKSDB_NAMESPACE::Options().compaction_readahead_size, + "Compaction readahead size"); DEFINE_int32(log_readahead_size, 0, "WAL and manifest readahead size"); From 1a9b42bbddf269e2398439de5d14e50c49a4ef52 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Sun, 17 Sep 2023 19:51:28 -0700 Subject: [PATCH 124/386] Add C API for ReadOptions::auto_readahead_size (#11837) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11837 Reviewed By: cbi42 Differential Revision: D49303447 Pulled By: ajkr fbshipit-source-id: 7debf722339f4fd551760ef8d6801b7a41498565 --- db/c.cc | 5 +++++ include/rocksdb/c.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/db/c.cc b/db/c.cc index 0e508d326..05935d539 100644 --- a/db/c.cc +++ b/db/c.cc @@ -4589,6 +4589,11 @@ void rocksdb_readoptions_set_iter_start_ts(rocksdb_readoptions_t* opt, } } +void rocksdb_readoptions_set_auto_readahead_size(rocksdb_readoptions_t* opt, + unsigned char v) { + opt->rep.auto_readahead_size = v; +} + rocksdb_writeoptions_t* rocksdb_writeoptions_create() { return new rocksdb_writeoptions_t; } diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index f215c899d..eecbe1c64 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1946,6 +1946,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_timestamp( rocksdb_readoptions_t*, const char* ts, size_t tslen); extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_iter_start_ts( rocksdb_readoptions_t*, const char* ts, size_t tslen); +extern ROCKSDB_LIBRARY_API void rocksdb_readoptions_set_auto_readahead_size( + rocksdb_readoptions_t*, unsigned char); /* Write options */ From 4196ad81e372f3adac77e14abfd48189a2119441 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 18 Sep 2023 09:26:29 -0700 Subject: [PATCH 125/386] LZ4 set acceleration parameter (#11844) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11844 Test Plan: Command: ``` for level in 1 1234 32767 -1 -10 -100 -1000 -10000 -100000 -1000000; do echo -n "level=$level " && ./db_bench -benchmarks=compress -compression_type=lz4 -compression_level=$level |& awk '/^compress / {print $5, $6}' ; done ``` Output: ``` level=1 181340 ops/sec level=1234 183197 ops/sec level=32767 181480 ops/sec level=-1 181053 ops/sec level=-10 662858 ops/sec level=-100 2611516 ops/sec level=-1000 3043125 ops/sec level=-10000 3001351 ops/sec level=-100000 2861834 ops/sec level=-1000000 2906413 
ops/sec ``` Reviewed By: cbi42 Differential Revision: D49331443 Pulled By: ajkr fbshipit-source-id: c8909708c3b2b9b83bf2bda2d3f24b8a92d4c2ea --- include/rocksdb/advanced_options.h | 11 +++++++++-- include/rocksdb/options.h | 1 + .../compression_options_level_lz4.md | 1 + util/compression.h | 12 +++++++++--- 4 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 unreleased_history/public_api_changes/compression_options_level_lz4.md diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 08e4e08b7..4f481a8ee 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -76,9 +76,16 @@ struct CompressionOptions { // zlib only: windowBits parameter. See https://www.zlib.net/manual.html int window_bits = -14; - // Compression "level" applicable to zstd, zlib, LZ4. Except for + // Compression "level" applicable to zstd, zlib, LZ4, and LZ4HC. Except for // kDefaultCompressionLevel (see above), the meaning of each value depends - // on the compression algorithm. + // on the compression algorithm. Decreasing across non- + // `kDefaultCompressionLevel` values will either favor speed over + // compression ratio or have no effect. + // + // In LZ4 specifically, the absolute value of a negative `level` internally + // configures the `acceleration` parameter. For example, set `level=-10` for + // `acceleration=10`. This negation is necessary to ensure decreasing `level` + // values favor speed over compression ratio. int level = kDefaultCompressionLevel; // zlib only: strategy parameter. See https://www.zlib.net/manual.html diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index d11ccc62f..2d164eb30 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -206,6 +206,7 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions { // - kZSTD: 3 // - kZlibCompression: Z_DEFAULT_COMPRESSION (currently -1) // - kLZ4HCCompression: 0 + // - kLZ4: -1 (i.e., `acceleration=1`; see `CompressionOptions::level` doc) // - For all others, we do not specify a compression level // // Dynamically changeable through SetOptions() API diff --git a/unreleased_history/public_api_changes/compression_options_level_lz4.md b/unreleased_history/public_api_changes/compression_options_level_lz4.md new file mode 100644 index 000000000..b0f0b56f4 --- /dev/null +++ b/unreleased_history/public_api_changes/compression_options_level_lz4.md @@ -0,0 +1 @@ +* When using LZ4 compression, the `acceleration` parameter is configurable by setting the negated value in `CompressionOptions::level`. 
For example, `CompressionOptions::level=-10` will set `acceleration=10` diff --git a/util/compression.h b/util/compression.h index 3e21a669b..5620969d7 100644 --- a/util/compression.h +++ b/util/compression.h @@ -1153,9 +1153,15 @@ inline bool LZ4_Compress(const CompressionInfo& info, static_cast(compression_dict.size())); } #if LZ4_VERSION_NUMBER >= 10700 // r129+ - outlen = - LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len], - static_cast(length), compress_bound, 1); + int acceleration; + if (info.options().level < 0) { + acceleration = -info.options().level; + } else { + acceleration = 1; + } + outlen = LZ4_compress_fast_continue( + stream, input, &(*output)[output_header_len], static_cast(length), + compress_bound, acceleration); #else // up to r128 outlen = LZ4_compress_limitedOutput_continue( stream, input, &(*output)[output_header_len], static_cast(length), From 6997a06c63b152de76b9933fee347c6afb5ccde9 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 18 Sep 2023 09:37:40 -0700 Subject: [PATCH 126/386] Invalidate threadlocal SV before incrementing `super_version_number_` (#11848) Summary: CI has been hitting assertion error like ``` https://github.com/facebook/rocksdb/issues/8 0x00007fafd9294fd6 in __GI___assert_fail (assertion=assertion@entry=0x7fafda270300 "!*memtable_range_tombstone_iter_ || sv_number_ != cfd_->GetSuperVersionNumber()", file=file@entry=0x7fafda270350 "db/arena_wrapped_db_iter.cc", line=line@entry=124, function=function@entry=0x7fafda270288 "virtual rocksdb::Status rocksdb::ArenaWrappedDBIter::Refresh(const rocksdb::Snapshot*)") at assert.c:101 ``` This is due to * Iterator::Refresh() passing in `cur_sv_number` instead of `sv->version_number` here: https://github.com/facebook/rocksdb/blob/1c6faf35871a236222bcbf0b69718ee43376a951/db/arena_wrapped_db_iter.cc#L94-L96 * `super_version_number_` can be incremented before thread local SV is installed: https://github.com/facebook/rocksdb/blob/main/db/column_family.cc#L1287-L1306 * The optimization in https://github.com/facebook/rocksdb/issues/11452 removed the check for SV number, such that `cur_sv_number > sv.version_number` is possible in the following code. ``` uint64_t cur_sv_number = cfd_->GetSuperVersionNumber(); SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); ``` Not sure why assertion only started failing after https://github.com/facebook/rocksdb/issues/10594, maybe it's because Refresh() is called more often in stress test. 
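In other words, the ordering this change establishes is roughly the following (a simplified sketch of the writer/reader interaction; see the diff below for the exact code):

```cpp
// Writer side (ColumnFamilyData::InstallSuperVersion), simplified:
super_version_ = new_superversion;            // publish the new SV first
// ... swap/invalidate thread-local SV pointers ...
++super_version_number_;                      // bump the counter only afterwards
super_version_->version_number = super_version_number_;

// Reader side (ArenaWrappedDBIter::Refresh), simplified:
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
assert(sv->version_number >= cur_sv_number);  // the assert added by this PR
```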
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11848 Test Plan: * This repros hits the assertion pretty consistently before this change: ``` ./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --async_io=0 --atomic_flush=1 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_one_in=0 --block_size=16384 --bloom_bits=0.7161318870366848 --cache_index_and_filter_blocks=0 --cache_size=8388608 --charge_table_reader=0 --checkpoint_one_in=1000000 --checksum_type=kxxHash --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=3 --compaction_readahead_size=0 --compaction_ttl=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=zlib --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=0 --db_write_buffer_size=8388608 --delpercent=4 --delrangepercent=1 --destroy_db_initially=1 --detect_filter_construct_corruption=0 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --fail_if_options_file_error=0 --fifo_allow_compaction=1 --file_checksum_impl=none --flush_one_in=1000000 --format_version=5 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=14 --index_type=2 --ingest_external_file_one_in=0 --initial_auto_readahead_size=524288 --iterpercent=30 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=1 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=0 --max_auto_readahead_size=524288 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=2500000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16777216 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=1048576 --memtable_max_range_deletions=0 --memtable_prefix_bloom_size_ratio=0.5 --memtable_protection_bytes_per_key=0 --memtable_whole_key_filtering=1 --memtablerep=skip_list --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=500000 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=0 --partition_filters=0 --partition_pinning=3 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=32 --readahead_size=16384 --readpercent=30 --recycle_log_file_num=1 --reopen=0 --ribbon_starting_level=999 --secondary_cache_fault_one_in=0 --secondary_cache_uri= --set_options_one_in=10000 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=600 --subcompactions=1 --sync=0 --sync_fault_injection=1 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --test_cf_consistency=1 --top_level_index_pinning=3 --unpartitioned_pinning=3 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=0 --user_timestamp_size=0 --value_size_mult=32 
--verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=0 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=none --write_buffer_size=1048576 --write_dbid_to_manifest=1 --write_fault_one_in=0 --writepercent=35 --use_io_uring=0 --db=/tmp/rocksdb_crashtest_blackboxnf3pyv_0 --expected_values_dir=/tmp/rocksdb_crashtest_expected_6opy9nqg ``` Reviewed By: ajkr Differential Revision: D49344066 Pulled By: cbi42 fbshipit-source-id: d5373ddb48d933acb42a5dd8fae3f3019b0241e5 --- db/arena_wrapped_db_iter.cc | 3 ++- db/column_family.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/db/arena_wrapped_db_iter.cc b/db/arena_wrapped_db_iter.cc index 865b1ad2e..e6dcb6696 100644 --- a/db/arena_wrapped_db_iter.cc +++ b/db/arena_wrapped_db_iter.cc @@ -86,6 +86,7 @@ Status ArenaWrappedDBIter::Refresh(const Snapshot* snapshot) { new (&arena_) Arena(); SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_); + assert(sv->version_number >= cur_sv_number); SequenceNumber read_seq = GetSeqNum(db_impl_, snapshot); if (read_callback_) { read_callback_->Refresh(read_seq); @@ -93,7 +94,7 @@ Status ArenaWrappedDBIter::Refresh(const Snapshot* snapshot) { Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options, sv->current, read_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations, - cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_, + sv->version_number, read_callback_, db_impl_, cfd_, expose_blob_index_, allow_refresh_); InternalIterator* internal_iter = db_impl_->NewInternalIterator( diff --git a/db/column_family.cc b/db/column_family.cc index 9c126f3c0..8bc3c43e0 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1284,8 +1284,6 @@ void ColumnFamilyData::InstallSuperVersion( new_superversion->Init(this, mem_, imm_.current(), current_); SuperVersion* old_superversion = super_version_; super_version_ = new_superversion; - ++super_version_number_; - super_version_->version_number = super_version_number_; if (old_superversion == nullptr || old_superversion->current != current() || old_superversion->mem != mem_ || old_superversion->imm != imm_.current()) { @@ -1320,6 +1318,8 @@ void ColumnFamilyData::InstallSuperVersion( sv_context->superversions_to_free.push_back(old_superversion); } } + ++super_version_number_; + super_version_->version_number = super_version_number_; } void ColumnFamilyData::ResetThreadLocalSuperVersions() { From 5b5b011cdd8429ce5bcdf35a058ea747cef2083b Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Mon, 18 Sep 2023 11:32:30 -0700 Subject: [PATCH 127/386] Avoid double block cache lookup during Seek with async_io option (#11616) Summary: With the async_io option, the Seek happens in 2 phases. Phase 1 starts an asynchronous read on a block cache miss, and phase 2 waits for it to complete and finishes the seek. In both phases, BlockBasedTable::NewDataBlockIterator is called, which tries to lookup the block cache for the data block first before looking in the prefetch buffer. It's optimized by doing the block cache lookup only in the first phase and save some CPU. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11616 Test Plan: Added unit test Reviewed By: jaykorean Differential Revision: D47477887 Pulled By: akankshamahajan15 fbshipit-source-id: 0355e0a68fc0ea2eb92340ae42735afcdbcbfd79 --- file/prefetch_test.cc | 63 +++++++++++++++ .../block_based/block_based_table_iterator.cc | 9 ++- table/block_based/block_based_table_reader.cc | 81 ++++++++++--------- table/block_based/block_based_table_reader.h | 19 +++-- .../block_based_table_reader_impl.h | 12 +-- .../block_based_table_reader_sync_and_async.h | 17 ++-- .../block_based/filter_block_reader_common.cc | 12 +-- table/block_based/index_reader_common.cc | 2 +- table/block_based/partitioned_filter_block.cc | 15 ++-- .../block_based/partitioned_index_iterator.cc | 3 +- table/block_based/partitioned_index_reader.cc | 2 +- .../block_based/uncompression_dict_reader.cc | 2 +- .../avoid_double_lookup.md | 1 + 13 files changed, 157 insertions(+), 81 deletions(-) create mode 100644 unreleased_history/performance_improvements/avoid_double_lookup.md diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 909b0eba6..69e122392 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1459,6 +1459,69 @@ TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) { Close(); } +TEST_P(PrefetchTest, AvoidBlockCacheLookupTwice) { + const int kNumKeys = 1000; + // Set options + std::shared_ptr fs = + std::make_shared(env_->GetFileSystem(), false); + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + + bool use_direct_io = std::get<0>(GetParam()); + bool async_io = std::get<1>(GetParam()); + + Options options; + SetGenericOptions(env.get(), use_direct_io, options); + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options; + SetBlockBasedTableOptions(table_options); + std::shared_ptr cache = NewLRUCache(4 * 1024 * 1024, 2); // 8MB + table_options.block_cache = cache; + table_options.no_block_cache = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + Status s = TryReopen(options); + if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { + // If direct IO is not supported, skip the test + return; + } else { + ASSERT_OK(s); + } + + // Write to DB. + { + WriteBatch batch; + Random rnd(309); + for (int i = 0; i < kNumKeys; i++) { + ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000))); + } + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::string start_key = BuildKey(0); + std::string end_key = BuildKey(kNumKeys - 1); + Slice least(start_key.data(), start_key.size()); + Slice greatest(end_key.data(), end_key.size()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); + } + + ReadOptions ro; + ro.async_io = async_io; + // Iterate over the keys. + { + // Each block contains around 4 keys. + auto iter = std::unique_ptr(db_->NewIterator(ro)); + ASSERT_OK(options.statistics->Reset()); + + iter->Seek(BuildKey(99)); // Prefetch data because of seek parallelization. 
+ ASSERT_TRUE(iter->Valid()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS), + 1); + } + + Close(); +} + TEST_P(PrefetchTest, DBIterAsyncIONoIOUring) { if (mem_env_ || encrypted_env_) { ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index ac47043f0..9cc8ca8c9 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -303,7 +303,8 @@ void BlockBasedTableIterator::InitDataBlock() { read_options_, data_block_handle, &block_iter_, BlockType::kData, /*get_context=*/nullptr, &lookup_context_, block_prefetcher_.prefetch_buffer(), - /*for_compaction=*/is_for_compaction, /*async_read=*/false, s); + /*for_compaction=*/is_for_compaction, /*async_read=*/false, s, + /*use_block_cache_for_lookup=*/true); block_iter_points_to_real_block_ = true; CheckDataBlockWithinUpperBound(); if (!is_for_compaction && @@ -349,7 +350,8 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { read_options_, data_block_handle, &block_iter_, BlockType::kData, /*get_context=*/nullptr, &lookup_context_, block_prefetcher_.prefetch_buffer(), - /*for_compaction=*/is_for_compaction, /*async_read=*/true, s); + /*for_compaction=*/is_for_compaction, /*async_read=*/true, s, + /*use_block_cache_for_lookup=*/true); if (s.IsTryAgain()) { async_read_in_progress_ = true; @@ -364,7 +366,8 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { read_options_, data_block_handle, &block_iter_, BlockType::kData, /*get_context=*/nullptr, &lookup_context_, block_prefetcher_.prefetch_buffer(), - /*for_compaction=*/is_for_compaction, /*async_read=*/false, s); + /*for_compaction=*/is_for_compaction, /*async_read=*/false, s, + /*use_block_cache_for_lookup=*/false); } block_iter_points_to_real_block_ = true; CheckDataBlockWithinUpperBound(); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index a454762e2..528b87bb9 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -88,19 +88,20 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) { // Explicitly instantiate templates for each "blocklike" type we use (and // before implicit specialization). // This makes it possible to keep the template definitions in the .cc file. 
-#define INSTANTIATE_BLOCKLIKE_TEMPLATES(T) \ - template Status BlockBasedTable::RetrieveBlock( \ - FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ - const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ - CachableEntry* out_parsed_block, GetContext* get_context, \ - BlockCacheLookupContext* lookup_context, bool for_compaction, \ - bool use_cache, bool async_read) const; \ - template Status BlockBasedTable::MaybeReadBlockAndLoadToCache( \ - FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ - const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ - bool for_compaction, CachableEntry* block_entry, \ - GetContext* get_context, BlockCacheLookupContext* lookup_context, \ - BlockContents* contents, bool async_read) const; +#define INSTANTIATE_BLOCKLIKE_TEMPLATES(T) \ + template Status BlockBasedTable::RetrieveBlock( \ + FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ + const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ + CachableEntry* out_parsed_block, GetContext* get_context, \ + BlockCacheLookupContext* lookup_context, bool for_compaction, \ + bool use_cache, bool async_read, bool use_block_cache_for_lookup) const; \ + template Status BlockBasedTable::MaybeReadBlockAndLoadToCache( \ + FilePrefetchBuffer * prefetch_buffer, const ReadOptions& ro, \ + const BlockHandle& handle, const UncompressionDict& uncompression_dict, \ + bool for_compaction, CachableEntry* block_entry, \ + GetContext* get_context, BlockCacheLookupContext* lookup_context, \ + BlockContents* contents, bool async_read, \ + bool use_block_cache_for_lookup) const; INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock); INSTANTIATE_BLOCKLIKE_TEMPLATES(UncompressionDict); @@ -994,7 +995,8 @@ Status BlockBasedTable::ReadRangeDelBlock( read_options, range_del_handle, /*input_iter=*/nullptr, BlockType::kRangeDeletion, /*get_context=*/nullptr, lookup_context, prefetch_buffer, - /*for_compaction= */ false, /*async_read= */ false, tmp_status)); + /*for_compaction= */ false, /*async_read= */ false, tmp_status, + /*use_block_cache_for_lookup=*/true)); assert(iter != nullptr); s = iter->status(); if (!s.ok()) { @@ -1477,12 +1479,12 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( const BlockHandle& handle, const UncompressionDict& uncompression_dict, bool for_compaction, CachableEntry* out_parsed_block, GetContext* get_context, BlockCacheLookupContext* lookup_context, - BlockContents* contents, bool async_read) const { + BlockContents* contents, bool async_read, + bool use_block_cache_for_lookup) const { assert(out_parsed_block != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); BlockCacheInterface block_cache{ rep_->table_options.block_cache.get()}; - // First, try to get the block from the cache // // If either block cache is enabled, we'll try to read from it. @@ -1496,21 +1498,25 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( key = key_data.AsSlice(); if (!contents) { - s = GetDataBlockFromCache(key, block_cache, out_parsed_block, - get_context); - // Value could still be null at this point, so check the cache handle - // and update the read pattern for prefetching - if (out_parsed_block->GetValue() || out_parsed_block->GetCacheHandle()) { - // TODO(haoyu): Differentiate cache hit on uncompressed block cache and - // compressed block cache. 
- is_cache_hit = true; - if (prefetch_buffer) { - // Update the block details so that PrefetchBuffer can use the read - // pattern to determine if reads are sequential or not for - // prefetching. It should also take in account blocks read from cache. - prefetch_buffer->UpdateReadPattern( - handle.offset(), BlockSizeWithTrailer(handle), - ro.adaptive_readahead /*decrease_readahead_size*/); + if (use_block_cache_for_lookup) { + s = GetDataBlockFromCache(key, block_cache, out_parsed_block, + get_context); + // Value could still be null at this point, so check the cache handle + // and update the read pattern for prefetching + if (out_parsed_block->GetValue() || + out_parsed_block->GetCacheHandle()) { + // TODO(haoyu): Differentiate cache hit on uncompressed block cache + // and compressed block cache. + is_cache_hit = true; + if (prefetch_buffer) { + // Update the block details so that PrefetchBuffer can use the read + // pattern to determine if reads are sequential or not for + // prefetching. It should also take in account blocks read from + // cache. + prefetch_buffer->UpdateReadPattern( + handle.offset(), BlockSizeWithTrailer(handle), + ro.adaptive_readahead /*decrease_readahead_size*/); + } } } } @@ -1693,7 +1699,7 @@ WithBlocklikeCheck BlockBasedTable::RetrieveBlock( const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* out_parsed_block, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, - bool use_cache, bool async_read) const { + bool use_cache, bool async_read, bool use_block_cache_for_lookup) const { assert(out_parsed_block); assert(out_parsed_block->IsEmpty()); @@ -1702,7 +1708,7 @@ WithBlocklikeCheck BlockBasedTable::RetrieveBlock( s = MaybeReadBlockAndLoadToCache( prefetch_buffer, ro, handle, uncompression_dict, for_compaction, out_parsed_block, get_context, lookup_context, - /*contents=*/nullptr, async_read); + /*contents=*/nullptr, async_read, use_block_cache_for_lookup); if (!s.ok()) { return s; @@ -2165,7 +2171,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, NewDataBlockIterator( read_options, v.handle, &biter, BlockType::kData, get_context, &lookup_data_block_context, /*prefetch_buffer=*/nullptr, - /*for_compaction=*/false, /*async_read=*/false, tmp_status); + /*for_compaction=*/false, /*async_read=*/false, tmp_status, + /*use_block_cache_for_lookup=*/true); if (no_io && biter.status().IsIncomplete()) { // couldn't get block from block_cache @@ -2335,7 +2342,7 @@ Status BlockBasedTable::Prefetch(const ReadOptions& read_options, read_options, block_handle, &biter, /*type=*/BlockType::kData, /*get_context=*/nullptr, &lookup_context, /*prefetch_buffer=*/nullptr, /*for_compaction=*/false, - /*async_read=*/false, tmp_status); + /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true); if (!biter.status().ok()) { // there was an unexpected error while pre-fetching @@ -2760,7 +2767,7 @@ Status BlockBasedTable::GetKVPairsFromDataBlocks( /*input_iter=*/nullptr, /*type=*/BlockType::kData, /*get_context=*/nullptr, /*lookup_context=*/nullptr, /*prefetch_buffer=*/nullptr, /*for_compaction=*/false, - /*async_read=*/false, tmp_status)); + /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true)); s = datablock_iter->status(); if (!s.ok()) { @@ -2999,7 +3006,7 @@ Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) { /*input_iter=*/nullptr, /*type=*/BlockType::kData, /*get_context=*/nullptr, /*lookup_context=*/nullptr, /*prefetch_buffer=*/nullptr, 
/*for_compaction=*/false, - /*async_read=*/false, tmp_status)); + /*async_read=*/false, tmp_status, /*use_block_cache_for_lookup=*/true)); s = datablock_iter->status(); if (!s.ok()) { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 120907240..6162c5889 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -287,14 +287,12 @@ class BlockBasedTable : public TableReader { // input_iter: if it is not null, update this one and return it as Iterator template - TBlockIter* NewDataBlockIterator(const ReadOptions& ro, - const BlockHandle& block_handle, - TBlockIter* input_iter, BlockType block_type, - GetContext* get_context, - BlockCacheLookupContext* lookup_context, - FilePrefetchBuffer* prefetch_buffer, - bool for_compaction, bool async_read, - Status& s) const; + TBlockIter* NewDataBlockIterator( + const ReadOptions& ro, const BlockHandle& block_handle, + TBlockIter* input_iter, BlockType block_type, GetContext* get_context, + BlockCacheLookupContext* lookup_context, + FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read, + Status& s, bool use_block_cache_for_lookup) const; // input_iter: if it is not null, update this one and return it as Iterator template @@ -351,7 +349,8 @@ class BlockBasedTable : public TableReader { const BlockHandle& handle, const UncompressionDict& uncompression_dict, bool for_compaction, CachableEntry* block_entry, GetContext* get_context, BlockCacheLookupContext* lookup_context, - BlockContents* contents, bool async_read) const; + BlockContents* contents, bool async_read, + bool use_block_cache_for_lookup) const; // Similar to the above, with one crucial difference: it will retrieve the // block from the file even if there are no caches configured (assuming the @@ -362,7 +361,7 @@ class BlockBasedTable : public TableReader { const BlockHandle& handle, const UncompressionDict& uncompression_dict, CachableEntry* block_entry, GetContext* get_context, BlockCacheLookupContext* lookup_context, bool for_compaction, - bool use_cache, bool async_read) const; + bool use_cache, bool async_read, bool use_block_cache_for_lookup) const; template WithBlocklikeCheck SaveLookupContextOrTraceRecord( diff --git a/table/block_based/block_based_table_reader_impl.h b/table/block_based/block_based_table_reader_impl.h index 801b4614f..5f8456bee 100644 --- a/table/block_based/block_based_table_reader_impl.h +++ b/table/block_based/block_based_table_reader_impl.h @@ -49,7 +49,7 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( BlockType block_type, GetContext* get_context, BlockCacheLookupContext* lookup_context, FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read, - Status& s) const { + Status& s, bool use_block_cache_for_lookup) const { using IterBlocklike = typename IterTraits::IterBlocklike; PERF_TIMER_GUARD(new_table_block_iter_nanos); @@ -77,15 +77,15 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( const UncompressionDict& dict = uncompression_dict.GetValue() ? 
*uncompression_dict.GetValue() : UncompressionDict::GetEmptyDict(); - s = RetrieveBlock(prefetch_buffer, ro, handle, dict, - &block.As(), get_context, lookup_context, - for_compaction, - /* use_cache */ true, async_read); + s = RetrieveBlock( + prefetch_buffer, ro, handle, dict, &block.As(), + get_context, lookup_context, for_compaction, + /* use_cache */ true, async_read, use_block_cache_for_lookup); } else { s = RetrieveBlock( prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(), &block.As(), get_context, lookup_context, for_compaction, - /* use_cache */ true, async_read); + /* use_cache */ true, async_read, use_block_cache_for_lookup); } if (s.IsTryAgain() && async_read) { diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index ab3ee01bb..8ee594db6 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -50,12 +50,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks) } // XXX: use_cache=true means double cache query? - statuses[idx_in_batch] = - RetrieveBlock(nullptr, options, handle, uncompression_dict, - &results[idx_in_batch].As(), - mget_iter->get_context, /* lookup_context */ nullptr, - /* for_compaction */ false, /* use_cache */ true, - /* async_read */ false); + statuses[idx_in_batch] = RetrieveBlock( + nullptr, options, handle, uncompression_dict, + &results[idx_in_batch].As(), mget_iter->get_context, + /* lookup_context */ nullptr, + /* for_compaction */ false, /* use_cache */ true, + /* async_read */ false, /* use_block_cache_for_lookup */ true); } assert(idx_in_batch == handles->size()); CO_RETURN; @@ -269,7 +269,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks) nullptr, options, handle, uncompression_dict, /*for_compaction=*/false, block_entry, mget_iter->get_context, /*lookup_context=*/nullptr, &serialized_block, - /*async_read=*/false); + /*async_read=*/false, /*use_block_cache_for_lookup=*/true); // block_entry value could be null if no block cache is present, i.e // BlockBasedTableOptions::no_block_cache is true and no compressed @@ -628,7 +628,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) read_options, iiter->value().handle, &next_biter, BlockType::kData, get_context, lookup_data_block_context, /* prefetch_buffer= */ nullptr, /* for_compaction = */ false, - /*async_read = */ false, tmp_s); + /*async_read = */ false, tmp_s, + /* use_block_cache_for_lookup = */ true); biter = &next_biter; reusing_prev_block = false; later_reused = false; diff --git a/table/block_based/filter_block_reader_common.cc b/table/block_based/filter_block_reader_common.cc index 32f800db7..e459b00ad 100644 --- a/table/block_based/filter_block_reader_common.cc +++ b/table/block_based/filter_block_reader_common.cc @@ -28,12 +28,12 @@ Status FilterBlockReaderCommon::ReadFilterBlock( const BlockBasedTable::Rep* const rep = table->get_rep(); assert(rep); - const Status s = - table->RetrieveBlock(prefetch_buffer, read_options, rep->filter_handle, - UncompressionDict::GetEmptyDict(), filter_block, - get_context, lookup_context, - /* for_compaction */ false, use_cache, - /* async_read */ false); + const Status s = table->RetrieveBlock( + prefetch_buffer, read_options, rep->filter_handle, + UncompressionDict::GetEmptyDict(), filter_block, get_context, + lookup_context, + /* for_compaction */ false, use_cache, + /* async_read */ false, /* use_block_cache_for_lookup */ 
true); return s; } diff --git a/table/block_based/index_reader_common.cc b/table/block_based/index_reader_common.cc index e68be2a10..a1b05c2d6 100644 --- a/table/block_based/index_reader_common.cc +++ b/table/block_based/index_reader_common.cc @@ -29,7 +29,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock( prefetch_buffer, read_options, rep->index_handle, UncompressionDict::GetEmptyDict(), &index_block->As(), get_context, lookup_context, /* for_compaction */ false, use_cache, - /* async_read */ false); + /* async_read */ false, /* use_block_cache_for_lookup */ true); return s; } diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 84888d0e9..8b4174645 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -317,12 +317,12 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock( read_options.read_tier = kBlockCacheTier; } - const Status s = - table()->RetrieveBlock(prefetch_buffer, read_options, fltr_blk_handle, - UncompressionDict::GetEmptyDict(), filter_block, - get_context, lookup_context, - /* for_compaction */ false, /* use_cache */ true, - /* async_read */ false); + const Status s = table()->RetrieveBlock( + prefetch_buffer, read_options, fltr_blk_handle, + UncompressionDict::GetEmptyDict(), filter_block, get_context, + lookup_context, + /* for_compaction */ false, /* use_cache */ true, + /* async_read */ false, /* use_block_cache_for_lookup */ true); return s; } @@ -521,7 +521,8 @@ Status PartitionedFilterBlockReader::CacheDependencies( prefetch_buffer ? prefetch_buffer.get() : tail_prefetch_buffer, ro, handle, UncompressionDict::GetEmptyDict(), /* for_compaction */ false, &block, nullptr /* get_context */, - &lookup_context, nullptr /* contents */, false); + &lookup_context, nullptr /* contents */, false, + /* use_block_cache_for_lookup */ true); if (!s.ok()) { return s; } diff --git a/table/block_based/partitioned_index_iterator.cc b/table/block_based/partitioned_index_iterator.cc index 3b0527033..db1250f22 100644 --- a/table/block_based/partitioned_index_iterator.cc +++ b/table/block_based/partitioned_index_iterator.cc @@ -98,7 +98,8 @@ void PartitionedIndexIterator::InitPartitionedIndexBlock() { BlockType::kIndex, /*get_context=*/nullptr, &lookup_context_, block_prefetcher_.prefetch_buffer(), - /*for_compaction=*/is_for_compaction, /*async_read=*/false, s); + /*for_compaction=*/is_for_compaction, /*async_read=*/false, s, + /*use_block_cache_for_lookup=*/true); block_iter_points_to_real_block_ = true; // We could check upper bound here but it is complicated to reason about // upper bound in index iterator. 
On the other than, in large scans, index
diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc
index b4dc5fce2..2b8b5bce1 100644
--- a/table/block_based/partitioned_index_reader.cc
+++ b/table/block_based/partitioned_index_reader.cc
@@ -200,7 +200,7 @@ Status PartitionIndexReader::CacheDependencies(
         handle, UncompressionDict::GetEmptyDict(),
         /*for_compaction=*/false, &block.As(), /*get_context=*/nullptr,
         &lookup_context, /*contents=*/nullptr,
-        /*async_read=*/false);
+        /*async_read=*/false, /*use_block_cache_for_lookup=*/true);
 
     if (!s.ok()) {
       return s;
diff --git a/table/block_based/uncompression_dict_reader.cc b/table/block_based/uncompression_dict_reader.cc
index 4ac442b6b..3656b35d5 100644
--- a/table/block_based/uncompression_dict_reader.cc
+++ b/table/block_based/uncompression_dict_reader.cc
@@ -63,7 +63,7 @@ Status UncompressionDictReader::ReadUncompressionDictionary(
       UncompressionDict::GetEmptyDict(), uncompression_dict, get_context,
       lookup_context,
       /* for_compaction */ false, use_cache,
-      /* async_read */ false);
+      /* async_read */ false, /* use_block_cache_for_lookup */ true);
 
   if (!s.ok()) {
     ROCKS_LOG_WARN(
diff --git a/unreleased_history/performance_improvements/avoid_double_lookup.md b/unreleased_history/performance_improvements/avoid_double_lookup.md
new file mode 100644
index 000000000..d99a8707c
--- /dev/null
+++ b/unreleased_history/performance_improvements/avoid_double_lookup.md
@@ -0,0 +1 @@
+During async_io, the Seek happens in 2 phases. Phase 1 starts an asynchronous read on a block cache miss, and phase 2 waits for it to complete and finishes the seek. Previously, both phases looked up the data block in the block cache before checking the prefetch buffer. This is now optimized to do the block cache lookup only in the first phase, which saves some CPU.
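To make the optimization in the release note above concrete, here is a minimal, self-contained C++ sketch of the two-phase seek. `BlockCache`, `PrefetchBuffer`, `SeekPhase1`, and `SeekPhase2` are toy stand-ins invented for this sketch, not RocksDB types; only the control flow (a block cache lookup in phase 1 and none in phase 2, mirroring the `use_block_cache_for_lookup` flag threaded through the diffs above) reflects the actual change.

```
// Toy illustration of the two-phase async seek: phase 1 checks the block
// cache and starts an async read on a miss; phase 2 skips the cache lookup
// and only waits for the read, since phase 1 already established the miss.
#include <cstdint>
#include <future>
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

struct Block { std::string data; };

struct BlockCache {
  std::unordered_map<uint64_t, Block> blocks;
  std::optional<Block> Lookup(uint64_t handle) const {
    auto it = blocks.find(handle);
    if (it == blocks.end()) return std::nullopt;
    return it->second;
  }
};

struct PrefetchBuffer {
  std::future<Block> pending;
  void StartAsyncRead(uint64_t handle) {
    pending = std::async(std::launch::async, [handle] {
      return Block{"data@" + std::to_string(handle)};  // pretend file read
    });
  }
  Block Wait() { return pending.get(); }
};

// Phase 1: look in the block cache; on a miss, kick off the async read.
// Returns the block only on a cache hit. (use_block_cache_for_lookup=true)
std::optional<Block> SeekPhase1(const BlockCache& cache, PrefetchBuffer& pb,
                                uint64_t handle) {
  if (auto hit = cache.Lookup(handle)) {
    return hit;
  }
  pb.StartAsyncRead(handle);
  return std::nullopt;
}

// Phase 2: the block is already known to be missing from the cache, so skip
// the cache lookup and just wait for the read. (use_block_cache_for_lookup=false)
Block SeekPhase2(PrefetchBuffer& pb) { return pb.Wait(); }

int main() {
  BlockCache cache;
  PrefetchBuffer pb;
  const uint64_t handle = 42;
  if (auto b = SeekPhase1(cache, pb, handle)) {
    std::cout << "cache hit: " << b->data << "\n";
  } else {
    std::cout << "cache miss, read: " << SeekPhase2(pb).data << "\n";
  }
  return 0;
}
```

The design point is simply that phase 2 only runs after phase 1 has already missed in the block cache, so repeating the lookup there can only burn CPU without ever producing a hit.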
From 0dac75d54232cac007cb9a2ca0a3962a83d8caa9 Mon Sep 17 00:00:00 2001
From: dengyan <837123564@qq.com>
Date: Mon, 18 Sep 2023 12:06:58 -0700
Subject: [PATCH 128/386] Fix a bug in MultiGet when skip_memtable is true
 (#11700)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
When skip_memtable is true in MultiGetImpl, lookup_current is always false, so data in super_version->current can never be queried.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11700

Reviewed By: anand1976

Differential Revision: D49342877

Pulled By: jowlyzhang

fbshipit-source-id: 270a36d049b4cb7fd151a1fa3080300310111271
---
 db/db_impl/db_impl.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 9a947cf67..0b23c3db0 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -3129,7 +3129,7 @@ Status DBImpl::MultiGetImpl(
                    stats_);
   MultiGetRange range = ctx.GetMultiGetRange();
   range.AddValueSize(curr_value_size);
-  bool lookup_current = false;
+  bool lookup_current = true;
 
   keys_left -= batch_size;
   for (auto mget_iter = range.begin(); mget_iter != range.end();
@@ -3148,9 +3148,10 @@ Status DBImpl::MultiGetImpl(
       super_version->imm->MultiGet(read_options, &range, callback);
     }
     if (!range.empty()) {
-      lookup_current = true;
       uint64_t left = range.KeysLeft();
       RecordTick(stats_, MEMTABLE_MISS, left);
+    } else {
+      lookup_current = false;
     }
   }
   if (lookup_current) {

From 920d72e6faf1a2cf941d9f6aaf8f771e4dbdca38 Mon Sep 17 00:00:00 2001
From: Kefu Chai
Date: Mon, 18 Sep 2023 12:11:15 -0700
Subject: [PATCH 129/386] cmake: check PORTABLE for well-known boolean
 representations (#11724)

Summary:
Before 459969e9, we were using CMake `option()` to represent `PORTABLE`, so the CMake boolean representations like ON, OFF, 0 and 1 were supported. This is also what downstream package maintainers were using before v8.3.2.

In 459969e9, this option was expanded to specify the argument of `-march` passed to the compiler, in order to be more flexible and hence allow the user to specify a CPU type directly. But in the typical use cases, a user just wants "ON" for the best performance on the build host, and "OFF" for a portable build when it comes to a distro package maintainer.

So, in this change, check for the boolean representations supported by CMake.
Fixes https://github.com/facebook/rocksdb/issues/11558 Signed-off-by: Kefu Chai Pull Request resolved: https://github.com/facebook/rocksdb/pull/11724 Reviewed By: anand1976 Differential Revision: D48827447 Pulled By: ajkr fbshipit-source-id: b2fef7076b2e90ad13a1fbec80e197841fa06d38 --- CMakeLists.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2be25e2c0..1a53fcba3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -254,7 +254,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") endif(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64") set(PORTABLE 0 CACHE STRING "Minimum CPU arch to support, or 0 = current CPU, 1 = baseline CPU") -if(PORTABLE STREQUAL 1) +if(PORTABLE MATCHES "1|ON|YES|TRUE|Y") # Usually nothing to do; compiler default is typically the most general if(NOT MSVC) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^s390x") @@ -264,14 +264,7 @@ if(PORTABLE STREQUAL 1) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=loongarch64") endif() endif() -elseif(PORTABLE MATCHES [^0]+) - # Name of a CPU arch spec or feature set to require - if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:${PORTABLE}") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${PORTABLE}") - endif() -else() +elseif(PORTABLE MATCHES "0|OFF|NO|FALSE|N") if(MSVC) # NOTE: No auto-detection of current CPU, but instead assume some useful # level of optimization is supported @@ -285,6 +278,13 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() endif() +else() + # Name of a CPU arch spec or feature set to require + if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:${PORTABLE}") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${PORTABLE}") + endif() endif() include(CheckCXXSourceCompiles) From cc254efea6b03e2e88bf3089ab4c31404b08bf10 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 18 Sep 2023 13:11:53 -0700 Subject: [PATCH 130/386] Release compaction files in manifest write callback (#11764) Summary: Fixes https://github.com/facebook/rocksdb/issues/10257 (also see [here](https://github.com/facebook/rocksdb/pull/10355#issuecomment-1684308556)) by releasing compaction files earlier when writing to manifest in LogAndApply(). This is done by passing in a [callback](https://github.com/facebook/rocksdb/blob/ba597514309b686d8addb59616f067d5522186b7/db/version_set.h#L1199) to LogAndApply(). The new Version is created in the same critical section where compaction files are released. When compaction picker is picking compaction based on the new version, these compaction files will already be released. 
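A minimal sketch of the callback mechanism described above, assuming simplified stand-in types (`Status`, `CompactionLike`, `VersionSetLike`) and simplified locking; the real interface is `VersionSet::LogAndApply()` with the `manifest_wcb` argument added in the diffs that follow.

```
// Sketch: a manifest-write callback that releases compaction input files in
// the same critical section where the new Version is installed, so a
// compaction picked against that Version cannot still see the inputs as
// "being compacted". Types and locking here are simplified stand-ins.
#include <functional>
#include <iostream>
#include <mutex>

struct Status {
  bool ok_ = true;
  bool ok() const { return ok_; }
};

struct CompactionLike {
  bool files_released = false;
  void ReleaseCompactionFiles(const Status& /*s*/) { files_released = true; }
};

class VersionSetLike {
 public:
  // manifest_wcb runs under the DB mutex, after the MANIFEST write and
  // right before the new Version becomes visible.
  Status LogAndApply(std::mutex& db_mutex,
                     const std::function<void(const Status&)>& manifest_wcb) {
    Status s;
    // ... write the version edit to the MANIFEST (not shown) ...
    std::lock_guard<std::mutex> lock(db_mutex);
    if (manifest_wcb) {
      manifest_wcb(s);  // e.g. release compaction input files
    }
    // ... install the new Version (not shown) ...
    return s;
  }
};

int main() {
  std::mutex db_mutex;
  VersionSetLike versions;
  CompactionLike c;
  bool compaction_released = false;
  Status s = versions.LogAndApply(db_mutex, [&](const Status& st) {
    c.ReleaseCompactionFiles(st);
    compaction_released = true;
  });
  std::cout << "released=" << compaction_released << " ok=" << s.ok() << "\n";
  return 0;
}
```

Because the callback fires inside that critical section, the caller can also tell (via `compaction_released`) whether it still needs to release the files itself on the error paths.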
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11764 Test Plan: * Existing unit tests * A repro unit test to validate that compaction files are released: `./db_compaction_test --gtest_filter=DBCompactionTest.ReleaseCompactionDuringManifestWrite` * `python3 ./tools/db_crashtest.py --simple whitebox` with some assertions to check compaction files are released Reviewed By: ajkr Differential Revision: D48742152 Pulled By: cbi42 fbshipit-source-id: 7560fd0e723a63fe692234015d2b96850f8b5d77 --- db/compaction/compaction_job.cc | 19 +++- db/compaction/compaction_job.h | 8 +- db/compaction/compaction_job_test.cc | 4 +- db/db_compaction_test.cc | 106 ++++++++++++++++++ db/db_impl/db_impl_compaction_flush.cc | 48 ++++++-- db/version_set.h | 5 +- .../bug_fixes/no_compaction_scheduled_bug.md | 1 + 7 files changed, 171 insertions(+), 20 deletions(-) create mode 100644 unreleased_history/bug_fixes/no_compaction_scheduled_bug.md diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 66f377a5f..904a10743 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -844,7 +844,8 @@ Status CompactionJob::Run() { return status; } -Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { +Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options, + bool* compaction_released) { assert(compact_); AutoThreadOperationStageUpdater stage_updater( @@ -860,7 +861,7 @@ Status CompactionJob::Install(const MutableCFOptions& mutable_cf_options) { compaction_stats_); if (status.ok()) { - status = InstallCompactionResults(mutable_cf_options); + status = InstallCompactionResults(mutable_cf_options, compaction_released); } if (!versions_->io_status().ok()) { io_status_ = versions_->io_status(); @@ -1697,7 +1698,7 @@ Status CompactionJob::FinishCompactionOutputFile( } Status CompactionJob::InstallCompactionResults( - const MutableCFOptions& mutable_cf_options) { + const MutableCFOptions& mutable_cf_options, bool* compaction_released) { assert(compact_); db_mutex_->AssertHeld(); @@ -1779,9 +1780,15 @@ Status CompactionJob::InstallCompactionResults( } } - return versions_->LogAndApply(compaction->column_family_data(), - mutable_cf_options, read_options, edit, - db_mutex_, db_directory_); + auto manifest_wcb = [&compaction, &compaction_released](const Status& s) { + compaction->ReleaseCompactionFiles(s); + *compaction_released = true; + }; + + return versions_->LogAndApply( + compaction->column_family_data(), mutable_cf_options, read_options, edit, + db_mutex_, db_directory_, /*new_descriptor_log=*/false, + /*column_family_options=*/nullptr, manifest_wcb); } void CompactionJob::RecordCompactionIOStats() { diff --git a/db/compaction/compaction_job.h b/db/compaction/compaction_job.h index 926f4a8f9..72d256735 100644 --- a/db/compaction/compaction_job.h +++ b/db/compaction/compaction_job.h @@ -186,7 +186,10 @@ class CompactionJob { // REQUIRED: mutex held // Add compaction input/output to the current version - Status Install(const MutableCFOptions& mutable_cf_options); + // Releases compaction file through Compaction::ReleaseCompactionFiles(). + // Sets *compaction_released to true if compaction is released. 
+ Status Install(const MutableCFOptions& mutable_cf_options, + bool* compaction_released); // Return the IO status IOStatus io_status() const { return io_status_; } @@ -273,7 +276,8 @@ class CompactionJob { const Slice& next_table_min_key, const Slice* comp_start_user_key, const Slice* comp_end_user_key); - Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options); + Status InstallCompactionResults(const MutableCFOptions& mutable_cf_options, + bool* compaction_released); Status OpenCompactionOutputFile(SubcompactionState* sub_compact, CompactionOutputs& outputs); void UpdateCompactionJobStats( diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 8f91cc04c..66a47a26f 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -674,7 +674,9 @@ class CompactionJobTestBase : public testing::Test { ASSERT_OK(s); ASSERT_OK(compaction_job.io_status()); mutex_.Lock(); - ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions())); + bool compaction_released = false; + ASSERT_OK(compaction_job.Install(*cfd->GetLatestMutableCFOptions(), + &compaction_released)); ASSERT_OK(compaction_job.io_status()); mutex_.Unlock(); log_buffer.FlushBufferToLog(); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index d1bd59155..b0f6a792b 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -10114,6 +10114,112 @@ TEST_F(DBCompactionTest, ErrorWhenReadFileHead) { } } +TEST_F(DBCompactionTest, ReleaseCompactionDuringManifestWrite) { + // Tests the fix for issue #10257. + // Compactions are released in LogAndApply() so that picking a compaction + // from the new Version won't see these compactions as registered. + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleLevel; + // Make sure we can run multiple compactions at the same time. + env_->SetBackgroundThreads(3, Env::Priority::LOW); + env_->SetBackgroundThreads(3, Env::Priority::BOTTOM); + options.max_background_compactions = 3; + options.num_levels = 4; + DestroyAndReopen(options); + Random rnd(301); + + // Construct the following LSM + // L2: [K1-K2] [K10-K11] [k100-k101] + // L3: [K1] [K10] [k100] + // We will have 3 threads to run 3 manual compactions. + // The first thread that writes to MANIFEST will not finish + // until the next two threads enters LogAndApply() and form + // a write group. + // We check that compactions are all released after the first + // thread from the write group finishes writing to MANIFEST. 
+ + // L3 + ASSERT_OK(Put(Key(1), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(3); + ASSERT_OK(Put(Key(10), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(3); + ASSERT_OK(Put(Key(100), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(3); + // L2 + ASSERT_OK(Put(Key(100), rnd.RandomString(20))); + ASSERT_OK(Put(Key(101), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + ASSERT_OK(Put(Key(1), rnd.RandomString(20))); + ASSERT_OK(Put(Key(2), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + ASSERT_OK(Put(Key(10), rnd.RandomString(20))); + ASSERT_OK(Put(Key(11), rnd.RandomString(20))); + ASSERT_OK(Flush()); + MoveFilesToLevel(2); + + ASSERT_EQ(NumTableFilesAtLevel(1), 0); + ASSERT_EQ(NumTableFilesAtLevel(2), 3); + ASSERT_EQ(NumTableFilesAtLevel(3), 3); + + SyncPoint::GetInstance()->ClearAllCallBacks(); + std::atomic_int count = 0; + SyncPoint::GetInstance()->SetCallBack( + "VersionSet::LogAndApply:BeforeWriterWaiting", [&](void*) { + int c = count.fetch_add(1); + if (c == 2) { + TEST_SYNC_POINT("all threads to enter LogAndApply"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"all threads to enter LogAndApply", + "VersionSet::LogAndApply:WriteManifestStart"}}); + // Verify that compactions are released after writing to MANIFEST + std::atomic_int after_compact_count = 0; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:AfterCompaction", [&](void* ptr) { + int c = after_compact_count.fetch_add(1); + if (c > 0) { + ColumnFamilyData* cfd = (ColumnFamilyData*)(ptr); + ASSERT_TRUE( + cfd->compaction_picker()->compactions_in_progress()->empty()); + } + }); + SyncPoint::GetInstance()->EnableProcessing(); + + std::vector threads; + threads.emplace_back(std::thread([&]() { + std::string k1_str = Key(1); + std::string k2_str = Key(2); + Slice k1 = k1_str; + Slice k2 = k2_str; + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &k1, &k2)); + })); + threads.emplace_back(std::thread([&]() { + std::string k10_str = Key(10); + std::string k11_str = Key(11); + Slice k10 = k10_str; + Slice k11 = k11_str; + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &k10, &k11)); + })); + std::string k100_str = Key(100); + std::string k101_str = Key(101); + Slice k100 = k100_str; + Slice k101 = k101_str; + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &k100, &k101)); + + for (auto& thread : threads) { + thread.join(); + } + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index a1a544eef..333d4ad15 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1512,7 +1512,12 @@ Status DBImpl::CompactFilesImpl( TEST_SYNC_POINT("CompactFilesImpl:3"); mutex_.Lock(); - Status status = compaction_job.Install(*c->mutable_cf_options()); + bool compaction_released = false; + Status status = + compaction_job.Install(*c->mutable_cf_options(), &compaction_released); + if (!compaction_released) { + c->ReleaseCompactionFiles(s); + } if (status.ok()) { assert(compaction_job.io_status().ok()); InstallSuperVersionAndScheduleWork(c->column_family_data(), @@ -1523,7 +1528,6 @@ Status DBImpl::CompactFilesImpl( // not check compaction_job.io_status() explicitly if we're not calling // SetBGError compaction_job.io_status().PermitUncheckedError(); - 
c->ReleaseCompactionFiles(s); // Need to make sure SstFileManager does its bookkeeping auto sfm = static_cast( immutable_db_options_.sst_file_manager.get()); @@ -3388,8 +3392,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, std::unique_ptr task_token; - // InternalKey manual_end_storage; - // InternalKey* manual_end = &manual_end_storage; bool sfm_reserved_compact_space = false; if (is_manual) { ManualCompactionState* m = manual_compaction; @@ -3525,6 +3527,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } IOStatus io_s; + bool compaction_released = false; if (!c) { // Nothing to do ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do"); @@ -3547,7 +3550,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } status = versions_->LogAndApply( c->column_family_data(), *c->mutable_cf_options(), read_options, - c->edit(), &mutex_, directories_.GetDbDir()); + c->edit(), &mutex_, directories_.GetDbDir(), + /*new_descriptor_log=*/false, /*column_family_options=*/nullptr, + [&c, &compaction_released](const Status& s) { + c->ReleaseCompactionFiles(s); + compaction_released = true; + }); io_s = versions_->io_status(); InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], @@ -3613,7 +3621,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } status = versions_->LogAndApply( c->column_family_data(), *c->mutable_cf_options(), read_options, - c->edit(), &mutex_, directories_.GetDbDir()); + c->edit(), &mutex_, directories_.GetDbDir(), + /*new_descriptor_log=*/false, /*column_family_options=*/nullptr, + [&c, &compaction_released](const Status& s) { + c->ReleaseCompactionFiles(s); + compaction_released = true; + }); io_s = versions_->io_status(); // Use latest MutableCFOptions InstallSuperVersionAndScheduleWork(c->column_family_data(), @@ -3663,6 +3676,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, // Transfer requested token, so it doesn't need to do it again. ca->prepicked_compaction->task_token = std::move(task_token); ++bg_bottom_compaction_scheduled_; + assert(c == nullptr); env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM, this, &DBImpl::UnscheduleCompactionCallback); } else { @@ -3706,8 +3720,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, compaction_job.Run().PermitUncheckedError(); TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun"); mutex_.Lock(); - - status = compaction_job.Install(*c->mutable_cf_options()); + status = + compaction_job.Install(*c->mutable_cf_options(), &compaction_released); io_s = compaction_job.io_status(); if (status.ok()) { InstallSuperVersionAndScheduleWork(c->column_family_data(), @@ -3726,7 +3740,23 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } if (c != nullptr) { - c->ReleaseCompactionFiles(status); + if (!compaction_released) { + c->ReleaseCompactionFiles(status); + } else { +#ifndef NDEBUG + // Sanity checking that compaction files are freed. 
+ for (size_t i = 0; i < c->num_input_levels(); i++) { + for (size_t j = 0; j < c->inputs(i)->size(); j++) { + assert(!c->input(i, j)->being_compacted); + } + } + std::unordered_set* cip = c->column_family_data() + ->compaction_picker() + ->compactions_in_progress(); + assert(cip->find(c.get()) == cip->end()); +#endif + } + *made_progress = true; // Need to make sure SstFileManager does its bookkeeping diff --git a/db/version_set.h b/db/version_set.h index 87c6eb514..1d7c70592 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1174,7 +1174,8 @@ class VersionSet { const MutableCFOptions& mutable_cf_options, const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, - const ColumnFamilyOptions* column_family_options = nullptr) { + const ColumnFamilyOptions* column_family_options = nullptr, + const std::function& manifest_wcb = {}) { autovector cfds; cfds.emplace_back(column_family_data); autovector mutable_cf_options_list; @@ -1185,7 +1186,7 @@ class VersionSet { edit_lists.emplace_back(edit_list); return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, mu, dir_contains_current_file, new_descriptor_log, - column_family_options); + column_family_options, {manifest_wcb}); } // The batch version. If edit_list.size() > 1, caller must ensure that // no edit in the list column family add or drop diff --git a/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md b/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md new file mode 100644 index 000000000..8ac2f1ebb --- /dev/null +++ b/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md @@ -0,0 +1 @@ +* Fix a bug (Issue #10257) where DB can hang after write stall since no compaction is scheduled (#11764). \ No newline at end of file From c90807d10300e5222ac317579d0d7c9777f27ad5 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 18 Sep 2023 16:23:26 -0700 Subject: [PATCH 131/386] Inject retryable write IOError when writing to SST files in stress test (#11829) Summary: * db_crashtest.py now may set `write_fault_one_in` to 500 for blackbox and whitebox simple test. * Error injection only applies to writing to SST files. Flush error will cause DB to pause background operations and auto-resume. Compaction error will just re-schedule later. * File ingestion and back up tests are updated to check if the result status is due to an injected error. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11829 Test Plan: a full round of whitebox simple and blackbox simple crash test * `python3 ./tools/db_crashtest.py whitebox/blackbox --simple --write_fault_one_in=500` Reviewed By: ajkr Differential Revision: D49256962 Pulled By: cbi42 fbshipit-source-id: 68e0c9648d8e03bad39c7672b25d5500fc286d97 --- db_stress_tool/db_stress_driver.cc | 24 ++++++++++++-- db_stress_tool/db_stress_gflags.cc | 3 +- db_stress_tool/db_stress_test_base.cc | 43 ++++++++++++------------- db_stress_tool/db_stress_tool.cc | 5 --- db_stress_tool/no_batched_ops_stress.cc | 12 ++++--- tools/db_crashtest.py | 12 ++++++- utilities/fault_injection_fs.cc | 28 ++++++++-------- utilities/fault_injection_fs.h | 10 +++--- 8 files changed, 80 insertions(+), 57 deletions(-) diff --git a/db_stress_tool/db_stress_driver.cc b/db_stress_tool/db_stress_driver.cc index 15c1ad5e5..2ab0b0d71 100644 --- a/db_stress_tool/db_stress_driver.cc +++ b/db_stress_tool/db_stress_driver.cc @@ -81,12 +81,30 @@ bool RunStressTestImpl(SharedState* shared) { stress->InitDb(shared); stress->FinishInitDb(shared); - if (FLAGS_sync_fault_injection) { - fault_fs_guard->SetFilesystemDirectWritable(false); - } if (FLAGS_write_fault_one_in) { + if (!FLAGS_sync_fault_injection) { + // unsynced WAL loss is not supported without sync_fault_injection + fault_fs_guard->SetDirectWritableTypes({kWalFile}); + } + IOStatus error_msg; + if (FLAGS_inject_error_severity <= 1 || FLAGS_inject_error_severity > 2) { + error_msg = IOStatus::IOError("Retryable injected write error"); + error_msg.SetRetryable(true); + } else if (FLAGS_inject_error_severity == 2) { + error_msg = IOStatus::IOError("Fatal injected write error"); + error_msg.SetDataLoss(true); + } + // TODO: inject write error for other file types including + // MANIFEST, CURRENT, and WAL files. + fault_fs_guard->SetRandomWriteError( + shared->GetSeed(), FLAGS_write_fault_one_in, error_msg, + /*inject_for_all_file_types=*/false, {FileType::kTableFile}); + fault_fs_guard->SetFilesystemDirectWritable(false); fault_fs_guard->EnableWriteErrorInjection(); } + if (FLAGS_sync_fault_injection) { + fault_fs_guard->SetFilesystemDirectWritable(false); + } uint32_t n = FLAGS_threads; uint64_t now = clock->NowMicros(); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index e8cf07068..cdea77c19 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1007,7 +1007,8 @@ DEFINE_string(file_checksum_impl, "none", "\"none\" for null."); DEFINE_int32(write_fault_one_in, 0, - "On non-zero, enables fault injection on write"); + "On non-zero, enables fault injection on write. Currently only" + "injects write error when writing to SST files."); DEFINE_uint64(user_timestamp_size, 0, "Number of bytes for a user-defined timestamp. 
Currently, only " diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index e7df7cf0b..daaa66f92 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -785,23 +785,6 @@ void StressTest::OperateDb(ThreadState* thread) { FLAGS_inject_error_severity == 1 /* retryable */); } #endif // NDEBUG - if (FLAGS_write_fault_one_in) { - IOStatus error_msg; - if (FLAGS_inject_error_severity <= 1 || FLAGS_inject_error_severity > 2) { - error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - } else if (FLAGS_inject_error_severity == 2) { - // Inject a fatal error - error_msg = IOStatus::IOError("Fatal IO Error"); - error_msg.SetDataLoss(true); - } - std::vector types = {FileType::kTableFile, - FileType::kDescriptorFile, - FileType::kCurrentFile}; - fault_fs_guard->SetRandomWriteError( - thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg, - /*inject_for_all_file_types=*/false, types); - } thread->stats.Start(); for (int open_cnt = 0; open_cnt <= FLAGS_reopen; ++open_cnt) { if (thread->shared->HasVerificationFailedYet() || @@ -1004,8 +987,13 @@ void StressTest::OperateDb(ThreadState* thread) { if (total_size <= FLAGS_backup_max_size) { Status s = TestBackupRestore(thread, rand_column_families, rand_keys); if (!s.ok()) { - VerificationAbort(shared, "Backup/restore gave inconsistent state", - s); + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + VerificationAbort(shared, + "Backup/restore gave inconsistent state", s); + } else { + fprintf(stdout, "Backup/restore failed: %s\n", + s.ToString().c_str()); + } } } } @@ -1013,7 +1001,11 @@ void StressTest::OperateDb(ThreadState* thread) { if (thread->rand.OneInOpt(FLAGS_checkpoint_one_in)) { Status s = TestCheckpoint(thread, rand_column_families, rand_keys); if (!s.ok()) { - VerificationAbort(shared, "Checkpoint gave inconsistent state", s); + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + VerificationAbort(shared, "Checkpoint gave inconsistent state", s); + } else { + fprintf(stdout, "Checkpoint failed: %s\n", s.ToString().c_str()); + } } } @@ -2699,6 +2691,9 @@ void StressTest::Open(SharedState* shared, bool reopen) { FLAGS_db, options_.db_paths, cf_descriptors, db_stress_listener_env)); RegisterAdditionalListeners(); + // If this is for DB reopen, write error injection may have been enabled. + // Disable it here in case there is no open fault injection. + fault_fs_guard->DisableWriteErrorInjection(); if (!FLAGS_use_txn) { // Determine whether we need to inject file metadata write failures // during DB reopen. If it does, enable it. @@ -2718,7 +2713,7 @@ void StressTest::Open(SharedState* shared, bool reopen) { // WAL is durable. Buffering unsynced writes will cause false // positive in crash tests. Before we figure out a way to // solve it, skip WAL from failure injection. 
- fault_fs_guard->SetSkipDirectWritableTypes({kWalFile}); + fault_fs_guard->SetDirectWritableTypes({kWalFile}); } inject_meta_error = FLAGS_open_metadata_write_fault_one_in; inject_write_error = FLAGS_open_write_fault_one_in; @@ -2733,7 +2728,7 @@ void StressTest::Open(SharedState* shared, bool reopen) { fault_fs_guard->EnableWriteErrorInjection(); fault_fs_guard->SetRandomWriteError( static_cast(FLAGS_seed), FLAGS_open_write_fault_one_in, - IOStatus::IOError("Injected Open Error"), + IOStatus::IOError("Injected Open Write Error"), /*inject_for_all_file_types=*/true, /*types=*/{}); } if (inject_read_error) { @@ -2769,10 +2764,12 @@ void StressTest::Open(SharedState* shared, bool reopen) { } if (inject_meta_error || inject_write_error || inject_read_error) { + // TODO: re-enable write error injection after reopen. Same for + // sync fault injection. fault_fs_guard->SetFilesystemDirectWritable(true); fault_fs_guard->DisableMetadataWriteErrorInjection(); fault_fs_guard->DisableWriteErrorInjection(); - fault_fs_guard->SetSkipDirectWritableTypes({}); + fault_fs_guard->SetDirectWritableTypes({}); fault_fs_guard->SetRandomReadError(0); if (s.ok()) { // Injected errors might happen in background compactions. We diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 787efe47d..10535b820 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -88,11 +88,6 @@ int db_stress_tool(int argc, char** argv) { FaultInjectionTestFS* fs = new FaultInjectionTestFS(raw_env->GetFileSystem()); fault_fs_guard.reset(fs); - if (FLAGS_write_fault_one_in) { - fault_fs_guard->SetFilesystemDirectWritable(false); - } else { - fault_fs_guard->SetFilesystemDirectWritable(true); - } fault_env_guard = std::make_shared(raw_env, fault_fs_guard); raw_env = fault_env_guard.get(); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index ebc252a63..321406a2f 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1568,11 +1568,13 @@ class NonBatchedOpsStressTest : public StressTest { } if (!s.ok()) { fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); - thread->shared->SafeTerminate(); - } - - for (size_t i = 0; i < pending_expected_values.size(); ++i) { - pending_expected_values[i].Commit(); + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + thread->shared->SafeTerminate(); + } + } else { + for (size_t i = 0; i < pending_expected_values.size(); ++i) { + pending_expected_values[i].Commit(); + } } } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8b4b36c74..09bb11d1a 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -179,6 +179,7 @@ "max_key_len": 3, "key_len_percent_dist": "1,30,69", "read_fault_one_in": lambda: random.choice([0, 32, 1000]), + "write_fault_one_in": lambda: random.choice([0, 500]), "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]), "open_write_fault_one_in": lambda: random.choice([0, 0, 16]), "open_read_fault_one_in": lambda: random.choice([0, 0, 32]), @@ -374,6 +375,10 @@ def is_direct_io_supported(dbname): # use small value for write_buffer_size so that RocksDB triggers flush # more frequently "write_buffer_size": 1024 * 1024, + # Small write buffer size with more frequent flush has a higher chance + # of hitting write error. DB may be stopped if memtable fills up during + # auto resume. 
+ "write_fault_one_in": 0, "enable_pipelined_write": lambda: random.randint(0, 1), # Snapshots are used heavily in this test mode, while they are incompatible # with compaction filter. @@ -506,6 +511,9 @@ def is_direct_io_supported(dbname): "enable_compaction_filter": 0, "create_timestamped_snapshot_one_in": 50, "sync_fault_injection": 0, + # This test has aggressive flush frequency and small write buffer size. + # Disabling write fault to avoid writes being stopped. + "write_fault_one_in": 0, # PutEntity in transactions is not yet implemented "use_put_entity_one_in": 0, "use_get_entity": 0, @@ -671,7 +679,9 @@ def finalize_and_sanitize(src_params): dest_params["use_full_merge_v1"] = 0 if dest_params["file_checksum_impl"] == "none": dest_params["verify_file_checksums_one_in"] = 0 - + if dest_params["write_fault_one_in"] > 0: + # background work may be disabled while DB is resuming after some error + dest_params["max_write_buffer_number"] = max(dest_params["max_write_buffer_number"], 6) return dest_params diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 8db8be45f..53bbaeb07 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -408,7 +408,7 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n, scratch, /*need_count_increase=*/true, /*fault_injected=*/nullptr); } if (s.ok() && fs_->ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected read error"); + return IOStatus::IOError("injected read error"); } return s; } @@ -430,7 +430,7 @@ IOStatus TestFSRandomAccessFile::ReadAsync( } if (ret.ok()) { if (fs_->ShouldInjectRandomReadError()) { - ret = IOStatus::IOError("Injected read error"); + ret = IOStatus::IOError("injected read error"); } else { s = target_->ReadAsync(req, opts, cb, cb_arg, io_handle, del_fn, nullptr); } @@ -470,7 +470,7 @@ IOStatus TestFSRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, /*fault_injected=*/nullptr); } if (s.ok() && fs_->ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected read error"); + return IOStatus::IOError("injected read error"); } return s; } @@ -487,7 +487,7 @@ IOStatus TestFSSequentialFile::Read(size_t n, const IOOptions& options, IODebugContext* dbg) { IOStatus s = target()->Read(n, options, result, scratch, dbg); if (s.ok() && fs_->ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected seq read error"); + return IOStatus::IOError("injected seq read error"); } return s; } @@ -499,7 +499,7 @@ IOStatus TestFSSequentialFile::PositionedRead(uint64_t offset, size_t n, IOStatus s = target()->PositionedRead(offset, n, options, result, scratch, dbg); if (s.ok() && fs_->ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected seq positioned read error"); + return IOStatus::IOError("injected seq positioned read error"); } return s; } @@ -678,7 +678,7 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile( return GetError(); } if (ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected error when open random access file"); + return IOStatus::IOError("injected error when open random access file"); } IOStatus io_s = InjectThreadSpecificReadError(ErrorOperation::kOpen, nullptr, false, nullptr, @@ -701,7 +701,7 @@ IOStatus FaultInjectionTestFS::NewSequentialFile( } if (ShouldInjectRandomReadError()) { - return IOStatus::IOError("Injected read error when creating seq file"); + return IOStatus::IOError("injected read error when creating seq file"); } IOStatus io_s = target()->NewSequentialFile(fname, 
file_opts, result, dbg); if (io_s.ok()) { @@ -971,15 +971,15 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( if (op != ErrorOperation::kMultiReadSingleReq) { // Likely non-per read status code for MultiRead - ctx->message += "error; "; + ctx->message += "injected read error; "; ret_fault_injected = true; - ret = IOStatus::IOError(); + ret = IOStatus::IOError(ctx->message); } else if (Random::GetTLSInstance()->OneIn(8)) { assert(result); // For a small chance, set the failure to status but turn the // result to be empty, which is supposed to be caught for a check. *result = Slice(); - ctx->message += "inject empty result; "; + ctx->message += "injected empty result; "; ret_fault_injected = true; } else if (!direct_io && Random::GetTLSInstance()->OneIn(7) && scratch != nullptr && result->data() == scratch) { @@ -996,12 +996,12 @@ IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( // It would work for CRC. Not 100% sure for xxhash and will adjust // if it is not the case. const_cast(result->data())[result->size() - 1]++; - ctx->message += "corrupt last byte; "; + ctx->message += "injected corrupt last byte; "; ret_fault_injected = true; } else { - ctx->message += "error result multiget single; "; + ctx->message += "injected error result multiget single; "; ret_fault_injected = true; - ret = IOStatus::IOError(); + ret = IOStatus::IOError(ctx->message); } } if (ctx->retryable) { @@ -1056,7 +1056,7 @@ IOStatus FaultInjectionTestFS::InjectMetadataWriteError() { } } TEST_SYNC_POINT("FaultInjectionTestFS::InjectMetadataWriteError:Injected"); - return IOStatus::IOError(); + return IOStatus::IOError("injected metadata write error"); } void FaultInjectionTestFS::PrintFaultBacktrace() { diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index a481d86af..afd770dde 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -323,8 +323,8 @@ class FaultInjectionTestFS : public FileSystemWrapper { if (!TryParseFileName(file_name, &file_number, &file_type)) { return false; } - return skip_direct_writable_types_.find(file_type) != - skip_direct_writable_types_.end(); + return direct_writable_types_.find(file_type) != + direct_writable_types_.end(); } void SetFilesystemActiveNoLock( bool active, IOStatus error = IOStatus::Corruption("Not active")) { @@ -439,9 +439,9 @@ class FaultInjectionTestFS : public FileSystemWrapper { write_error_allowed_types_ = types; } - void SetSkipDirectWritableTypes(const std::set& types) { + void SetDirectWritableTypes(const std::set& types) { MutexLock l(&mutex_); - skip_direct_writable_types_ = types; + direct_writable_types_ = types; } void SetRandomMetadataWriteError(int one_in) { @@ -583,7 +583,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { bool inject_for_all_file_types_; std::vector write_error_allowed_types_; // File types where direct writable is skipped. - std::set skip_direct_writable_types_; + std::set direct_writable_types_; bool ingest_data_corruption_before_write_; ChecksumType checksum_handoff_func_tpye_; bool fail_get_file_unique_id_; From 20dbf512473b570c62b83258c058eff448779601 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 18 Sep 2023 23:04:38 -0700 Subject: [PATCH 132/386] DB Stress Fix - Commit pending value after checking for status (#11856) Summary: We've seen occasional crash test failure in optimistic transaction DB with the following error message. 
``` stderr: WARNING: prefix_size is non-zero but memtablerep != prefix_hash Verification failed for column family 0 key 0000000000001EDB0000000000000036787878787878 (789064): value_from_db: 010000000504070609080B0A0D0C0F0E111013121514171619181B1A1D1C1F1E212023222524272629282B2A2D2C2F2E313033323534373639383B3A3D3C3F3E, value_from_expected: , msg: MultiGet verification: Unexpected value found Crash-recovery verification failed :( No writes or ops? Verification failed :( ``` There was a possibility if the db stress crashes after `pending_expected_value.Commit()` but before `thread->shared->SafeTerminate();`, we may have expected value committed while actual DB value was not. Moving the `pending_expected_value.Commit()` after `s.ok()` check to fix the test. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11856 Test Plan: Ran the following in a script with while loop. (It doesn't always repro the issue even without this fix, though..) ``` ./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=0 --allow_data_in_errors=True --async_io=1 --atomic_flush=1 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=0 --block_protection_bytes_per_key=0 --block_size=16384 --bloom_before_level=8 --bloom_bits=11 --bottommost_compression_type=lz4 --bottommost_file_compaction_delay=0 --bytes_per_sync=0 --cache_index_and_filter_blocks=0 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=0 --charge_file_metadata=0 --charge_filter_construction=0 --charge_table_reader=0 --checkpoint_one_in=1000000 --checksum_type=kxxHash --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=4 --compaction_readahead_size=0 --compaction_ttl=100 --compression_checksum=0 --compression_max_dict_buffer_bytes=2097151 --compression_max_dict_bytes=16384 --compression_parallel_threads=1 --compression_type=lz4 --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=0 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=1048576 --delpercent=5 --delrangepercent=0 --destroy_db_initially=0 --detect_filter_construct_corruption=0 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=1 --file_checksum_impl=crc32c --flush_one_in=1000000 --format_version=5 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=3 --index_type=2 --ingest_external_file_one_in=0 --initial_auto_readahead_size=524288 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=0 --lock_wal_one_in=1000000 --long_running_snapshots=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=524288 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16777216 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=2097152 --memtable_max_range_deletions=0 --memtable_prefix_bloom_size_ratio=0.001 --memtable_protection_bytes_per_key=2 --memtable_whole_key_filtering=0 
--memtablerep=skip_list --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --occ_lock_bucket_count=500 --occ_validation_policy=0 --open_files=-1 --open_metadata_write_fault_one_in=8 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=1 --pause_background_one_in=1000000 --periodic_compaction_seconds=2 --prefix_size=5 --prefixpercent=5 --prepopulate_block_cache=1 --preserve_internal_time_seconds=36000 --progress_reports=0 --read_fault_one_in=0 --readahead_size=0 --readpercent=45 --recycle_log_file_num=1 --reopen=0 --secondary_cache_fault_one_in=0 --secondary_cache_uri=compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true --set_options_one_in=10000 --share_occ_lock_buckets=0 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=3 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=3 --unpartitioned_pinning=2 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=0 --use_multi_get_entity=0 --use_multiget=1 --use_optimistic_txn=1 --use_put_entity_one_in=0 --use_txn=1 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=zstd --write_buffer_size=4194304 --write_dbid_to_manifest=1 --write_fault_one_in=0 --writepercent=35& ``` ``` pid=$! 
sleep 10 kill -9 $pid sleep 1 ``` ``` db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --async_io=0 --atomic_flush=1 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=8 --block_protection_bytes_per_key=0 --block_size=16384 --bloom_before_level=2147483647 --bloom_bits=75.01353068032098 --bottommost_compression_type=xpress --bottommost_file_compaction_delay=3600 --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=8388608 --cache_type=auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=1 --charge_table_reader=0 --checkpoint_one_in=1000000 --checksum_type=kCRC32c --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=4 --compaction_readahead_size=0 --compaction_ttl=0 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=xpress --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=1 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=134217728 --delpercent=5 --delrangepercent=0 --destroy_db_initially=0 --detect_filter_construct_corruption=1 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=0 --file_checksum_impl=none --flush_one_in=1000000 --format_version=5 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=10 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=524288 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=0 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=524288 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=1048576 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=1048576 --memtable_max_range_deletions=100 --memtable_prefix_bloom_size_ratio=0.01 --memtable_protection_bytes_per_key=8 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=2 --mmap_read=0 --mock_direct_io=True --nooverwritepercent=1 --num_file_reads_for_auto_readahead=2 --occ_lock_bucket_count=10 --occ_validation_policy=0 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=32 --open_write_fault_one_in=16 --ops_per_thread=100000000 --optimize_filters_for_memory=0 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=0 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=1 --prefixpercent=5 --prepopulate_block_cache=0 --preserve_internal_time_seconds=36000 --progress_reports=0 --read_fault_one_in=32 --readahead_size=0 --readpercent=45 --recycle_log_file_num=1 --reopen=0 --secondary_cache_fault_one_in=0 --secondary_cache_uri=compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true --set_options_one_in=10000 
--share_occ_lock_buckets=0 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=4 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=1 --unpartitioned_pinning=1 --use_direct_io_for_flush_and_compaction=1 --use_direct_reads=0 --use_full_merge_v1=1 --use_get_entity=0 --use_merge=1 --use_multi_get_entity=0 --use_multiget=1 --use_optimistic_txn=1 --use_put_entity_one_in=0 --use_txn=1 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=0 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --wal_compression=zstd --write_buffer_size=4194304 --write_dbid_to_manifest=1 --write_fault_one_in=0 --writepercent=35 ``` Reviewed By: cbi42 Differential Revision: D49403091 Pulled By: jaykorean fbshipit-source-id: 5ee6136133bbdc46aa733e5101c1f998f658c200 --- db_stress_tool/no_batched_ops_stress.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 321406a2f..92d0c9fca 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1309,8 +1309,6 @@ class NonBatchedOpsStressTest : public StressTest { } } - pending_expected_value.Commit(); - if (!s.ok()) { if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) { @@ -1325,7 +1323,7 @@ class NonBatchedOpsStressTest : public StressTest { thread->shared->SafeTerminate(); } } - + pending_expected_value.Commit(); thread->stats.AddBytesForWrites(1, sz); PrintKeyValue(rand_column_family, static_cast(rand_key), value, sz); @@ -1367,9 +1365,7 @@ class NonBatchedOpsStressTest : public StressTest { return txn.Delete(cfh, key); }); } - pending_expected_value.Commit(); - thread->stats.AddDeletes(1); if (!s.ok()) { if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && @@ -1385,6 +1381,8 @@ class NonBatchedOpsStressTest : public StressTest { thread->shared->SafeTerminate(); } } + pending_expected_value.Commit(); + thread->stats.AddDeletes(1); } else { PendingExpectedValue pending_expected_value = shared->PrepareSingleDelete(rand_column_family, rand_key); @@ -1399,8 +1397,7 @@ class NonBatchedOpsStressTest : public StressTest { return txn.SingleDelete(cfh, key); }); } - pending_expected_value.Commit(); - thread->stats.AddSingleDeletes(1); + if (!s.ok()) { if (FLAGS_inject_error_severity >= 2) { if (!is_db_stopped_ && @@ -1416,6 +1413,8 @@ class NonBatchedOpsStressTest : public StressTest { thread->shared->SafeTerminate(); } } + pending_expected_value.Commit(); + thread->stats.AddSingleDeletes(1); } return s; } From ba5897ada899a3d72887ce764bee108076f694ff Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Tue, 19 Sep 2023 08:33:05 -0700 Subject: [PATCH 133/386] Fix stress test failure due to write fault injections and disable write fault injection (#11859) Summary: This PR contains two fixes: 1. disable write fault injection since it caused several other kinds of internal stress test failures. I'll try to fix those separately before enabling it again. 2. 
Fix segfault like
```
https://github.com/facebook/rocksdb/issues/5  0x000000000083dc43 in rocksdb::port::Mutex::Lock (this=0x30) at internal_repo_rocksdb/repo/port/port_posix.cc:80
80      internal_repo_rocksdb/repo/port/port_posix.cc: No such file or directory.
https://github.com/facebook/rocksdb/issues/6  0x0000000000465142 in rocksdb::MutexLock::MutexLock (mu=0x30, this=<optimized out>) at internal_repo_rocksdb/repo/util/mutexlock.h:37
37      internal_repo_rocksdb/repo/util/mutexlock.h: No such file or directory.
https://github.com/facebook/rocksdb/issues/7  rocksdb::FaultInjectionTestFS::DisableWriteErrorInjection (this=0x0) at internal_repo_rocksdb/repo/utilities/fault_injection_fs.h:505
505     internal_repo_rocksdb/repo/utilities/fault_injection_fs.h: No such file or directory.
```

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11859

Test Plan: db_stress with no fault injection: `./db_stress --write_fault_one_in=0 --read_fault_one_in=0 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --sync_fault_injection=0`

Reviewed By: jaykorean

Differential Revision: D49408247

Pulled By: cbi42

fbshipit-source-id: 0ca01f20e6e81bf52af77818b50d562ef7462165
---
 db_stress_tool/db_stress_test_base.cc | 4 +++-
 tools/db_crashtest.py                 | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc
index daaa66f92..c79c0e237 100644
--- a/db_stress_tool/db_stress_test_base.cc
+++ b/db_stress_tool/db_stress_test_base.cc
@@ -2693,7 +2693,9 @@ void StressTest::Open(SharedState* shared, bool reopen) {
 
   // If this is for DB reopen, write error injection may have been enabled.
   // Disable it here in case there is no open fault injection.
-  fault_fs_guard->DisableWriteErrorInjection();
+  if (fault_fs_guard) {
+    fault_fs_guard->DisableWriteErrorInjection();
+  }
   if (!FLAGS_use_txn) {
     // Determine whether we need to inject file metadata write failures
     // during DB reopen. If it does, enable it.
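Editorial aside (not part of the patch): the `this=0x0` frame in the backtrace above is the usual signature of calling a member function through a null pointer, which is why the fix simply guards the call. The standalone sketch below uses a hypothetical stand-in type (`FakeFaultFS` is not a RocksDB class) to illustrate the failure mode and the guard:

```
#include <memory>

// Hypothetical stand-in, only to illustrate the crash mode: invoking a member
// function through a null shared_ptr is undefined behavior and typically shows
// up in gdb as a frame with this=0x0 (or a small offset such as 0x30 once a
// member like a mutex is touched).
struct FakeFaultFS {
  void DisableWriteErrorInjection() { /* would lock a member mutex here */ }
};

int main() {
  // Never initialized, e.g. because fault injection is disabled entirely.
  std::shared_ptr<FakeFaultFS> guard;

  // Unconditional call: guard.get() == nullptr, so this dereferences null.
  // guard->DisableWriteErrorInjection();  // would crash as in the backtrace

  // Guarded call, mirroring the shape of the fix in this patch:
  if (guard) {
    guard->DisableWriteErrorInjection();
  }
  return 0;
}
```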
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 09bb11d1a..49e9094f1 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -179,7 +179,7 @@
     "max_key_len": 3,
     "key_len_percent_dist": "1,30,69",
     "read_fault_one_in": lambda: random.choice([0, 32, 1000]),
-    "write_fault_one_in": lambda: random.choice([0, 500]),
+    "write_fault_one_in": 0,
     "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
     "open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
     "open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
@@ -681,7 +681,7 @@ def finalize_and_sanitize(src_params):
         dest_params["verify_file_checksums_one_in"] = 0
     if dest_params["write_fault_one_in"] > 0:
         # background work may be disabled while DB is resuming after some error
-        dest_params["max_write_buffer_number"] = max(dest_params["max_write_buffer_number"], 6)
+        dest_params["max_write_buffer_number"] = max(dest_params["max_write_buffer_number"], 10)
 
     return dest_params
 

From 44e4ffd60d5c7020223c3ca1b5c68f85e9d103de Mon Sep 17 00:00:00 2001
From: Peter Dillinger
Date: Tue, 19 Sep 2023 11:35:43 -0700
Subject: [PATCH 134/386] Release note for #11738 AutoHCC (#11855)

Summary:
Neglected in original PR

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11855

Test Plan: no code change

Reviewed By: jowlyzhang

Differential Revision: D49424759

Pulled By: pdillinger

fbshipit-source-id: 9a2ef2c0992a65f2a8f04dce9cefe978f8087407
---
 unreleased_history/new_features/auto_hcc.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 unreleased_history/new_features/auto_hcc.md

diff --git a/unreleased_history/new_features/auto_hcc.md b/unreleased_history/new_features/auto_hcc.md
new file mode 100644
index 000000000..f0c83d181
--- /dev/null
+++ b/unreleased_history/new_features/auto_hcc.md
@@ -0,0 +1 @@
+Added an experimental new "automatic" variant of HyperClockCache that does not require a prior estimate of the average size of cache entries. This variant is activated when HyperClockCacheOptions::estimated\_entry\_charge = 0 and has essentially the same concurrency benefits as the existing HyperClockCache.

From cfe0e0b037cfd4067985b19409a077bde799e847 Mon Sep 17 00:00:00 2001
From: Changyu Bi
Date: Tue, 19 Sep 2023 12:23:38 -0700
Subject: [PATCH 135/386] Initialize FaultInjectionTestFS DirectWritable field (#11862)

Summary:
FaultInjectionTestFS is not directly writable by default. It should be set to directly writable if there is no write fault injection.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11862

Test Plan: internal stress test failures are reduced.
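Editorial aside on the AutoHCC release note added in PATCH 134 above (not part of this patch): as described in that note, the new variant is selected purely through `HyperClockCacheOptions` by passing an `estimated_entry_charge` of 0. A minimal configuration sketch follows; it assumes the public `HyperClockCacheOptions`/`MakeSharedCache()` API, and the capacity value is illustrative only:

```
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

int main() {
  // 1 GiB block cache; estimated_entry_charge == 0 selects the experimental
  // "automatic" HyperClockCache variant that does not need an up-front
  // estimate of the average entry size.
  rocksdb::HyperClockCacheOptions hcc_opts(/*_capacity=*/size_t{1} << 30,
                                           /*_estimated_entry_charge=*/0);
  std::shared_ptr<rocksdb::Cache> cache = hcc_opts.MakeSharedCache();

  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache;

  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return 0;
}
```

With a nonzero `estimated_entry_charge`, the same options select the original fixed-size HyperClockCache variant instead.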
Reviewed By: jaykorean

Differential Revision: D49428108

Pulled By: cbi42

fbshipit-source-id: 5dfa1fbb454272a14f8228a5c496d480d7138ef1
---
 db_stress_tool/db_stress_tool.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc
index 10535b820..787efe47d 100644
--- a/db_stress_tool/db_stress_tool.cc
+++ b/db_stress_tool/db_stress_tool.cc
@@ -88,6 +88,11 @@ int db_stress_tool(int argc, char** argv) {
     FaultInjectionTestFS* fs =
         new FaultInjectionTestFS(raw_env->GetFileSystem());
     fault_fs_guard.reset(fs);
+    if (FLAGS_write_fault_one_in) {
+      fault_fs_guard->SetFilesystemDirectWritable(false);
+    } else {
+      fault_fs_guard->SetFilesystemDirectWritable(true);
+    }
     fault_env_guard =
         std::make_shared<CompositeEnvWrapper>(raw_env, fault_fs_guard);
     raw_env = fault_env_guard.get();

From f42e70bf561d4be9b6bbe7316d1c2c0c8a3818e6 Mon Sep 17 00:00:00 2001
From: Levi Tamasi
Date: Tue, 19 Sep 2023 17:27:04 -0700
Subject: [PATCH 136/386] Integrate FullMergeV3 into the query and compaction paths (#11858)

Summary:
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11858

The patch builds on https://github.com/facebook/rocksdb/pull/11807 and integrates the `FullMergeV3` API into the read and compaction code paths by updating and extending the logic in `MergeHelper`.

In particular, when it comes to merge inputs, the existing `TimedFullMergeWithEntity` is folded into `TimedFullMerge`, since wide-column base values are now handled the same way as plain base values (or no base values, for that matter): they are passed directly to the `MergeOperator`. On the other hand, there is some new differentiation on the output side. Namely, there are now two sets of `TimedFullMerge` variants: one set for contexts where the complete merge result and its value type are needed (used by iterators and compactions), and another set where the merge result is needed in a form determined by the client (used by the point lookup APIs, where e.g. for `Get` we have to extract the value of the default column of any wide-column results).

Implementation-wise, the two sets of overloads use different visitors to process the `std::variant` produced by `FullMergeV3`. This has the benefit of eliminating some repeated code, e.g. in the point lookup paths, since `TimedFullMerge` now populates the application's result object (`PinnableSlice`/`string` or `PinnableWideColumns`) directly. Moreover, within each set of variants, there is a separate overload for the no base value/plain base value/wide-column base value cases, which eliminates some repeated branching with respect to the type of the base value, if any.
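Editorial aside (not part of the commit message): two implementation ideas mentioned above are easy to miss when reading the diff, namely the empty tag types used to pick an overload at the call site and the `overload`-style visitor applied to the `std::variant` merge result. The self-contained sketch below reproduces the pattern with simplified, hypothetical types rather than RocksDB's actual signatures:

```
#include <iostream>
#include <string>
#include <utility>
#include <variant>
#include <vector>

// Simplified stand-ins for the variant alternatives a merge can produce:
// either a brand-new string value or a list of (name, value) columns.
using NewColumns = std::vector<std::pair<std::string, std::string>>;
using MergeResult = std::variant<std::string, NewColumns>;

// Minimal overload helper, equivalent in spirit to util/overload.h.
template <typename... Ts>
struct overload : Ts... {
  using Ts::operator()...;
};
template <typename... Ts>
overload(Ts...) -> overload<Ts...>;

// Empty tag types so callers can say "no base value" vs. "plain base value"
// without passing nullable pointers.
struct NoBaseValueTag {};
inline constexpr NoBaseValueTag kNoBaseValue{};
struct PlainBaseValueTag {};
inline constexpr PlainBaseValueTag kPlainBaseValue{};

MergeResult DoMerge(NoBaseValueTag, const std::vector<std::string>& operands) {
  // Pretend merging the operands alone yields a wide-column result.
  return NewColumns{{"default", operands.empty() ? "" : operands.back()}};
}

MergeResult DoMerge(PlainBaseValueTag, const std::string& base,
                    const std::vector<std::string>& operands) {
  // Pretend the merge concatenates the base value and the operands.
  std::string result = base;
  for (const auto& op : operands) {
    result += ',' + op;
  }
  return result;
}

int main() {
  const std::vector<std::string> operands{"op1", "op2"};

  // Visitor for a point-lookup-style caller that only wants a plain string:
  // a wide-column result is reduced to its default column, mirroring Get().
  auto as_plain_value = overload{
      [](std::string&& value) { return std::move(value); },
      [](NewColumns&& columns) {
        return columns.empty() ? std::string()
                               : std::move(columns.front().second);
      }};

  std::cout << std::visit(as_plain_value, DoMerge(kNoBaseValue, operands))
            << "\n";
  std::cout << std::visit(as_plain_value,
                          DoMerge(kPlainBaseValue, "base", operands))
            << "\n";
  return 0;
}
```

Passing an empty tag object instead of a nullable pointer makes the "no base value" case explicit at the call site and lets overload resolution, rather than runtime branching, select the appropriate code path.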
Reviewed By: jaykorean Differential Revision: D49352562 fbshipit-source-id: c2fb9853dba3fbbc6918665bde4195c4ea150a0c --- db/db_iter.cc | 116 +++-- db/db_iter.h | 10 +- db/memtable.cc | 88 +--- db/merge_helper.cc | 415 +++++++++++++----- db/merge_helper.h | 95 +++- db/version_set.cc | 24 +- db/write_batch.cc | 20 +- include/rocksdb/merge_operator.h | 9 +- table/get_context.cc | 130 ++---- table/get_context.h | 12 +- .../write_batch_with_index.cc | 10 +- .../write_batch_with_index_internal.cc | 141 +++--- .../write_batch_with_index_internal.h | 22 +- 13 files changed, 667 insertions(+), 425 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 7e801135b..3549d5f34 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -238,6 +238,31 @@ bool DBIter::SetValueAndColumnsFromEntity(Slice slice) { return true; } +bool DBIter::SetValueAndColumnsFromMergeResult(const Status& merge_status, + ValueType result_type) { + if (!merge_status.ok()) { + valid_ = false; + status_ = merge_status; + return false; + } + + if (result_type == kTypeWideColumnEntity) { + if (!SetValueAndColumnsFromEntity(saved_value_)) { + assert(!valid_); + return false; + } + + valid_ = true; + return true; + } + + assert(result_type == kTypeValue); + SetValueAndColumnsFromPlain(pinned_value_.data() ? pinned_value_ + : saved_value_); + valid_ = true; + return true; +} + // PRE: saved_key_ has the current user key if skipping_saved_key // POST: saved_key_ should have the next user key if valid_, // if the current entry is a result of merge @@ -554,8 +579,7 @@ bool DBIter::MergeValuesNewToOld() { if (kTypeValue == ikey.type) { // hit a put, merge the put value with operands and store the // final result in saved_value_. We are done! - const Slice val = iter_.value(); - if (!Merge(&val, ikey.user_key)) { + if (!MergeWithPlainBaseValue(iter_.value(), ikey.user_key)) { return false; } // iter_ is positioned after put @@ -584,7 +608,7 @@ bool DBIter::MergeValuesNewToOld() { return false; } valid_ = true; - if (!Merge(&blob_value_, ikey.user_key)) { + if (!MergeWithPlainBaseValue(blob_value_, ikey.user_key)) { return false; } @@ -598,7 +622,7 @@ bool DBIter::MergeValuesNewToOld() { } return true; } else if (kTypeWideColumnEntity == ikey.type) { - if (!MergeEntity(iter_.value(), ikey.user_key)) { + if (!MergeWithWideColumnBaseValue(iter_.value(), ikey.user_key)) { return false; } @@ -628,7 +652,7 @@ bool DBIter::MergeValuesNewToOld() { // a deletion marker. // feed null as the existing value to the merge operator, such that // client can differentiate this scenario and do things accordingly. 
- if (!Merge(nullptr, saved_key_.GetUserKey())) { + if (!MergeWithNoBaseValue(saved_key_.GetUserKey())) { return false; } assert(status_.ok()); @@ -979,7 +1003,7 @@ bool DBIter::FindValueForCurrentKey() { if (last_not_merge_type == kTypeDeletion || last_not_merge_type == kTypeSingleDeletion || last_not_merge_type == kTypeDeletionWithTimestamp) { - if (!Merge(nullptr, saved_key_.GetUserKey())) { + if (!MergeWithNoBaseValue(saved_key_.GetUserKey())) { return false; } return true; @@ -994,7 +1018,7 @@ bool DBIter::FindValueForCurrentKey() { return false; } valid_ = true; - if (!Merge(&blob_value_, saved_key_.GetUserKey())) { + if (!MergeWithPlainBaseValue(blob_value_, saved_key_.GetUserKey())) { return false; } @@ -1002,14 +1026,15 @@ bool DBIter::FindValueForCurrentKey() { return true; } else if (last_not_merge_type == kTypeWideColumnEntity) { - if (!MergeEntity(pinned_value_, saved_key_.GetUserKey())) { + if (!MergeWithWideColumnBaseValue(pinned_value_, + saved_key_.GetUserKey())) { return false; } return true; } else { assert(last_not_merge_type == kTypeValue); - if (!Merge(&pinned_value_, saved_key_.GetUserKey())) { + if (!MergeWithPlainBaseValue(pinned_value_, saved_key_.GetUserKey())) { return false; } return true; @@ -1185,8 +1210,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() { } if (ikey.type == kTypeValue) { - const Slice val = iter_.value(); - if (!Merge(&val, saved_key_.GetUserKey())) { + if (!MergeWithPlainBaseValue(iter_.value(), saved_key_.GetUserKey())) { return false; } return true; @@ -1205,7 +1229,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() { return false; } valid_ = true; - if (!Merge(&blob_value_, saved_key_.GetUserKey())) { + if (!MergeWithPlainBaseValue(blob_value_, saved_key_.GetUserKey())) { return false; } @@ -1213,7 +1237,8 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() { return true; } else if (ikey.type == kTypeWideColumnEntity) { - if (!MergeEntity(iter_.value(), saved_key_.GetUserKey())) { + if (!MergeWithWideColumnBaseValue(iter_.value(), + saved_key_.GetUserKey())) { return false; } @@ -1227,7 +1252,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() { } } - if (!Merge(nullptr, saved_key_.GetUserKey())) { + if (!MergeWithNoBaseValue(saved_key_.GetUserKey())) { return false; } @@ -1250,47 +1275,42 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() { return true; } -bool DBIter::Merge(const Slice* val, const Slice& user_key) { +bool DBIter::MergeWithNoBaseValue(const Slice& user_key) { // `op_failure_scope` (an output parameter) is not provided (set to nullptr) // since a failure must be propagated regardless of its value. - Status s = MergeHelper::TimedFullMerge( - merge_operator_, user_key, val, merge_context_.GetOperands(), - &saved_value_, logger_, statistics_, clock_, &pinned_value_, - /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - if (!s.ok()) { - valid_ = false; - status_ = s; - return false; - } - - SetValueAndColumnsFromPlain(pinned_value_.data() ? 
pinned_value_ - : saved_value_); - - valid_ = true; - return true; + ValueType result_type; + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key, MergeHelper::kNoBaseValue, + merge_context_.GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, + &result_type, /* op_failure_scope */ nullptr); + return SetValueAndColumnsFromMergeResult(s, result_type); } -bool DBIter::MergeEntity(const Slice& entity, const Slice& user_key) { +bool DBIter::MergeWithPlainBaseValue(const Slice& value, + const Slice& user_key) { // `op_failure_scope` (an output parameter) is not provided (set to nullptr) // since a failure must be propagated regardless of its value. - Status s = MergeHelper::TimedFullMergeWithEntity( - merge_operator_, user_key, entity, merge_context_.GetOperands(), - &saved_value_, logger_, statistics_, clock_, - /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - if (!s.ok()) { - valid_ = false; - status_ = s; - return false; - } - - if (!SetValueAndColumnsFromEntity(saved_value_)) { - return false; - } + ValueType result_type; + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key, MergeHelper::kPlainBaseValue, value, + merge_context_.GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, + &result_type, /* op_failure_scope */ nullptr); + return SetValueAndColumnsFromMergeResult(s, result_type); +} - valid_ = true; - return true; +bool DBIter::MergeWithWideColumnBaseValue(const Slice& entity, + const Slice& user_key) { + // `op_failure_scope` (an output parameter) is not provided (set to nullptr) + // since a failure must be propagated regardless of its value. + ValueType result_type; + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key, MergeHelper::kWideBaseValue, entity, + merge_context_.GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, + &result_type, /* op_failure_scope */ nullptr); + return SetValueAndColumnsFromMergeResult(s, result_type); } // Move backwards until the key smaller than saved_key_. diff --git a/db/db_iter.h b/db/db_iter.h index e45da9dd1..5022405c3 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -313,14 +313,20 @@ class DBIter final : public Iterator { bool SetValueAndColumnsFromEntity(Slice slice); + bool SetValueAndColumnsFromMergeResult(const Status& merge_status, + ValueType result_type); + void ResetValueAndColumns() { value_.clear(); wide_columns_.clear(); } + // The following methods perform the actual merge operation for the + // no base value/plain base value/wide-column base value cases. // If user-defined timestamp is enabled, `user_key` includes timestamp. 
- bool Merge(const Slice* val, const Slice& user_key); - bool MergeEntity(const Slice& entity, const Slice& user_key); + bool MergeWithNoBaseValue(const Slice& user_key); + bool MergeWithPlainBaseValue(const Slice& value, const Slice& user_key); + bool MergeWithWideColumnBaseValue(const Slice& entity, const Slice& user_key); const SliceTransform* prefix_extractor_; Env* const env_; diff --git a/db/memtable.cc b/db/memtable.cc index 8a71a6494..630d35fed 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -1054,25 +1054,15 @@ static bool SaveValue(void* arg, const char* entry) { assert(s->do_merge); if (s->value || s->columns) { - std::string result; // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its // value. *(s->status) = MergeHelper::TimedFullMerge( - merge_operator, s->key->user_key(), &v, - merge_context->GetOperands(), &result, s->logger, s->statistics, - s->clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, + merge_operator, s->key->user_key(), + MergeHelper::kPlainBaseValue, v, merge_context->GetOperands(), + s->logger, s->statistics, s->clock, + /* update_num_ops_stats */ true, s->value, s->columns, /* op_failure_scope */ nullptr); - - if (s->status->ok()) { - if (s->value) { - *(s->value) = std::move(result); - } else { - assert(s->columns); - s->columns->SetPlainValue(std::move(result)); - } - } } } else if (s->value) { s->value->assign(v.data(), v.size()); @@ -1117,35 +1107,15 @@ static bool SaveValue(void* arg, const char* entry) { } else if (*(s->merge_in_progress)) { assert(s->do_merge); - if (s->value) { - Slice value_of_default; - *(s->status) = WideColumnSerialization::GetValueOfDefaultColumn( - v, value_of_default); - if (s->status->ok()) { - // `op_failure_scope` (an output parameter) is not provided (set - // to nullptr) since a failure must be propagated regardless of - // its value. - *(s->status) = MergeHelper::TimedFullMerge( - merge_operator, s->key->user_key(), &value_of_default, - merge_context->GetOperands(), s->value, s->logger, - s->statistics, s->clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - } - } else if (s->columns) { - std::string result; - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its - // value. - *(s->status) = MergeHelper::TimedFullMergeWithEntity( - merge_operator, s->key->user_key(), v, - merge_context->GetOperands(), &result, s->logger, s->statistics, - s->clock, /* update_num_ops_stats */ true, + if (s->value || s->columns) { + // `op_failure_scope` (an output parameter) is not provided (set + // to nullptr) since a failure must be propagated regardless of + // its value. 
+ *(s->status) = MergeHelper::TimedFullMerge( + merge_operator, s->key->user_key(), MergeHelper::kWideBaseValue, + v, merge_context->GetOperands(), s->logger, s->statistics, + s->clock, /* update_num_ops_stats */ true, s->value, s->columns, /* op_failure_scope */ nullptr); - - if (s->status->ok()) { - *(s->status) = s->columns->SetWideColumnValue(std::move(result)); - } } } else if (s->value) { Slice value_of_default; @@ -1176,25 +1146,14 @@ static bool SaveValue(void* arg, const char* entry) { case kTypeRangeDeletion: { if (*(s->merge_in_progress)) { if (s->value || s->columns) { - std::string result; // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its // value. *(s->status) = MergeHelper::TimedFullMerge( - merge_operator, s->key->user_key(), nullptr, - merge_context->GetOperands(), &result, s->logger, s->statistics, - s->clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, + merge_operator, s->key->user_key(), MergeHelper::kNoBaseValue, + merge_context->GetOperands(), s->logger, s->statistics, + s->clock, /* update_num_ops_stats */ true, s->value, s->columns, /* op_failure_scope */ nullptr); - - if (s->status->ok()) { - if (s->value) { - *(s->value) = std::move(result); - } else { - assert(s->columns); - s->columns->SetPlainValue(std::move(result)); - } - } } else { // We have found a final value (a base deletion) and have newer // merge operands that we do not intend to merge. Nothing remains @@ -1227,25 +1186,14 @@ static bool SaveValue(void* arg, const char* entry) { if (s->do_merge && merge_operator->ShouldMerge( merge_context->GetOperandsDirectionBackward())) { if (s->value || s->columns) { - std::string result; // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its // value. 
*(s->status) = MergeHelper::TimedFullMerge( - merge_operator, s->key->user_key(), nullptr, - merge_context->GetOperands(), &result, s->logger, s->statistics, - s->clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, + merge_operator, s->key->user_key(), MergeHelper::kNoBaseValue, + merge_context->GetOperands(), s->logger, s->statistics, + s->clock, /* update_num_ops_stats */ true, s->value, s->columns, /* op_failure_scope */ nullptr); - - if (s->status->ok()) { - if (s->value) { - *(s->value) = std::move(result); - } else { - assert(s->columns); - s->columns->SetPlainValue(std::move(result)); - } - } } *(s->found_final_value) = true; diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 8c7e3d441..9d212fc51 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -24,6 +24,7 @@ #include "rocksdb/system_clock.h" #include "table/format.h" #include "table/internal_iterator.h" +#include "util/overload.h" namespace ROCKSDB_NAMESPACE { @@ -57,120 +58,297 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator, } } -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, const Slice* value, - const std::vector& operands, std::string* result, Logger* logger, - Statistics* statistics, SystemClock* clock, Slice* result_operand, - bool update_num_ops_stats, - MergeOperator::OpFailureScope* op_failure_scope) { - assert(merge_operator != nullptr); - - if (operands.empty()) { - assert(value != nullptr && result != nullptr); - result->assign(value->data(), value->size()); - return Status::OK(); - } +template +Status MergeHelper::TimedFullMergeCommonImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope, Visitor&& visitor) { + assert(merge_operator); + assert(!operands.empty()); if (update_num_ops_stats) { RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS, static_cast(operands.size())); } + const MergeOperator::MergeOperationInputV3 merge_in( + key, std::move(existing_value), operands, logger); + MergeOperator::MergeOperationOutputV3 merge_out; + bool success = false; - Slice tmp_result_operand(nullptr, 0); - const MergeOperator::MergeOperationInput merge_in(key, value, operands, - logger); - MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand); + { - // Setup to time the merge StopWatchNano timer(clock, statistics != nullptr); PERF_TIMER_GUARD(merge_operator_time_nanos); - // Do the merge - success = merge_operator->FullMergeV2(merge_in, &merge_out); - - if (tmp_result_operand.data()) { - // FullMergeV2 result is an existing operand - if (result_operand != nullptr) { - *result_operand = tmp_result_operand; - } else { - result->assign(tmp_result_operand.data(), tmp_result_operand.size()); - } - } else if (result_operand) { - *result_operand = Slice(nullptr, 0); - } + success = merge_operator->FullMergeV3(merge_in, &merge_out); RecordTick(statistics, MERGE_OPERATION_TOTAL_TIME, statistics ? 
timer.ElapsedNanos() : 0); } - if (op_failure_scope != nullptr) { - *op_failure_scope = merge_out.op_failure_scope; - // Apply default per merge_operator.h - if (*op_failure_scope == MergeOperator::OpFailureScope::kDefault) { - *op_failure_scope = MergeOperator::OpFailureScope::kTryMerge; - } - } - if (!success) { RecordTick(statistics, NUMBER_MERGE_FAILURES); + + if (op_failure_scope) { + *op_failure_scope = merge_out.op_failure_scope; + // Apply default per merge_operator.h + if (*op_failure_scope == MergeOperator::OpFailureScope::kDefault) { + *op_failure_scope = MergeOperator::OpFailureScope::kTryMerge; + } + } + return Status::Corruption(Status::SubCode::kMergeOperatorFailed); } - return Status::OK(); + return std::visit(std::forward(visitor), + std::move(merge_out.new_value)); } -Status MergeHelper::TimedFullMergeWithEntity( - const MergeOperator* merge_operator, const Slice& key, Slice base_entity, - const std::vector& operands, std::string* result, Logger* logger, +Status MergeHelper::TimedFullMergeImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, std::string* result, + Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope) { + assert(result); + assert(result_type); + + auto visitor = overload{ + [&](std::string&& new_value) -> Status { + *result_type = kTypeValue; + + if (result_operand) { + *result_operand = Slice(nullptr, 0); + } + + *result = std::move(new_value); + + return Status::OK(); + }, + [&](MergeOperator::MergeOperationOutputV3::NewColumns&& new_columns) + -> Status { + *result_type = kTypeWideColumnEntity; + + if (result_operand) { + *result_operand = Slice(nullptr, 0); + } + + result->clear(); + + WideColumns sorted_columns; + sorted_columns.reserve(new_columns.size()); + + for (const auto& column : new_columns) { + sorted_columns.emplace_back(column.first, column.second); + } + + WideColumnsHelper::SortColumns(sorted_columns); + + return WideColumnSerialization::Serialize(sorted_columns, *result); + }, + [&](Slice&& operand) -> Status { + *result_type = kTypeValue; + + if (result_operand) { + *result_operand = operand; + result->clear(); + } else { + result->assign(operand.data(), operand.size()); + } + + return Status::OK(); + }}; + + return TimedFullMergeCommonImpl(merge_operator, key, + std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, + op_failure_scope, std::move(visitor)); +} + +Status MergeHelper::TimedFullMergeImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, std::string* result_value, + PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope) { + assert(result_value || result_entity); + assert(!result_value || !result_entity); + + auto visitor = overload{ + [&](std::string&& new_value) -> Status { + if (result_value) { + *result_value = std::move(new_value); + + return Status::OK(); + } + + assert(result_entity); + result_entity->SetPlainValue(std::move(new_value)); + + return Status::OK(); + }, + [&](MergeOperator::MergeOperationOutputV3::NewColumns&& new_columns) + -> Status { + if (result_value) { + if (!new_columns.empty() && + 
new_columns.front().first == kDefaultWideColumnName) { + *result_value = std::move(new_columns.front().second); + } else { + result_value->clear(); + } + + return Status::OK(); + } + + assert(result_entity); + + WideColumns sorted_columns; + sorted_columns.reserve(new_columns.size()); + + for (const auto& column : new_columns) { + sorted_columns.emplace_back(column.first, column.second); + } + + WideColumnsHelper::SortColumns(sorted_columns); + + std::string result; + const Status s = + WideColumnSerialization::Serialize(sorted_columns, result); + if (!s.ok()) { + result_entity->Reset(); + return s; + } + + return result_entity->SetWideColumnValue(std::move(result)); + }, + [&](Slice&& operand) -> Status { + if (result_value) { + result_value->assign(operand.data(), operand.size()); + + return Status::OK(); + } + + assert(result_entity); + result_entity->SetPlainValue(operand); + + return Status::OK(); + }}; + + return TimedFullMergeCommonImpl(merge_operator, key, + std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, + op_failure_scope, std::move(visitor)); +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, + const std::vector& operands, Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, std::string* result, + Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result, result_operand, + result_type, op_failure_scope); +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, + const Slice& value, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope) { - WideColumns base_columns; + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(value); - { - const Status s = - WideColumnSerialization::Deserialize(base_entity, base_columns); - if (!s.ok()) { - return s; - } - } + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result, result_operand, + result_type, op_failure_scope); +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const Slice& entity, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - const bool has_default_column = - WideColumnsHelper::HasDefaultColumn(base_columns); + Slice entity_copy(entity); + WideColumns existing_columns; - Slice value_of_default; - if (has_default_column) { - value_of_default = base_columns[0].value(); + const Status s = + WideColumnSerialization::Deserialize(entity_copy, existing_columns); + if (!s.ok()) { + return s; } - std::string merge_result; + existing_value = std::move(existing_columns); - { - const Status s = TimedFullMerge(merge_operator, key, &value_of_default, - operands, &merge_result, logger, 
statistics, - clock, nullptr /* result_operand */, - update_num_ops_stats, op_failure_scope); - if (!s.ok()) { - return s; - } - } + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result, result_operand, + result_type, op_failure_scope); +} - if (has_default_column) { - base_columns[0].value() = merge_result; +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, + const std::vector& operands, Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, std::string* result_value, + PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - const Status s = WideColumnSerialization::Serialize(base_columns, *result); - if (!s.ok()) { - return s; - } - } else { - const Status s = - WideColumnSerialization::Serialize(merge_result, base_columns, *result); - if (!s.ok()) { - return s; - } + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result_value, result_entity, + op_failure_scope); +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, + const Slice& value, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(value); + + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result_value, result_entity, + op_failure_scope); +} + +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const Slice& entity, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + + Slice entity_copy(entity); + WideColumns existing_columns; + + const Status s = + WideColumnSerialization::Deserialize(entity_copy, existing_columns); + if (!s.ok()) { + return s; } - return Status::OK(); + existing_value = std::move(existing_columns); + + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result_value, result_entity, + op_failure_scope); } // PRE: iter points to the first merge type entry @@ -288,7 +466,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, // hit a put/delete/single delete // => merge the put value or a nullptr with operands_ // => store result in operands_.back() (and update keys_.back()) - // => change the entry type to kTypeValue for keys_.back() + // => change the entry type for keys_.back() // We are done! Success! // If there are no operands, just return the Status::OK(). That will cause @@ -301,24 +479,23 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, // TODO: if we're in compaction and it's a put, it would be nice to run // compaction filter on it. 
std::string merge_result; + ValueType merge_result_type; MergeOperator::OpFailureScope op_failure_scope; if (range_del_agg && range_del_agg->ShouldDelete( ikey, RangeDelPositioningMode::kForwardTraversal)) { - s = TimedFullMerge(user_merge_operator_, ikey.user_key, nullptr, - merge_context_.GetOperands(), &merge_result, logger_, - stats_, clock_, - /* result_operand */ nullptr, - /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, ikey.user_key, kNoBaseValue, + merge_context_.GetOperands(), logger_, stats_, + clock_, /* update_num_ops_stats */ false, + &merge_result, /* result_operand */ nullptr, + &merge_result_type, &op_failure_scope); } else if (ikey.type == kTypeValue) { - const Slice val = iter->value(); - - s = TimedFullMerge(user_merge_operator_, ikey.user_key, &val, - merge_context_.GetOperands(), &merge_result, logger_, - stats_, clock_, - /* result_operand */ nullptr, - /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, ikey.user_key, kPlainBaseValue, + iter->value(), merge_context_.GetOperands(), logger_, + stats_, clock_, /* update_num_ops_stats */ false, + &merge_result, /* result_operand */ nullptr, + &merge_result_type, &op_failure_scope); } else if (ikey.type == kTypeBlobIndex) { BlobIndex blob_index; @@ -348,22 +525,23 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, c_iter_stats->total_blob_bytes_read += bytes_read; } - s = TimedFullMerge(user_merge_operator_, ikey.user_key, &blob_value, - merge_context_.GetOperands(), &merge_result, logger_, - stats_, clock_, - /* result_operand */ nullptr, - /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, ikey.user_key, kPlainBaseValue, + blob_value, merge_context_.GetOperands(), logger_, + stats_, clock_, /* update_num_ops_stats */ false, + &merge_result, /* result_operand */ nullptr, + &merge_result_type, &op_failure_scope); } else if (ikey.type == kTypeWideColumnEntity) { - s = TimedFullMergeWithEntity( - user_merge_operator_, ikey.user_key, iter->value(), - merge_context_.GetOperands(), &merge_result, logger_, stats_, - clock_, /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, ikey.user_key, kWideBaseValue, + iter->value(), merge_context_.GetOperands(), logger_, + stats_, clock_, /* update_num_ops_stats */ false, + &merge_result, /* result_operand */ nullptr, + &merge_result_type, &op_failure_scope); } else { - s = TimedFullMerge(user_merge_operator_, ikey.user_key, nullptr, - merge_context_.GetOperands(), &merge_result, logger_, - stats_, clock_, - /* result_operand */ nullptr, - /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, ikey.user_key, kNoBaseValue, + merge_context_.GetOperands(), logger_, stats_, + clock_, /* update_num_ops_stats */ false, + &merge_result, /* result_operand */ nullptr, + &merge_result_type, &op_failure_scope); } // We store the result in keys_.back() and operands_.back() @@ -371,10 +549,12 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, if (s.ok()) { // The original key encountered original_key = std::move(keys_.back()); - orig_ikey.type = ikey.type == kTypeWideColumnEntity - ? 
kTypeWideColumnEntity - : kTypeValue; + + assert(merge_result_type == kTypeValue || + merge_result_type == kTypeWideColumnEntity); + orig_ikey.type = merge_result_type; UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); + keys_.clear(); merge_context_.Clear(); keys_.emplace_front(std::move(original_key)); @@ -499,19 +679,24 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, assert(merge_context_.GetNumOperands() >= 1); assert(merge_context_.GetNumOperands() == keys_.size()); std::string merge_result; + ValueType merge_result_type; MergeOperator::OpFailureScope op_failure_scope; - s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr, - merge_context_.GetOperands(), &merge_result, logger_, - stats_, clock_, - /* result_operand */ nullptr, - /* update_num_ops_stats */ false, &op_failure_scope); + s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, kNoBaseValue, + merge_context_.GetOperands(), logger_, stats_, clock_, + /* update_num_ops_stats */ false, &merge_result, + /* result_operand */ nullptr, &merge_result_type, + &op_failure_scope); if (s.ok()) { // The original key encountered // We are certain that keys_ is not empty here (see assertions couple of // lines before). original_key = std::move(keys_.back()); - orig_ikey.type = kTypeValue; + + assert(merge_result_type == kTypeValue || + merge_result_type == kTypeWideColumnEntity); + orig_ikey.type = merge_result_type; UpdateInternalKey(&original_key, orig_ikey.sequence, orig_ikey.type); + keys_.clear(); merge_context_.Clear(); keys_.emplace_front(std::move(original_key)); diff --git a/db/merge_helper.h b/db/merge_helper.h index 7f624b743..93267c9a9 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -41,28 +41,75 @@ class MergeHelper { Statistics* stats = nullptr, const std::atomic* shutting_down = nullptr); - // Wrapper around MergeOperator::FullMergeV2() that records perf statistics. - // Result of merge will be written to result if status returned is OK. - // If operands is empty, the value will simply be copied to result. - // Set `update_num_ops_stats` to true if it is from a user read, so that - // the latency is sensitive. + // Wrappers around MergeOperator::FullMergeV3() that record perf statistics. + // Set `update_num_ops_stats` to true if it is from a user read so that + // the corresponding statistics are updated. // Returns one of the following statuses: // - OK: Entries were successfully merged. // - Corruption: Merge operator reported unsuccessful merge. The scope of the // damage will be stored in `*op_failure_scope` when `op_failure_scope` is // not nullptr + + // Empty tag types to disambiguate overloads + struct NoBaseValueTag {}; + static constexpr NoBaseValueTag kNoBaseValue{}; + + struct PlainBaseValueTag {}; + static constexpr PlainBaseValueTag kPlainBaseValue{}; + + struct WideBaseValueTag {}; + static constexpr WideBaseValueTag kWideBaseValue{}; + + // Variants that expose the merge result directly (in serialized form for wide + // columns) as well as its value type. Used by iterator and compaction. 
static Status TimedFullMerge(const MergeOperator* merge_operator, - const Slice& key, const Slice* value, + const Slice& key, NoBaseValueTag, const std::vector& operands, - std::string* result, Logger* logger, - Statistics* statistics, SystemClock* clock, - Slice* result_operand, bool update_num_ops_stats, + Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, + ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope); - static Status TimedFullMergeWithEntity( - const MergeOperator* merge_operator, const Slice& key, Slice base_entity, - const std::vector& operands, std::string* result, Logger* logger, + static Status TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, + const Slice& value, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope); + + static Status TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const Slice& entity, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope); + + // Variants that expose the merge result translated to the form requested by + // the client. (For example, if the result is a wide-column structure but the + // client requested the results in plain-value form, the value of the default + // column is returned.) Used by point lookups. + static Status TimedFullMerge(const MergeOperator* merge_operator, + const Slice& key, NoBaseValueTag, + const std::vector& operands, + Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, + PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope); + + static Status TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, + const Slice& value, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope); + + static Status TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const Slice& entity, const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, PinnableWideColumns* result_entity, MergeOperator::OpFailureScope* op_failure_scope); // During compaction, merge entries until we hit @@ -198,6 +245,30 @@ class MergeHelper { // This is a best-effort facility, so memory_order_relaxed is sufficient. 
return shutting_down_ && shutting_down_->load(std::memory_order_relaxed); } + + template + static Status TimedFullMergeCommonImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope, Visitor&& visitor); + + static Status TimedFullMergeImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result, Slice* result_operand, ValueType* result_type, + MergeOperator::OpFailureScope* op_failure_scope); + + static Status TimedFullMergeImpl( + const MergeOperator* merge_operator, const Slice& key, + MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, + const std::vector& operands, Logger* logger, + Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope); }; // MergeOutputIterator can be used to iterate over the result of a merge. diff --git a/db/version_set.cc b/db/version_set.cc index ef6d30944..7b20adedc 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2527,21 +2527,16 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; if (value || columns) { - std::string result; // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its value. *status = MergeHelper::TimedFullMerge( - merge_operator_, user_key, nullptr, merge_context->GetOperands(), - &result, info_log_, db_statistics_, clock_, - /* result_operand */ nullptr, /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); + merge_operator_, user_key, MergeHelper::kNoBaseValue, + merge_context->GetOperands(), info_log_, db_statistics_, clock_, + /* update_num_ops_stats */ true, value ? value->GetSelf() : nullptr, + columns, /* op_failure_scope */ nullptr); if (status->ok()) { if (LIKELY(value != nullptr)) { - *(value->GetSelf()) = std::move(result); value->PinSelf(); - } else { - assert(columns != nullptr); - columns->SetPlainValue(std::move(result)); } } } @@ -2778,22 +2773,19 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, } // merge_operands are in saver and we hit the beginning of the key history // do a final merge of nullptr and operands; - std::string result; - // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its value. *status = MergeHelper::TimedFullMerge( - merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(), - &result, info_log_, db_statistics_, clock_, - /* result_operand */ nullptr, /* update_num_ops_stats */ true, + merge_operator_, user_key, MergeHelper::kNoBaseValue, + iter->merge_context.GetOperands(), info_log_, db_statistics_, clock_, + /* update_num_ops_stats */ true, + iter->value ? 
iter->value->GetSelf() : nullptr, iter->columns, /* op_failure_scope */ nullptr); if (LIKELY(iter->value != nullptr)) { - *iter->value->GetSelf() = std::move(result); iter->value->PinSelf(); range->AddValueSize(iter->value->size()); } else { assert(iter->columns); - iter->columns->SetPlainValue(std::move(result)); range->AddValueSize(iter->columns->serialized_size()); } diff --git a/db/write_batch.cc b/db/write_batch.cc index 2851b8559..17ccca2fa 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -2483,6 +2483,8 @@ class MemTableInserter : public WriteBatch::Handler { } if (perform_merge) { + // TODO: support wide-column base values for max_successive_merges + // 1) Get the existing value std::string get_value; @@ -2510,13 +2512,15 @@ class MemTableInserter : public WriteBatch::Handler { assert(merge_operator); std::string new_value; + ValueType new_value_type; // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its value. Status merge_status = MergeHelper::TimedFullMerge( - merge_operator, key, &get_value_slice, {value}, &new_value, - moptions->info_log, moptions->statistics, - SystemClock::Default().get(), /* result_operand */ nullptr, - /* update_num_ops_stats */ false, + merge_operator, key, MergeHelper::kPlainBaseValue, get_value_slice, + {value}, moptions->info_log, moptions->statistics, + SystemClock::Default().get(), + /* update_num_ops_stats */ false, &new_value, + /* result_operand */ nullptr, &new_value_type, /* op_failure_scope */ nullptr); if (!merge_status.ok()) { @@ -2530,11 +2534,13 @@ class MemTableInserter : public WriteBatch::Handler { auto merged_kv_prot_info = kv_prot_info->StripC(column_family_id).ProtectS(sequence_); merged_kv_prot_info.UpdateV(value, new_value); - merged_kv_prot_info.UpdateO(kTypeMerge, kTypeValue); - ret_status = mem->Add(sequence_, kTypeValue, key, new_value, + assert(new_value_type == kTypeValue || + new_value_type == kTypeWideColumnEntity); + merged_kv_prot_info.UpdateO(kTypeMerge, new_value_type); + ret_status = mem->Add(sequence_, new_value_type, key, new_value, &merged_kv_prot_info); } else { - ret_status = mem->Add(sequence_, kTypeValue, key, new_value, + ret_status = mem->Add(sequence_, new_value_type, key, new_value, nullptr /* kv_prot_info */); } } diff --git a/include/rocksdb/merge_operator.h b/include/rocksdb/merge_operator.h index 4db9380b6..6be9e3962 100644 --- a/include/rocksdb/merge_operator.h +++ b/include/rocksdb/merge_operator.h @@ -36,7 +36,7 @@ class Logger; // into rocksdb); numeric addition and string concatenation are examples; // // b) MergeOperator - the generic class for all the more abstract / complex -// operations; one method (FullMergeV2) to merge a Put/Delete value with a +// operations; one method (FullMergeV3) to merge a Put/Delete value with a // merge operand; and another method (PartialMerge) that merges multiple // operands together. this is especially useful if your key values have // complex structures but you would still like to support client-specific @@ -198,7 +198,6 @@ class MergeOperator : public Customizable { OpFailureScope op_failure_scope = OpFailureScope::kDefault; }; - // ************************** UNDER CONSTRUCTION ***************************** // An extended version of FullMergeV2() that supports wide columns on both the // input and the output side, enabling the application to perform general // transformations during merges. 
For backward compatibility, the default @@ -238,7 +237,7 @@ class MergeOperator : public Customizable { // TODO: Presently there is no way to differentiate between error/corruption // and simply "return false". For now, the client should simply return // false in any case it cannot perform partial-merge, regardless of reason. - // If there is corruption in the data, handle it in the FullMergeV2() function + // If there is corruption in the data, handle it in the FullMergeV3() function // and return false there. The default implementation of PartialMerge will // always return false. virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/, @@ -295,8 +294,8 @@ class MergeOperator : public Customizable { // Doesn't help with iterators. // // Note: the merge operands are passed to this function in the reversed order - // relative to how they were merged (passed to FullMerge or FullMergeV2) - // for performance reasons, see also: + // relative to how they were merged (passed to + // FullMerge/FullMergeV2/FullMergeV3) for performance reasons, see also: // https://github.com/facebook/rocksdb/issues/3865 virtual bool ShouldMerge(const std::vector& /*operands*/) const { return false; diff --git a/table/get_context.cc b/table/get_context.cc index 8f5cd75f1..ada7a3539 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -374,7 +374,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, Slice blob_value(pin_val); state_ = kFound; if (do_merge_) { - Merge(&blob_value); + MergeWithPlainBaseValue(blob_value); } else { // It means this function is called as part of DB GetMergeOperands // API and the current value should be part of @@ -385,7 +385,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, state_ = kFound; if (do_merge_) { - MergeWithEntity(value); + MergeWithWideColumnBaseValue(value); } else { // It means this function is called as part of DB GetMergeOperands // API and the current value should be part of @@ -407,7 +407,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, state_ = kFound; if (do_merge_) { - Merge(&value); + MergeWithPlainBaseValue(value); } else { // It means this function is called as part of DB GetMergeOperands // API and the current value should be part of @@ -430,7 +430,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, } else if (kMerge == state_) { state_ = kFound; if (do_merge_) { - Merge(nullptr); + MergeWithNoBaseValue(); } // If do_merge_ = false then the current value shouldn't be part of // merge_context_->operand_list @@ -448,7 +448,7 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, merge_operator_->ShouldMerge( merge_context_->GetOperandsDirectionBackward())) { state_ = kFound; - Merge(nullptr); + MergeWithNoBaseValue(); return false; } return true; @@ -463,20 +463,9 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, return false; } -void GetContext::Merge(const Slice* value) { - assert(do_merge_); - assert(!pinnable_val_ || !columns_); - - std::string result; - // `op_failure_scope` (an output parameter) is not provided (set to nullptr) - // since a failure must be propagated regardless of its value. 
- const Status s = MergeHelper::TimedFullMerge( - merge_operator_, user_key_, value, merge_context_->GetOperands(), &result, - logger_, statistics_, clock_, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - if (!s.ok()) { - if (s.subcode() == Status::SubCode::kMergeOperatorFailed) { +void GetContext::PostprocessMerge(const Status& merge_status) { + if (!merge_status.ok()) { + if (merge_status.subcode() == Status::SubCode::kMergeOperatorFailed) { state_ = kMergeOperatorFailed; } else { state_ = kCorrupt; @@ -485,81 +474,56 @@ void GetContext::Merge(const Slice* value) { } if (LIKELY(pinnable_val_ != nullptr)) { - *(pinnable_val_->GetSelf()) = std::move(result); pinnable_val_->PinSelf(); - return; } - - assert(columns_); - columns_->SetPlainValue(std::move(result)); } -void GetContext::MergeWithEntity(Slice entity) { +void GetContext::MergeWithNoBaseValue() { assert(do_merge_); + assert(pinnable_val_ || columns_); assert(!pinnable_val_ || !columns_); - if (LIKELY(pinnable_val_ != nullptr)) { - Slice value_of_default; - - { - const Status s = WideColumnSerialization::GetValueOfDefaultColumn( - entity, value_of_default); - if (!s.ok()) { - state_ = kCorrupt; - return; - } - } + // `op_failure_scope` (an output parameter) is not provided (set to nullptr) + // since a failure must be propagated regardless of its value. + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key_, MergeHelper::kNoBaseValue, + merge_context_->GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, + pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, + /* op_failure_scope */ nullptr); + PostprocessMerge(s); +} - { - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - const Status s = MergeHelper::TimedFullMerge( - merge_operator_, user_key_, &value_of_default, - merge_context_->GetOperands(), pinnable_val_->GetSelf(), logger_, - statistics_, clock_, /* result_operand */ nullptr, - /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - if (!s.ok()) { - if (s.subcode() == Status::SubCode::kMergeOperatorFailed) { - state_ = kMergeOperatorFailed; - } else { - state_ = kCorrupt; - } - return; - } - } +void GetContext::MergeWithPlainBaseValue(const Slice& value) { + assert(do_merge_); + assert(pinnable_val_ || columns_); + assert(!pinnable_val_ || !columns_); - pinnable_val_->PinSelf(); - return; - } + // `op_failure_scope` (an output parameter) is not provided (set to nullptr) + // since a failure must be propagated regardless of its value. + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key_, MergeHelper::kPlainBaseValue, value, + merge_context_->GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, + pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, + /* op_failure_scope */ nullptr); + PostprocessMerge(s); +} - std::string result; - - { - // `op_failure_scope` (an output parameter) is not provided (set to nullptr) - // since a failure must be propagated regardless of its value. 
- const Status s = MergeHelper::TimedFullMergeWithEntity( - merge_operator_, user_key_, entity, merge_context_->GetOperands(), - &result, logger_, statistics_, clock_, /* update_num_ops_stats */ true, - /* op_failure_scope */ nullptr); - if (!s.ok()) { - if (s.subcode() == Status::SubCode::kMergeOperatorFailed) { - state_ = kMergeOperatorFailed; - } else { - state_ = kCorrupt; - } - return; - } - } +void GetContext::MergeWithWideColumnBaseValue(const Slice& entity) { + assert(do_merge_); + assert(pinnable_val_ || columns_); + assert(!pinnable_val_ || !columns_); - { - assert(columns_); - const Status s = columns_->SetWideColumnValue(std::move(result)); - if (!s.ok()) { - state_ = kCorrupt; - return; - } - } + // `op_failure_scope` (an output parameter) is not provided (set to nullptr) + // since a failure must be propagated regardless of its value. + const Status s = MergeHelper::TimedFullMerge( + merge_operator_, user_key_, MergeHelper::kWideBaseValue, entity, + merge_context_->GetOperands(), logger_, statistics_, clock_, + /* update_num_ops_stats */ true, + pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, + /* op_failure_scope */ nullptr); + PostprocessMerge(s); } bool GetContext::GetBlobValue(const Slice& user_key, const Slice& blob_index, diff --git a/table/get_context.h b/table/get_context.h index 528cd14fd..9bff27503 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -191,8 +191,16 @@ class GetContext { void push_operand(const Slice& value, Cleanable* value_pinner); private: - void Merge(const Slice* value); - void MergeWithEntity(Slice entity); + // Helper method that postprocesses the results of merge operations, e.g. it + // sets the state correctly upon merge errors. + void PostprocessMerge(const Status& merge_status); + + // The following methods perform the actual merge operation for the + // no base value/plain base value/wide-column base value cases. + void MergeWithNoBaseValue(); + void MergeWithPlainBaseValue(const Slice& value); + void MergeWithWideColumnBaseValue(const Slice& entity); + bool GetBlobValue(const Slice& user_key, const Slice& blob_index, PinnableSlice* blob_value); diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index 208eeb44b..3c41009fa 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -3,7 +3,6 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
- #include "rocksdb/utilities/write_batch_with_index.h" #include @@ -547,9 +546,9 @@ Status WriteBatchWithIndex::GetFromBatchAndDB( // Merge result from DB with merges in Batch std::string merge_result; if (s.ok()) { - s = wbwii.MergeKey(key, pinnable_val, &merge_result); + s = wbwii.MergeKey(key, *pinnable_val, &merge_result); } else { // Key not present in db (s.IsNotFound()) - s = wbwii.MergeKey(key, nullptr, &merge_result); + s = wbwii.MergeKey(key, &merge_result); } if (s.ok()) { pinnable_val->Reset(); @@ -644,11 +643,10 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( std::string merged_value; // Merge result from DB with merges in Batch if (key.s->ok()) { - *key.s = wbwii.MergeKey(*key.key, iter->value, merge_result.second, + *key.s = wbwii.MergeKey(*key.key, *iter->value, merge_result.second, &merged_value); } else { // Key not present in db (s.IsNotFound()) - *key.s = wbwii.MergeKey(*key.key, nullptr, merge_result.second, - &merged_value); + *key.s = wbwii.MergeKey(*key.key, merge_result.second, &merged_value); } if (key.s->ok()) { key.value->Reset(); diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index ee4754f8d..7ffc9ea6f 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -3,13 +3,13 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). - #include "utilities/write_batch_with_index/write_batch_with_index_internal.h" #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/merge_context.h" #include "db/merge_helper.h" +#include "options/cf_options.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/utilities/write_batch_with_index.h" @@ -157,19 +157,16 @@ Slice BaseDeltaIterator::value() const { return delta_entry.value; } else if (delta_entry.type == kDeleteRecord || delta_entry.type == kSingleDeleteRecord) { - status_ = - wbwii_->MergeKey(delta_entry.key, nullptr, merge_result_.GetSelf()); + status_ = wbwii_->MergeKey(delta_entry.key, merge_result_.GetSelf()); } else if (delta_entry.type == kPutRecord) { - status_ = wbwii_->MergeKey(delta_entry.key, &delta_entry.value, + status_ = wbwii_->MergeKey(delta_entry.key, delta_entry.value, merge_result_.GetSelf()); } else if (delta_entry.type == kMergeRecord) { if (equal_keys_) { - Slice base_value = base_iterator_->value(); - status_ = wbwii_->MergeKey(delta_entry.key, &base_value, + status_ = wbwii_->MergeKey(delta_entry.key, base_iterator_->value(), merge_result_.GetSelf()); } else { - status_ = - wbwii_->MergeKey(delta_entry.key, nullptr, merge_result_.GetSelf()); + status_ = wbwii_->MergeKey(delta_entry.key, merge_result_.GetSelf()); } } merge_result_.PinSelf(); @@ -646,55 +643,90 @@ WriteBatchWithIndexInternal::WriteBatchWithIndexInternal( const DBOptions* db_options, ColumnFamilyHandle* column_family) : db_(nullptr), db_options_(db_options), column_family_(column_family) {} +const ImmutableOptions& WriteBatchWithIndexInternal::GetCFOptions() const { + const auto* cfh = + static_cast_with_check(column_family_); + assert(cfh); + assert(cfh->cfd()); + assert(cfh->cfd()->ioptions()); + + return *cfh->cfd()->ioptions(); +} + +std::tuple +WriteBatchWithIndexInternal::GetStatsLoggerAndClock( + const ImmutableOptions& cf_opts) const { + if (db_) { + const auto& db_opts = static_cast_with_check(db_->GetRootDB()) + 
->immutable_db_options(); + + return {db_opts.logger, db_opts.statistics.get(), db_opts.clock}; + } + + if (db_options_) { + assert(db_options_->env); + + return {db_options_->info_log.get(), db_options_->statistics.get(), + db_options_->env->GetSystemClock().get()}; + } + + return {cf_opts.logger, cf_opts.stats, cf_opts.clock}; +} + Status WriteBatchWithIndexInternal::MergeKey(const Slice& key, - const Slice* value, const MergeContext& context, std::string* result) const { - if (column_family_ != nullptr) { - auto cfh = static_cast_with_check(column_family_); - const auto merge_operator = cfh->cfd()->ioptions()->merge_operator.get(); - if (merge_operator == nullptr) { - return Status::InvalidArgument( - "Merge_operator must be set for column_family"); - } else if (db_ != nullptr) { - const ImmutableDBOptions& immutable_db_options = - static_cast_with_check(db_->GetRootDB()) - ->immutable_db_options(); - Statistics* statistics = immutable_db_options.statistics.get(); - Logger* logger = immutable_db_options.info_log.get(); - SystemClock* clock = immutable_db_options.clock; - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - return MergeHelper::TimedFullMerge( - merge_operator, key, value, context.GetOperands(), result, logger, - statistics, clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ false, - /* op_failure_scope */ nullptr); - } else if (db_options_ != nullptr) { - Statistics* statistics = db_options_->statistics.get(); - Env* env = db_options_->env; - Logger* logger = db_options_->info_log.get(); - SystemClock* clock = env->GetSystemClock().get(); - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - return MergeHelper::TimedFullMerge( - merge_operator, key, value, context.GetOperands(), result, logger, - statistics, clock, /* result_operand */ nullptr, - /* update_num_ops_stats */ false, - /* op_failure_scope */ nullptr); - } else { - const auto cf_opts = cfh->cfd()->ioptions(); - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - return MergeHelper::TimedFullMerge( - merge_operator, key, value, context.GetOperands(), result, - cf_opts->logger, cf_opts->stats, cf_opts->clock, - /* result_operand */ nullptr, /* update_num_ops_stats */ false, - /* op_failure_scope */ nullptr); - } - } else { + // TODO: support wide columns in WBWI + + if (!column_family_) { return Status::InvalidArgument("Must provide a column_family"); } + + const auto& cf_opts = GetCFOptions(); + + const auto* merge_operator = cf_opts.merge_operator.get(); + if (!merge_operator) { + return Status::InvalidArgument( + "Merge_operator must be set for column_family"); + } + + auto [logger, statistics, clock] = GetStatsLoggerAndClock(cf_opts); + + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. 
+ return MergeHelper::TimedFullMerge( + merge_operator, key, MergeHelper::kNoBaseValue, context.GetOperands(), + logger, statistics, clock, /* update_num_ops_stats */ false, result, + /* columns */ nullptr, /* op_failure_scope */ nullptr); +} + +Status WriteBatchWithIndexInternal::MergeKey(const Slice& key, + const Slice& value, + const MergeContext& context, + std::string* result) const { + // TODO: support wide columns in WBWI + + if (!column_family_) { + return Status::InvalidArgument("Must provide a column_family"); + } + + const auto& cf_opts = GetCFOptions(); + + const auto* merge_operator = cf_opts.merge_operator.get(); + if (!merge_operator) { + return Status::InvalidArgument( + "Merge_operator must be set for column_family"); + } + + auto [logger, statistics, clock] = GetStatsLoggerAndClock(cf_opts); + + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. + return MergeHelper::TimedFullMerge( + merge_operator, key, MergeHelper::kPlainBaseValue, value, + context.GetOperands(), logger, statistics, clock, + /* update_num_ops_stats */ false, result, + /* columns */ nullptr, /* op_failure_scope */ nullptr); } WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( @@ -718,7 +750,7 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( } else if (result == WBWIIteratorImpl::Result::kFound) { // PUT Slice entry_value = iter->Entry().value; if (context->GetNumOperands() > 0) { - *s = MergeKey(key, &entry_value, *context, value); + *s = MergeKey(key, entry_value, *context, value); if (!s->ok()) { result = WBWIIteratorImpl::Result::kError; } @@ -727,7 +759,7 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( } } else if (result == WBWIIteratorImpl::kDeleted) { if (context->GetNumOperands() > 0) { - *s = MergeKey(key, nullptr, *context, value); + *s = MergeKey(key, *context, value); if (s->ok()) { result = WBWIIteratorImpl::Result::kFound; } else { @@ -739,4 +771,3 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( } } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index 031d72889..3798532b4 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -4,7 +4,6 @@ // (found in the LICENSE.Apache file in the root directory). 
#pragma once - #include #include #include @@ -25,6 +24,7 @@ class MergeContext; class WBWIIteratorImpl; class WriteBatchWithIndexInternal; struct Options; +struct ImmutableOptions; // when direction == forward // * current_at_base_ <=> base_iterator > delta_iterator @@ -322,17 +322,31 @@ class WriteBatchWithIndexInternal { const Slice& key, MergeContext* merge_context, std::string* value, Status* s); - Status MergeKey(const Slice& key, const Slice* value, + + // Merge with no base value + Status MergeKey(const Slice& key, const MergeContext& context, + std::string* result) const; + Status MergeKey(const Slice& key, std::string* result) const { + return MergeKey(key, merge_context_, result); + } + + // Merge with plain base value + Status MergeKey(const Slice& key, const Slice& value, + const MergeContext& context, std::string* result) const; + Status MergeKey(const Slice& key, const Slice& value, std::string* result) const { return MergeKey(key, value, merge_context_, result); } - Status MergeKey(const Slice& key, const Slice* value, - const MergeContext& context, std::string* result) const; + size_t GetNumOperands() const { return merge_context_.GetNumOperands(); } MergeContext* GetMergeContext() { return &merge_context_; } Slice GetOperand(int index) const { return merge_context_.GetOperand(index); } private: + const ImmutableOptions& GetCFOptions() const; + std::tuple GetStatsLoggerAndClock( + const ImmutableOptions& cf_opts) const; + DB* db_; const DBOptions* db_options_; ColumnFamilyHandle* column_family_; From 51b3b7e08c7263ba44aa9841b9f8a0f3d49d18b0 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 20 Sep 2023 08:04:35 -0700 Subject: [PATCH 137/386] Remove a now-unnecessary WideColumnSerialization::Serialize variant (#11864) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11864 Reviewed By: jaykorean Differential Revision: D49445163 fbshipit-source-id: a1275112b9f87a3652df5f155bd215db5819326c --- db/wide/wide_column_serialization.cc | 23 +++--------------- db/wide/wide_column_serialization.h | 20 ---------------- db/wide/wide_column_serialization_test.cc | 29 ----------------------- 3 files changed, 3 insertions(+), 69 deletions(-) diff --git a/db/wide/wide_column_serialization.cc b/db/wide/wide_column_serialization.cc index cd1800795..bb3f29584 100644 --- a/db/wide/wide_column_serialization.cc +++ b/db/wide/wide_column_serialization.cc @@ -16,11 +16,9 @@ namespace ROCKSDB_NAMESPACE { -Status WideColumnSerialization::SerializeImpl(const Slice* value_of_default, - const WideColumns& columns, - std::string& output) { - const size_t num_columns = - value_of_default ? 
columns.size() + 1 : columns.size(); +Status WideColumnSerialization::Serialize(const WideColumns& columns, + std::string& output) { + const size_t num_columns = columns.size(); if (num_columns > static_cast(std::numeric_limits::max())) { return Status::InvalidArgument("Too many wide columns"); @@ -31,17 +29,6 @@ Status WideColumnSerialization::SerializeImpl(const Slice* value_of_default, PutVarint32(&output, static_cast(num_columns)); const Slice* prev_name = nullptr; - if (value_of_default) { - if (value_of_default->size() > - static_cast(std::numeric_limits::max())) { - return Status::InvalidArgument("Wide column value too long"); - } - - PutLengthPrefixedSlice(&output, kDefaultWideColumnName); - PutVarint32(&output, static_cast(value_of_default->size())); - - prev_name = &kDefaultWideColumnName; - } for (size_t i = 0; i < columns.size(); ++i) { const WideColumn& column = columns[i]; @@ -68,10 +55,6 @@ Status WideColumnSerialization::SerializeImpl(const Slice* value_of_default, prev_name = &name; } - if (value_of_default) { - output.append(value_of_default->data(), value_of_default->size()); - } - for (const auto& column : columns) { const Slice& value = column.value(); diff --git a/db/wide/wide_column_serialization.h b/db/wide/wide_column_serialization.h index f0ffbd392..bb92db04f 100644 --- a/db/wide/wide_column_serialization.h +++ b/db/wide/wide_column_serialization.h @@ -44,9 +44,6 @@ class Slice; class WideColumnSerialization { public: static Status Serialize(const WideColumns& columns, std::string& output); - static Status Serialize(const Slice& value_of_default, - const WideColumns& other_columns, - std::string& output); static Status Deserialize(Slice& input, WideColumns& columns); @@ -55,23 +52,6 @@ class WideColumnSerialization { static Status GetValueOfDefaultColumn(Slice& input, Slice& value); static constexpr uint32_t kCurrentVersion = 1; - - private: - static Status SerializeImpl(const Slice* value_of_default, - const WideColumns& columns, std::string& output); }; -inline Status WideColumnSerialization::Serialize(const WideColumns& columns, - std::string& output) { - constexpr Slice* value_of_default = nullptr; - - return SerializeImpl(value_of_default, columns, output); -} - -inline Status WideColumnSerialization::Serialize( - const Slice& value_of_default, const WideColumns& other_columns, - std::string& output) { - return SerializeImpl(&value_of_default, other_columns, output); -} - } // namespace ROCKSDB_NAMESPACE diff --git a/db/wide/wide_column_serialization_test.cc b/db/wide/wide_column_serialization_test.cc index 8060d2f24..a52d8eb3b 100644 --- a/db/wide/wide_column_serialization_test.cc +++ b/db/wide/wide_column_serialization_test.cc @@ -124,25 +124,6 @@ TEST(WideColumnSerializationTest, SerializeDeserialize) { } } -TEST(WideColumnSerializationTest, SerializeWithPrepend) { - Slice value_of_default("baz"); - WideColumns other_columns{{"foo", "bar"}, {"hello", "world"}}; - - std::string output; - ASSERT_OK(WideColumnSerialization::Serialize(value_of_default, other_columns, - output)); - - Slice input(output); - - WideColumns deserialized_columns; - ASSERT_OK(WideColumnSerialization::Deserialize(input, deserialized_columns)); - - WideColumns expected_columns{{kDefaultWideColumnName, value_of_default}, - other_columns[0], - other_columns[1]}; - ASSERT_EQ(deserialized_columns, expected_columns); -} - TEST(WideColumnSerializationTest, SerializeDuplicateError) { WideColumns columns{{"foo", "bar"}, {"foo", "baz"}}; std::string output; @@ -151,16 +132,6 @@ 
TEST(WideColumnSerializationTest, SerializeDuplicateError) { WideColumnSerialization::Serialize(columns, output).IsCorruption()); } -TEST(WideColumnSerializationTest, SerializeWithPrependDuplicateError) { - Slice value_of_default("baz"); - WideColumns other_columns{{kDefaultWideColumnName, "dup"}, {"foo", "bar"}}; - - std::string output; - ASSERT_TRUE(WideColumnSerialization::Serialize(value_of_default, - other_columns, output) - .IsCorruption()); -} - TEST(WideColumnSerializationTest, SerializeOutOfOrderError) { WideColumns columns{{"hello", "world"}, {"foo", "bar"}}; std::string output; From 8acf17002a75727e036ad5974ddd335ba092a9be Mon Sep 17 00:00:00 2001 From: chuhao zeng Date: Wed, 20 Sep 2023 11:34:38 -0700 Subject: [PATCH 138/386] Fix row cache falsely return kNotFound when timestamp enabled (#11816) Summary: **Summary:** When row cache hits and a timestamp is being set in read_options, even though ROW_CACHE entry is hit, the return status is kNotFound. **Cause of error:** If timestamp is provided in readoptions, a callback for sequence number checking is registered [here](https://github.com/facebook/rocksdb/blob/8fc78a3a9e1d24ba55731b70c0c25cef0765dbc8/db/db_impl/db_impl.cc#L2112). Hence the default value set at this [line](https://github.com/facebook/rocksdb/blob/694e49cbb1cff88fbb84a96394a0f76b7bac9e41/table/get_context.cc#L611) prevents get_context from saving value found in cache. Causing the final status to be kNotFound even though the entry exist in both cache and SST file. **Proposed Solution** Row cache key contains a sequence number in it. If the key for row cache lookup matches the key in cache, this cache entry should be good to be exposed to user and hence we reuse the sequence number in cache key rather than passing kMaxSequenceNumber. 
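To make the proposed encoding concrete, here is a minimal standalone sketch; it is hedged: `EncodeRowCacheSeq`/`DecodeRowCacheSeq` are illustrative names rather than RocksDB APIs, and the decode step simply mirrors the `cache_entry_seq_no == 0 ? 0 : cache_entry_seq_no - 1` arithmetic in the diff below.

```cpp
#include <cstdint>

// Stored in the row cache key: 0 when no snapshot/seq-checking callback is
// in play, otherwise the internal key's sequence number plus 1 so that it
// can be distinguished from 0. (Simplified from CreateRowCacheKeyPrefix.)
uint64_t EncodeRowCacheSeq(bool needs_seq_check, uint64_t internal_key_seq) {
  return needs_seq_check ? internal_key_seq + 1 : 0;
}

// On a row cache hit, recover the sequence number to attach to the replayed
// entry instead of kMaxSequenceNumber, so the visibility check can pass.
uint64_t DecodeRowCacheSeq(uint64_t cache_entry_seq_no) {
  return cache_entry_seq_no == 0 ? 0 : cache_entry_seq_no - 1;
}
```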
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11816 Reviewed By: ajkr Differential Revision: D49419029 Pulled By: jowlyzhang fbshipit-source-id: 6c77e9e751628d7d8e6c389f299e29a11ea824c6 --- db/db_with_timestamp_basic_test.cc | 101 +++++++++++++++++- db/table_cache.cc | 45 +++++--- db/table_cache.h | 15 +-- table/get_context.cc | 11 +- table/get_context.h | 3 +- .../fix_row_cache_falsely_return_kNotFound.md | 1 + 6 files changed, 145 insertions(+), 31 deletions(-) create mode 100644 unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 8d632d810..0dd0ce8b9 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -1617,6 +1617,105 @@ TEST_F(DBBasicTestWithTimestamp, MultiGetRangeFiltering) { Close(); } +TEST_F(DBBasicTestWithTimestamp, GetWithRowCache) { + Options options = CurrentOptions(); + options.env = env_; + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + LRUCacheOptions cache_options; + cache_options.capacity = 8192; + options.row_cache = cache_options.MakeSharedRowCache(); + + const size_t kTimestampSize = Timestamp(0, 0).size(); + TestComparator test_cmp(kTimestampSize); + options.comparator = &test_cmp; + DestroyAndReopen(options); + + WriteOptions write_opts; + std::string ts_early = Timestamp(1, 0); + std::string ts_later = Timestamp(10, 0); + Slice ts_later_slice = ts_later; + + const Snapshot* snap_with_nothing = db_->GetSnapshot(); + ASSERT_OK(db_->Put(write_opts, "foo", ts_early, "bar")); + const Snapshot* snap_with_foo = db_->GetSnapshot(); + + // Ensure file has sequence number greater than snapshot_with_foo + for (int i = 0; i < 10; i++) { + std::string numStr = std::to_string(i); + ASSERT_OK(db_->Put(write_opts, numStr, ts_later, numStr)); + } + ASSERT_OK(Flush()); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0); + + ReadOptions read_opts; + read_opts.timestamp = &ts_later_slice; + + std::string read_value; + std::string read_ts; + Status s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); + ASSERT_EQ(read_ts, ts_early); + + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); + // Row cache is not storing the ts when record is inserted/updated. + // To be fixed after enabling ROW_CACHE with timestamp. 
+ // ASSERT_EQ(read_ts, ts_early); + + { + std::string ts_nothing = Timestamp(0, 0); + Slice ts_nothing_slice = ts_nothing; + read_opts.timestamp = &ts_nothing_slice; + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_TRUE(s.IsNotFound()); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); + + read_opts.timestamp = &ts_later_slice; + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); + } + + { + read_opts.snapshot = snap_with_foo; + + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3); + + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3); + } + + { + read_opts.snapshot = snap_with_nothing; + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_TRUE(s.IsNotFound()); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 4); + + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_TRUE(s.IsNotFound()); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 5); + } + + db_->ReleaseSnapshot(snap_with_nothing); + db_->ReleaseSnapshot(snap_with_foo); + Close(); +} + TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetPrefixFilter) { Options options = CurrentOptions(); options.env = env_; @@ -4375,4 +4474,4 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); RegisterCustomObjects(argc, argv); return RUN_ALL_TESTS(); -} +} \ No newline at end of file diff --git a/db/table_cache.cc b/db/table_cache.cc index 8b3bc50df..2b1606c16 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -351,23 +351,25 @@ Status TableCache::GetRangeTombstoneIterator( return s; } -void TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options, - const FileDescriptor& fd, - const Slice& internal_key, - GetContext* get_context, - IterKey& row_cache_key) { +uint64_t TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options, + const FileDescriptor& fd, + const Slice& internal_key, + GetContext* get_context, + IterKey& row_cache_key) { uint64_t fd_number = fd.GetNumber(); // We use the user key as cache key instead of the internal key, // otherwise the whole cache would be invalidated every time the // sequence key increases. However, to support caching snapshot - // reads, we append the sequence number (incremented by 1 to - // distinguish from 0) only in this case. + // reads, we append a sequence number (incremented by 1 to + // distinguish from 0) other than internal_key seq no + // to determine row cache entry visibility. // If the snapshot is larger than the largest seqno in the file, // all data should be exposed to the snapshot, so we treat it // the same as there is no snapshot. The exception is that if // a seq-checking callback is registered, some internal keys // may still be filtered out. - uint64_t seq_no = 0; + uint64_t cache_entry_seq_no = 0; + // Maybe we can include the whole file ifsnapshot == fd.largest_seqno. 
if (options.snapshot != nullptr && (get_context->has_callback() || @@ -376,18 +378,24 @@ void TableCache::CreateRowCacheKeyPrefix(const ReadOptions& options, // We should consider to use options.snapshot->GetSequenceNumber() // instead of GetInternalKeySeqno(k), which will make the code // easier to understand. - seq_no = 1 + GetInternalKeySeqno(internal_key); + cache_entry_seq_no = 1 + GetInternalKeySeqno(internal_key); } // Compute row cache key. row_cache_key.TrimAppend(row_cache_key.Size(), row_cache_id_.data(), row_cache_id_.size()); AppendVarint64(&row_cache_key, fd_number); - AppendVarint64(&row_cache_key, seq_no); + AppendVarint64(&row_cache_key, cache_entry_seq_no); + + // Provide a sequence number for callback checking on cache hit. + // As cache_entry_seq_no starts at 1, decrease it's value by 1 to get + // a sequence number align with get context's logic. + return cache_entry_seq_no == 0 ? 0 : cache_entry_seq_no - 1; } bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, - size_t prefix_size, GetContext* get_context) { + size_t prefix_size, GetContext* get_context, + SequenceNumber seq_no) { bool found = false; row_cache_key.TrimAppend(prefix_size, user_key.data(), user_key.size()); @@ -404,8 +412,10 @@ bool TableCache::GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, // get_context.pinnable_slice_. Cache entry is released when // get_context.pinnable_slice_ is reset. row_cache.RegisterReleaseAsCleanup(row_handle, value_pinner); + // If row cache hit, knowing cache key is the same to row_cache_key, + // can use row_cache_key's seq no to construct InternalKey. replayGetContextLog(*row_cache.Value(row_handle), user_key, get_context, - &value_pinner); + &value_pinner, seq_no); RecordTick(ioptions_.stats, ROW_CACHE_HIT); found = true; } else { @@ -428,13 +438,14 @@ Status TableCache::Get( IterKey row_cache_key; std::string row_cache_entry_buffer; - // Check row cache if enabled. Since row cache does not currently store - // sequence numbers, we cannot use it if we need to fetch the sequence. + // Check row cache if enabled. + // Reuse row_cache_key sequence number when row cache hits. if (ioptions_.row_cache && !get_context->NeedToReadSequence()) { auto user_key = ExtractUserKey(k); - CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); + uint64_t cache_entry_seq_no = + CreateRowCacheKeyPrefix(options, fd, k, get_context, row_cache_key); done = GetFromRowCache(user_key, row_cache_key, row_cache_key.Size(), - get_context); + get_context, cache_entry_seq_no); if (!done) { row_cache_entry = &row_cache_entry_buffer; } @@ -718,4 +729,4 @@ uint64_t TableCache::ApproximateSize( return result; } -} // namespace ROCKSDB_NAMESPACE +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/db/table_cache.h b/db/table_cache.h index 67d36d805..ae3fc93c3 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -262,15 +262,18 @@ class TableCache { // Create a key prefix for looking up the row cache. The prefix is of the // format row_cache_id + fd_number + seq_no. 
Later, the user key can be // appended to form the full key - void CreateRowCacheKeyPrefix(const ReadOptions& options, - const FileDescriptor& fd, - const Slice& internal_key, - GetContext* get_context, IterKey& row_cache_key); + // Return the sequence number that determines the visibility of row_cache_key + uint64_t CreateRowCacheKeyPrefix(const ReadOptions& options, + const FileDescriptor& fd, + const Slice& internal_key, + GetContext* get_context, + IterKey& row_cache_key); // Helper function to lookup the row cache for a key. It appends the // user key to row_cache_key at offset prefix_size bool GetFromRowCache(const Slice& user_key, IterKey& row_cache_key, - size_t prefix_size, GetContext* get_context); + size_t prefix_size, GetContext* get_context, + SequenceNumber seq_no = kMaxSequenceNumber); const ImmutableOptions& ioptions_; const FileOptions& file_options_; @@ -283,4 +286,4 @@ class TableCache { std::string db_session_id_; }; -} // namespace ROCKSDB_NAMESPACE +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/table/get_context.cc b/table/get_context.cc index ada7a3539..660726cd3 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -558,7 +558,8 @@ void GetContext::push_operand(const Slice& value, Cleanable* value_pinner) { } void replayGetContextLog(const Slice& replay_log, const Slice& user_key, - GetContext* get_context, Cleanable* value_pinner) { + GetContext* get_context, Cleanable* value_pinner, + SequenceNumber seq_no) { Slice s = replay_log; while (s.size()) { auto type = static_cast(*s.data()); @@ -569,11 +570,9 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key, (void)ret; bool dont_care __attribute__((__unused__)); - // Since SequenceNumber is not stored and unknown, we will use - // kMaxSequenceNumber. - get_context->SaveValue( - ParsedInternalKey(user_key, kMaxSequenceNumber, type), value, - &dont_care, value_pinner); + + ParsedInternalKey ikey = ParsedInternalKey(user_key, seq_no, type); + get_context->SaveValue(ikey, value, &dont_care, value_pinner); } } diff --git a/table/get_context.h b/table/get_context.h index 9bff27503..b43ff6e16 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -248,6 +248,7 @@ class GetContext { // must have been set by calling GetContext::SetReplayLog(). void replayGetContextLog(const Slice& replay_log, const Slice& user_key, GetContext* get_context, - Cleanable* value_pinner = nullptr); + Cleanable* value_pinner = nullptr, + SequenceNumber seq_no = kMaxSequenceNumber); } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md b/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md new file mode 100644 index 000000000..d52621932 --- /dev/null +++ b/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md @@ -0,0 +1 @@ +* Fix a bug where row cache can falsely return kNotFound even though row cache entry is hit. \ No newline at end of file From 089070cb36b81e5fb86cee0e3455160912591a3c Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 20 Sep 2023 13:34:39 -0700 Subject: [PATCH 139/386] Expose more info about input files in `CompactionFilter::Context` (#11857) Summary: **Context:** As requested, lowest level as well as a map from input file to its table properties among all input files used in table creation (if any) are exposed in `CompactionFilter::Context`. 
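For illustration, a hedged sketch of a factory that consumes the new fields; only `input_start_level`, `input_table_properties`, and `kUnknownStartLevel` come from this change, while the class names and the pass-through filter are invented for the example.

```cpp
#include <cstdint>
#include <memory>
#include <string>

#include "rocksdb/compaction_filter.h"
#include "rocksdb/table_properties.h"

class ContextAwareFilter : public rocksdb::CompactionFilter {
 public:
  ContextAwareFilter(int start_level, uint64_t input_entries)
      : start_level_(start_level), input_entries_(input_entries) {}

  bool Filter(int /*level*/, const rocksdb::Slice& /*key*/,
              const rocksdb::Slice& /*existing_value*/,
              std::string* /*new_value*/,
              bool* /*value_changed*/) const override {
    // A real filter could use start_level_/input_entries_ to decide how
    // aggressively to drop entries; this sketch keeps everything.
    return false;
  }
  const char* Name() const override { return "ContextAwareFilter"; }

 private:
  int start_level_;
  uint64_t input_entries_;
};

class ContextAwareFilterFactory : public rocksdb::CompactionFilterFactory {
 public:
  std::unique_ptr<rocksdb::CompactionFilter> CreateCompactionFilter(
      const rocksdb::CompactionFilter::Context& context) override {
    uint64_t input_entries = 0;
    for (const auto& [file, props] : context.input_table_properties) {
      (void)file;
      if (props) {
        input_entries += props->num_entries;
      }
    }
    // input_start_level is kUnknownStartLevel (-1) when there are no
    // compaction input files, and input_table_properties is empty when
    // RocksDB failed to load the properties of the input files.
    return std::make_unique<ContextAwareFilter>(context.input_start_level,
                                                input_entries);
  }
  const char* Name() const override { return "ContextAwareFilterFactory"; }
};
```

This is only meant to show the plumbing; the `KeepFilterFactory` changes in `db_compaction_filter_test.cc` below exercise the same fields.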
**Summary:** This PR contains two commits: (1) [Refactory](https://github.com/facebook/rocksdb/pull/11857/commits/0012777f0ee829fee859eec5db11a882f450ae26) to make resonating/using what is in `Compaction:: table_properties_` easier - Separate `Compaction:: table_properties_` into `Compaction:: input_table_properties_` and `Compaction:: output_table_properties_` - Separate the "set input table properties" logic into `Compaction:: SetInputTableProperties()`) from `Compaction:: GetInputTableProperties` - Call `Compaction:: SetInputTableProperties()` as soon as possible, which is right after `Compaction::SetInputVersion()`. Bundle these two functions into one `Compaction::FinalizeInputInfo()` to minimize missing one or the other (2) [Expose more info about input files:](https://github.com/facebook/rocksdb/pull/11857/commits/6093e7dfbadd4fe1d05ad8a6ab3452d363f6d131) `CompactionFilter::Context::input_start_level/input_table_properties` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11857 Test Plan: - Modify existing UT ` TEST_F(DBTestCompactionFilter, CompactionFilterContextManual)` to cover new logics Reviewed By: ajkr Differential Revision: D49402540 Pulled By: hx235 fbshipit-source-id: 469fff50fa0e5964ffa5ea8db0743f61438ea392 --- db/column_family.cc | 4 +- db/compaction/compaction.cc | 59 +++++++++++-------- db/compaction/compaction.h | 34 +++++++---- db/compaction/compaction_job.cc | 2 +- db/compaction/compaction_job_test.cc | 2 +- db/db_compaction_filter_test.cc | 28 ++++++++- db/db_impl/db_impl_compaction_flush.cc | 10 +++- db/db_impl/db_impl_secondary.cc | 2 +- include/rocksdb/compaction_filter.h | 11 ++++ include/rocksdb/types.h | 7 +++ .../compact_filter_context_more_info.md | 1 + 11 files changed, 117 insertions(+), 43 deletions(-) create mode 100644 unreleased_history/public_api_changes/compact_filter_context_more_info.md diff --git a/db/column_family.cc b/db/column_family.cc index 8bc3c43e0..7563041e9 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1119,7 +1119,7 @@ Compaction* ColumnFamilyData::PickCompaction( GetName(), mutable_options, mutable_db_options, current_->storage_info(), log_buffer); if (result != nullptr) { - result->SetInputVersion(current_); + result->FinalizeInputInfo(current_); } return result; } @@ -1203,7 +1203,7 @@ Compaction* ColumnFamilyData::CompactRange( compact_range_options, begin, end, compaction_end, conflict, max_file_num_to_ignore, trim_ts); if (result != nullptr) { - result->SetInputVersion(current_); + result->FinalizeInputInfo(current_); } TEST_SYNC_POINT("ColumnFamilyData::CompactRange:Return"); return result; diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index e28257d65..99e5dd5ac 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -204,32 +204,36 @@ bool Compaction::IsFullCompaction( return num_files_in_compaction == total_num_files; } -const TablePropertiesCollection& Compaction::GetTableProperties() { - if (!input_table_properties_initialized_) { - const ReadOptions read_options(Env::IOActivity::kCompaction); - for (size_t i = 0; i < num_input_levels(); ++i) { - for (const FileMetaData* fmd : *(this->inputs(i))) { - std::shared_ptr tp; - std::string file_name = - TableFileName(immutable_options_.cf_paths, fmd->fd.GetNumber(), - fmd->fd.GetPathId()); - Status s = input_version_->GetTableProperties(read_options, &tp, fmd, - &file_name); - if (s.ok()) { - table_properties_[file_name] = tp; - } else { - ROCKS_LOG_ERROR(immutable_options_.info_log, - "Unable to load 
table properties for file %" PRIu64 - " --- %s\n", - fmd->fd.GetNumber(), s.ToString().c_str()); - } +Status Compaction::InitInputTableProperties() { + if (!input_table_properties_.empty()) { + return Status::OK(); + } + + Status s; + const ReadOptions read_options(Env::IOActivity::kCompaction); + assert(input_version_); + for (size_t i = 0; i < num_input_levels(); ++i) { + for (const FileMetaData* fmd : *(this->inputs(i))) { + std::shared_ptr tp; + std::string file_name = + TableFileName(immutable_options_.cf_paths, fmd->fd.GetNumber(), + fmd->fd.GetPathId()); + s = input_version_->GetTableProperties(read_options, &tp, fmd, + &file_name); + if (s.ok()) { + input_table_properties_[file_name] = tp; + } else { + ROCKS_LOG_ERROR(immutable_options_.info_log, + "Unable to load table properties for file %" PRIu64 + " --- %s\n", + fmd->fd.GetNumber(), s.ToString().c_str()); + input_table_properties_.clear(); + return s; } } + } - input_table_properties_initialized_ = true; - }; - - return table_properties_; + return s; } Compaction::Compaction( @@ -774,8 +778,17 @@ std::unique_ptr Compaction::CreateCompactionFilter() const { CompactionFilter::Context context; context.is_full_compaction = is_full_compaction_; context.is_manual_compaction = is_manual_compaction_; + context.input_start_level = start_level_; context.column_family_id = cfd_->GetID(); context.reason = TableFileCreationReason::kCompaction; + context.input_table_properties = GetInputTableProperties(); + if (context.input_table_properties.empty()) { + ROCKS_LOG_WARN( + immutable_options_.info_log, + "Unable to set `input_table_properties` of `CompactionFilter::Context` " + "for compaction."); + } + return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter( context); } diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h index fcb0f3003..22ce20259 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -289,7 +289,14 @@ class Compaction { // is the sum of all input file sizes. uint64_t OutputFilePreallocationSize() const; - void SetInputVersion(Version* input_version); + // TODO(hx235): eventually we should consider `InitInputTableProperties()`'s + // status and fail the compaction if needed + // TODO(hx235): consider making this function part of the construction so we + // don't forget to call it + void FinalizeInputInfo(Version* input_version) { + SetInputVersion(input_version); + InitInputTableProperties().PermitUncheckedError(); + } struct InputLevelSummaryBuffer { char buffer[128]; @@ -326,16 +333,20 @@ class Compaction { int output_level, VersionStorageInfo* vstorage, const std::vector& inputs); - // If called before a compaction finishes, will return - // table properties of all compaction input files. - // If called after a compaction finished, will return - // table properties of all compaction input and output files. 
- const TablePropertiesCollection& GetTableProperties(); + const TablePropertiesCollection& GetInputTableProperties() const { + return input_table_properties_; + } + // TODO(hx235): consider making this function symmetric to + // InitInputTableProperties() void SetOutputTableProperties( const std::string& file_name, const std::shared_ptr& tp) { - table_properties_[file_name] = tp; + output_table_properties_[file_name] = tp; + } + + const TablePropertiesCollection& GetOutputTableProperties() const { + return output_table_properties_; } Slice GetSmallestUserKey() const { return smallest_user_key_; } @@ -432,6 +443,10 @@ class Compaction { const int output_level); private: + void SetInputVersion(Version* input_version); + + Status InitInputTableProperties(); + // mark (or clear) all files that are being compacted void MarkFilesBeingCompacted(bool mark_as_compacted); @@ -522,9 +537,8 @@ class Compaction { // Does input compression match the output compression? bool InputCompressionMatchesOutput() const; - bool input_table_properties_initialized_ = false; - // table properties of output files - TablePropertiesCollection table_properties_; + TablePropertiesCollection input_table_properties_; + TablePropertiesCollection output_table_properties_; // smallest user keys in compaction // includes timestamp if user-defined timestamp is enabled. diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 904a10743..a5de19a39 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1979,7 +1979,7 @@ bool CompactionJob::UpdateCompactionStats(uint64_t* num_input_range_del) { bool has_error = false; const ReadOptions read_options(Env::IOActivity::kCompaction); - const auto& input_table_properties = compaction->GetTableProperties(); + const auto& input_table_properties = compaction->GetInputTableProperties(); for (int input_level = 0; input_level < static_cast(compaction->num_input_levels()); ++input_level) { diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 66a47a26f..8bf3132a1 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -644,7 +644,7 @@ class CompactionJobTestBase : public testing::Test { mutable_cf_options_.max_compaction_bytes, 0, kNoCompression, cfd->GetLatestMutableCFOptions()->compression_opts, Temperature::kUnknown, max_subcompactions, grandparents, true); - compaction.SetInputVersion(cfd->current()); + compaction.FinalizeInputInfo(cfd->current()); assert(db_options_.info_log); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index 596dfefc5..f6f44dc2a 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -166,9 +166,12 @@ class ChangeFilter : public CompactionFilter { class KeepFilterFactory : public CompactionFilterFactory { public: explicit KeepFilterFactory(bool check_context = false, - bool check_context_cf_id = false) + bool check_context_cf_id = false, + bool check_context_input_table_properties = false) : check_context_(check_context), check_context_cf_id_(check_context_cf_id), + check_context_input_table_properties_( + check_context_input_table_properties), compaction_filter_created_(false) {} std::unique_ptr CreateCompactionFilter( @@ -176,6 +179,11 @@ class KeepFilterFactory : public CompactionFilterFactory { if (check_context_) { EXPECT_EQ(expect_full_compaction_.load(), context.is_full_compaction); 
EXPECT_EQ(expect_manual_compaction_.load(), context.is_manual_compaction); + EXPECT_EQ(expect_input_start_level_.load(), context.input_start_level); + } + if (check_context_input_table_properties_) { + EXPECT_TRUE(expect_input_table_properties_ == + context.input_table_properties); } if (check_context_cf_id_) { EXPECT_EQ(expect_cf_id_.load(), context.column_family_id); @@ -189,9 +197,15 @@ class KeepFilterFactory : public CompactionFilterFactory { const char* Name() const override { return "KeepFilterFactory"; } bool check_context_; bool check_context_cf_id_; + // `check_context_input_table_properties_` can be true only when access to + // `expect_input_table_properties_` is syncronized since we can't have + // std::atomic unfortunately + bool check_context_input_table_properties_; std::atomic_bool expect_full_compaction_; std::atomic_bool expect_manual_compaction_; std::atomic expect_cf_id_; + std::atomic expect_input_start_level_; + TablePropertiesCollection expect_input_table_properties_; bool compaction_filter_created_; }; @@ -654,7 +668,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilterWithMergeOperator) { } TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { - KeepFilterFactory* filter = new KeepFilterFactory(true, true); + KeepFilterFactory* filter = new KeepFilterFactory( + true /* check_context */, true /* check_context_cf_id */, + true /* check_context_input_table_properties */); Options options = CurrentOptions(); options.compaction_style = kCompactionStyleUniversal; @@ -662,8 +678,9 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { options.compression = kNoCompression; options.level0_file_num_compaction_trigger = 8; Reopen(options); + const int kNumFiles = 3; int num_keys_per_file = 400; - for (int j = 0; j < 3; j++) { + for (int j = 0; j < kNumFiles; j++) { // Write several keys. const std::string value(10, 'x'); for (int i = 0; i < num_keys_per_file; i++) { @@ -683,6 +700,11 @@ TEST_F(DBTestCompactionFilter, CompactionFilterContextManual) { filter->expect_manual_compaction_.store(true); filter->expect_full_compaction_.store(true); filter->expect_cf_id_.store(0); + filter->expect_input_start_level_.store(0); + ASSERT_OK(dbfull()->GetPropertiesOfAllTables( + &filter->expect_input_table_properties_)); + ASSERT_TRUE(filter->expect_input_table_properties_.size() == kNumFiles); + ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(cfilter_count, 700); ASSERT_EQ(NumSortedRuns(0), 1); diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 333d4ad15..91e6e780c 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1462,7 +1462,8 @@ Status DBImpl::CompactFilesImpl( // without releasing the lock, so we're guaranteed a compaction can be formed. assert(c != nullptr); - c->SetInputVersion(version); + c->FinalizeInputInfo(version); + // deletion compaction currently not allowed in CompactFiles. 
assert(!c->deletion_compaction()); @@ -3954,7 +3955,12 @@ void DBImpl::BuildCompactionJobInfo( compaction_job_info->base_input_level = c->start_level(); compaction_job_info->output_level = c->output_level(); compaction_job_info->stats = compaction_job_stats; - compaction_job_info->table_properties = c->GetTableProperties(); + const auto& input_table_properties = c->GetInputTableProperties(); + const auto& output_table_properties = c->GetOutputTableProperties(); + compaction_job_info->table_properties.insert(input_table_properties.begin(), + input_table_properties.end()); + compaction_job_info->table_properties.insert(output_table_properties.begin(), + output_table_properties.end()); compaction_job_info->compaction_reason = c->compaction_reason(); compaction_job_info->compression = c->output_compression(); diff --git a/db/db_impl/db_impl_secondary.cc b/db/db_impl/db_impl_secondary.cc index 10680ba1e..235a528ba 100644 --- a/db/db_impl/db_impl_secondary.cc +++ b/db/db_impl/db_impl_secondary.cc @@ -885,7 +885,7 @@ Status DBImplSecondary::CompactWithoutInstallation( *mutable_cf_options, mutable_db_options_, 0)); assert(c != nullptr); - c->SetInputVersion(version); + c->FinalizeInputInfo(version); // Create output directory if it's not existed yet std::unique_ptr output_dir; diff --git a/include/rocksdb/compaction_filter.h b/include/rocksdb/compaction_filter.h index b1b511613..1784f2329 100644 --- a/include/rocksdb/compaction_filter.h +++ b/include/rocksdb/compaction_filter.h @@ -16,6 +16,7 @@ #include "rocksdb/customizable.h" #include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/table_properties.h" #include "rocksdb/types.h" #include "rocksdb/wide_columns.h" @@ -160,10 +161,20 @@ class CompactionFilter : public Customizable { // Whether this table file is created as part of a compaction requested by // the client. bool is_manual_compaction; + // The lowest level among all the input files (if any) used in table + // creation + int input_start_level = kUnknownStartLevel; // The column family that will contain the created table file. uint32_t column_family_id; // Reason this table file is being created. TableFileCreationReason reason; + // Map from all the input files (if any) used in table creation to their + // table properties. When there are such input files but RocksDB fail to + // load their table properties, `input_table_properties` will be an empty + // map. + TablePropertiesCollection input_table_properties; + + static const int kUnknownStartLevel = -1; }; virtual ~CompactionFilter() {} diff --git a/include/rocksdb/types.h b/include/rocksdb/types.h index 3f8ce9795..c9c214686 100644 --- a/include/rocksdb/types.h +++ b/include/rocksdb/types.h @@ -7,6 +7,9 @@ #include +#include +#include + #include "rocksdb/slice.h" namespace ROCKSDB_NAMESPACE { @@ -18,6 +21,10 @@ using ColumnFamilyId = uint32_t; // Represents a sequence number in a WAL file. using SequenceNumber = uint64_t; +struct TableProperties; +using TablePropertiesCollection = + std::unordered_map>; + const SequenceNumber kMinUnCommittedSeq = 1; // 0 is always committed enum class TableFileCreationReason { diff --git a/unreleased_history/public_api_changes/compact_filter_context_more_info.md b/unreleased_history/public_api_changes/compact_filter_context_more_info.md new file mode 100644 index 000000000..3d821fa68 --- /dev/null +++ b/unreleased_history/public_api_changes/compact_filter_context_more_info.md @@ -0,0 +1 @@ +Expose more information about input files used in table creation (if any) in `CompactionFilter::Context`. 
See `CompactionFilter::Context::input_start_level`,`CompactionFilter::Context::input_table_properties` for more. From c1a97fe1f6bb58d9c98f01c4be20381bd3face9d Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Wed, 20 Sep 2023 16:13:20 -0700 Subject: [PATCH 140/386] Fix Assertion `roundup_len2 >= alignment' failed in crash tests (#11852) Summary: When auto_readahead_size is enabled in async_io, during seek, first buffer will prefetch the data - (current block + readahead till upper_bound). There can be cases where 1. first buffer prefetched all the data till upper bound, or 2. first buffer already has the data from prev seek call and second buffer prefetch further leading to alignment issues. This PR fixes that assertion and second buffer won't go for prefetching if first buffer has already prefetched till upper_bound. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11852 Test Plan: - Added new unit test that failed without this fix. - crash tests passed locally Reviewed By: pdillinger Differential Revision: D49384138 Pulled By: akankshamahajan15 fbshipit-source-id: 54417e909e4d986f1e5a17dbaea059cd4962fd4d --- file/file_prefetch_buffer.cc | 66 +++++++++++-------- file/file_prefetch_buffer.h | 7 ++ file/prefetch_test.cc | 59 ++++++++++++++++- .../bug_fixes/auto_tuning_async_fix.md | 1 + 4 files changed, 105 insertions(+), 28 deletions(-) create mode 100644 unreleased_history/bug_fixes/auto_tuning_async_fix.md diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 0ea246f12..f7c7f4a83 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -364,8 +364,11 @@ Status FilePrefetchBuffer::HandleOverlappingData( size_t second_size = bufs_[second].async_read_in_progress_ ? bufs_[second].async_req_len_ : bufs_[second].buffer_.CurrentSize(); - if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size) { - uint64_t rounddown_start = bufs_[second].offset_ + second_size; + uint64_t rounddown_start = bufs_[second].offset_ + second_size; + // Second buffer might be out of bound if first buffer already prefetched + // that data. + if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size && + !IsOffsetOutOfBound(rounddown_start)) { uint64_t roundup_end = Roundup(rounddown_start + readahead_size, alignment); uint64_t roundup_len = roundup_end - rounddown_start; @@ -562,20 +565,24 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); } - uint64_t roundup_len2 = roundup_end2 - rounddown_start2; - uint64_t chunk_len2 = 0; - CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, - false /*refit_tail*/, chunk_len2); - assert(chunk_len2 == 0); - // Update the buffer offset. - bufs_[second].offset_ = rounddown_start2; - assert(roundup_len2 >= chunk_len2); - uint64_t read_len2 = static_cast(roundup_len2 - chunk_len2); - s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); - if (!s.ok()) { - DestroyAndClearIOHandle(second); - bufs_[second].buffer_.Clear(); - return s; + // Second buffer might be out of bound if first buffer already prefetched + // that data. + if (!IsOffsetOutOfBound(rounddown_start2)) { + uint64_t roundup_len2 = roundup_end2 - rounddown_start2; + uint64_t chunk_len2 = 0; + CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, + false /*refit_tail*/, chunk_len2); + assert(chunk_len2 == 0); + // Update the buffer offset. 
+ bufs_[second].offset_ = rounddown_start2; + assert(roundup_len2 >= chunk_len2); + uint64_t read_len2 = static_cast(roundup_len2 - chunk_len2); + s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); + if (!s.ok()) { + DestroyAndClearIOHandle(second); + bufs_[second].buffer_.Clear(); + return s; + } } } @@ -925,17 +932,22 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, rounddown_start2 = roundup_end1; } - roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); - uint64_t roundup_len2 = roundup_end2 - rounddown_start2; - - assert(roundup_len2 >= alignment); - CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, - false, chunk_len2); - assert(chunk_len2 == 0); - assert(roundup_len2 >= chunk_len2); - read_len2 = static_cast(roundup_len2 - chunk_len2); - // Update the buffer offset. - bufs_[second].offset_ = rounddown_start2; + // Second buffer might be out of bound if first buffer already prefetched + // that data. + if (!IsOffsetOutOfBound(rounddown_start2)) { + roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); + uint64_t roundup_len2 = roundup_end2 - rounddown_start2; + + assert(roundup_len2 >= alignment); + + CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, + false, chunk_len2); + assert(chunk_len2 == 0); + assert(roundup_len2 >= chunk_len2); + read_len2 = static_cast(roundup_len2 - chunk_len2); + // Update the buffer offset. + bufs_[second].offset_ = rounddown_start2; + } } if (read_len1) { diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index a999ea762..875f258ea 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -441,6 +441,13 @@ class FilePrefetchBuffer { } } + inline bool IsOffsetOutOfBound(uint64_t offset) { + if (upper_bound_offset_ > 0) { + return (offset >= upper_bound_offset_); + } + return false; + } + std::vector bufs_; // curr_ represents the index for bufs_ indicating which buffer is being // consumed currently. diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 69e122392..5762be1a0 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -3117,7 +3117,64 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { // Length should be 4000. ASSERT_EQ(async_result.size(), 4000); // Data correctness. - Slice result(content.c_str() + 3000, 4000); + Slice result(&content[3000], 4000); + ASSERT_EQ(result.size(), 4000); + ASSERT_EQ(result, async_result); +} + +// This test checks if during seek in async_io, if first buffer already +// prefetched the data till upper_bound offset, second buffer shouldn't go for +// prefetching. +TEST_F(FilePrefetchBufferTest, IterateUpperBoundTest1) { + std::string fname = "iterate-upperbound-test1"; + Random rand(0); + std::string content = rand.RandomString(32768); + Write(fname, content); + + FileOptions opts; + std::unique_ptr r; + Read(fname, opts, &r); + + FilePrefetchBuffer fpb( + /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, + /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, + /*upper_bound_offset=*/8000, fs()); + + int read_async_called = 0; + SyncPoint::GetInstance()->SetCallBack( + "FilePrefetchBuffer::ReadAsync", + [&](void* /*arg*/) { read_async_called++; }); + SyncPoint::GetInstance()->EnableProcessing(); + + Slice async_result; + // Simulate a seek of 4000 bytes at offset 3000. 
Due to the readahead + // settings, it will do 1 read of 4000+1000 (till 8000 - upper bound). + Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 3000, 4000, &async_result); + + // Platforms that don't have IO uring may not support async IO + if (s.IsNotSupported()) { + return; + } + + ASSERT_TRUE(s.IsTryAgain()); + IOOptions io_opts; + io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; + ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), /*offset=*/3000, + /*length=*/4000, &async_result, &s)); + // No sync call should be made. + HistogramData sst_read_micros; + stats()->histogramData(SST_READ_MICROS, &sst_read_micros); + ASSERT_EQ(sst_read_micros.count, 0); + + // Number of async calls should be 1. + // No Prefetching should happen in second buffer as first buffer has already + // prefetched till offset. + ASSERT_EQ(read_async_called, 1); + // Length should be 4000. + ASSERT_EQ(async_result.size(), 4000); + // Data correctness. + Slice result(&content[3000], 4000); ASSERT_EQ(result.size(), 4000); ASSERT_EQ(result, async_result); } diff --git a/unreleased_history/bug_fixes/auto_tuning_async_fix.md b/unreleased_history/bug_fixes/auto_tuning_async_fix.md new file mode 100644 index 000000000..0a54555cd --- /dev/null +++ b/unreleased_history/bug_fixes/auto_tuning_async_fix.md @@ -0,0 +1 @@ +Fix an assertion fault during seek with async_io when readahead trimming is enabled. From 548aabfe5f3e1cbe4bf282d5bc16209b0075f48e Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 20 Sep 2023 22:30:17 -0700 Subject: [PATCH 141/386] Disable compressed secondary cache if capacity is 0 (#11863) Summary: This PR makes disabling the compressed secondary cache by setting capacity to 0 a bit more efficient. Previously, inserts/lookups would go to the backing LRUCache before getting rejected due to 0 capacity. With this change, insert/lookup would return from ```CompressedSecondaryCache``` itself. 
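A hedged usage sketch (not part of this PR) of how a caller might wire up a compressed secondary cache that starts out disabled; the helper name and sizes are arbitrary, and with this change the capacity-0 secondary cache becomes a cheap no-op instead of a round trip through the backing LRUCache.

```cpp
#include <memory>

#include "rocksdb/cache.h"

std::shared_ptr<rocksdb::Cache> MakeBlockCacheWithDisabledSecondary() {
  rocksdb::CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 0;  // disabled until the capacity is raised again
  std::shared_ptr<rocksdb::SecondaryCache> secondary =
      rocksdb::NewCompressedSecondaryCache(sec_opts);

  rocksdb::LRUCacheOptions lru_opts;
  lru_opts.capacity = 64 << 20;  // 64 MiB primary block cache
  lru_opts.secondary_cache = secondary;
  return rocksdb::NewLRUCache(lru_opts);
}
```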
Tests: Existing tests Pull Request resolved: https://github.com/facebook/rocksdb/pull/11863 Reviewed By: akankshamahajan15 Differential Revision: D49476248 Pulled By: anand1976 fbshipit-source-id: f0f17a5e3df7d8bfc06709f8f23c1302056ba590 --- cache/compressed_secondary_cache.cc | 12 +++++++++++- cache/compressed_secondary_cache.h | 1 + .../bug_fixes/compressed_sec_cache_disable.md | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/bug_fixes/compressed_sec_cache_disable.md diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index af4db81e3..4a35b8d1e 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -22,7 +22,8 @@ CompressedSecondaryCache::CompressedSecondaryCache( cache_options_(opts), cache_res_mgr_(std::make_shared( std::make_shared>( - cache_))) {} + cache_))), + disable_cache_(opts.capacity == 0) {} CompressedSecondaryCache::~CompressedSecondaryCache() { assert(cache_res_mgr_->GetTotalReservedCacheSize() == 0); @@ -33,6 +34,10 @@ std::unique_ptr CompressedSecondaryCache::Lookup( Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase, bool& kept_in_sec_cache) { assert(helper); + if (disable_cache_) { + return nullptr; + } + std::unique_ptr handle; kept_in_sec_cache = false; Cache::Handle* lru_handle = cache_->Lookup(key); @@ -115,6 +120,10 @@ Status CompressedSecondaryCache::Insert(const Slice& key, return Status::InvalidArgument(); } + if (disable_cache_) { + return Status::OK(); + } + auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge); if (!force_insert) { Cache::Handle* lru_handle = cache_->Lookup(key); @@ -186,6 +195,7 @@ Status CompressedSecondaryCache::SetCapacity(size_t capacity) { MutexLock l(&capacity_mutex_); cache_options_.capacity = capacity; cache_->SetCapacity(capacity); + disable_cache_ = capacity == 0; return Status::OK(); } diff --git a/cache/compressed_secondary_cache.h b/cache/compressed_secondary_cache.h index 777782fc3..773a25bf9 100644 --- a/cache/compressed_secondary_cache.h +++ b/cache/compressed_secondary_cache.h @@ -136,6 +136,7 @@ class CompressedSecondaryCache : public SecondaryCache { CompressedSecondaryCacheOptions cache_options_; mutable port::Mutex capacity_mutex_; std::shared_ptr cache_res_mgr_; + bool disable_cache_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/bug_fixes/compressed_sec_cache_disable.md b/unreleased_history/bug_fixes/compressed_sec_cache_disable.md new file mode 100644 index 000000000..9c80f4474 --- /dev/null +++ b/unreleased_history/bug_fixes/compressed_sec_cache_disable.md @@ -0,0 +1 @@ +When the compressed secondary cache capacity is reduced to 0, it should be completely disabled. Before this fix, inserts and lookups would still go to the backing `LRUCache` before returning, thus incurring locking overhead. With this fix, inserts and lookups are no-ops and do not add any overhead. From bf488c3052f3d204dca3e2ea8cb5396421f06eb2 Mon Sep 17 00:00:00 2001 From: "Peter (Stig) Edwards" Date: Thu, 21 Sep 2023 13:52:01 -0700 Subject: [PATCH 142/386] Use *next_sequence -1 here (#11861) Summary: To fix off-by-one error: Transaction could not check for conflicts for operation at SequenceNumber 500000 as the MemTable only contains changes newer than SequenceNumber 500001. 
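A rough sketch of the arithmetic behind the fix, using the values from the error message above (illustrative only; the variable names are not the actual recovery code):

```
#include <cstdint>
#include <iostream>

int main() {
  // After WAL replay, *next_sequence is the next number to be assigned,
  // so the newest change that was actually recovered is one less than that.
  uint64_t next_sequence = 500001;
  uint64_t newest_recovered = next_sequence - 1;  // 500000

  // Seeding the post-recovery memtable with next_sequence (500001) made it
  // report that it only contains changes newer than 500001, so a conflict
  // check for an operation at sequence 500000 could not be answered.
  // Seeding it with next_sequence - 1 (500000) closes that off-by-one gap.
  std::cout << "memtable should be created with sequence " << newest_recovered
            << std::endl;
}
```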
Fixes https://github.com/facebook/rocksdb/issues/11822 I think introduced in https://github.com/facebook/rocksdb/commit/a657ee9a9c4a2acb529b8f5567965e4bf6d38fd5 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11861 Reviewed By: pdillinger Differential Revision: D49457273 Pulled By: ajkr fbshipit-source-id: b527cbae4ecc7874633a11f07027adee62940d74 --- db/db_impl/db_impl_open.cc | 2 +- .../optimistic_transaction_test.cc | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index d9d1f932a..de164cc20 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1298,7 +1298,7 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, flushed = true; cfd->CreateNewMemtable(*cfd->GetLatestMutableCFOptions(), - *next_sequence); + *next_sequence - 1); } } } diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 835b9f097..733494180 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1635,6 +1635,47 @@ TEST_P(OptimisticTransactionTest, SequenceNumberAfterRecoverTest) { delete transaction; } +#ifdef __SANITIZE_THREAD__ +// Skip OptimisticTransactionTest.SequenceNumberAfterRecoverLargeTest under TSAN +// to avoid false positive because of TSAN lock limit of 64. +#else +TEST_P(OptimisticTransactionTest, SequenceNumberAfterRecoverLargeTest) { + WriteOptions write_options; + OptimisticTransactionOptions transaction_options; + + Transaction* transaction( + txn_db->BeginTransaction(write_options, transaction_options)); + + std::string value(1024 * 1024, 'X'); + const size_t n_zero = 2; + std::string s_i; + Status s; + for (int i = 1; i <= 64; i++) { + s_i = std::to_string(i); + auto key = std::string(n_zero - std::min(n_zero, s_i.length()), '0') + s_i; + s = transaction->Put(key, value); + ASSERT_OK(s); + } + + s = transaction->Commit(); + ASSERT_OK(s); + delete transaction; + + Reopen(); + transaction = txn_db->BeginTransaction(write_options, transaction_options); + s = transaction->Put("bar", "val"); + ASSERT_OK(s); + s = transaction->Commit(); + if (!s.ok()) { + std::cerr << "Failed to commit records. 
Error: " << s.ToString() + << std::endl; + } + ASSERT_OK(s); + + delete transaction; +} +#endif // __SANITIZE_THREAD__ + TEST_P(OptimisticTransactionTest, TimestampedSnapshotMissingCommitTs) { std::unique_ptr txn(txn_db->BeginTransaction(WriteOptions())); ASSERT_OK(txn->Put("a", "v")); From 32fc1e6cdc153be990bd34c38a3713ee22e1941b Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 21 Sep 2023 14:59:58 -0700 Subject: [PATCH 143/386] Add unit test for the multiget fix when ReadOptions.read_tier == kPersistedTier and disableWAL == true (#11854) Summary: Add unit tests for the fix in https://github.com/facebook/rocksdb/pull/11700 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11854 Reviewed By: anand1976 Differential Revision: D49392462 Pulled By: jowlyzhang fbshipit-source-id: bd6978e4888074fa5417f3ccda7a78a2c7eee9c6 --- db/db_test.cc | 95 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 28 deletions(-) diff --git a/db/db_test.cc b/db/db_test.cc index 8e7717a7c..714f42608 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -644,6 +644,33 @@ TEST_F(DBTest, ReadFromPersistedTier) { ASSERT_OK(db_->Get(ropt, handles_[1], "bar", &value)); } + const auto check_multiget_func = + [&](const ReadOptions& read_opts, + std::vector cfhs, std::vector& keys, + std::vector& values, + bool batched) -> std::vector { + if (!batched) { + return db_->MultiGet(read_opts, cfhs, keys, &values); + } else { + size_t num_keys = keys.size(); + std::vector statuses; + std::vector pinnable_values; + statuses.resize(num_keys); + pinnable_values.resize(num_keys); + values.resize(num_keys); + db_->MultiGet(read_opts, cfhs[0], num_keys, keys.data(), + pinnable_values.data(), statuses.data(), false); + for (size_t i = 0; i < statuses.size(); ++i) { + if (statuses[i].ok()) { + values[i].assign(pinnable_values[i].data(), + pinnable_values[i].size()); + pinnable_values[i].Reset(); + } + } + return statuses; + } + }; + // Multiget std::vector multiget_cfs; multiget_cfs.push_back(handles_[1]); @@ -652,14 +679,17 @@ TEST_F(DBTest, ReadFromPersistedTier) { multiget_keys.push_back("foo"); multiget_keys.push_back("bar"); std::vector multiget_values; - auto statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[0].IsNotFound()); - ASSERT_TRUE(statuses[1].IsNotFound()); - } else { - ASSERT_OK(statuses[0]); - ASSERT_OK(statuses[1]); + for (int i = 0; i < 2; i++) { + bool batched = i == 0; + auto statuses = check_multiget_func(ropt, multiget_cfs, multiget_keys, + multiget_values, batched); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[0].IsNotFound()); + ASSERT_TRUE(statuses[1].IsNotFound()); + } else { + ASSERT_OK(statuses[0]); + ASSERT_OK(statuses[1]); + } } // 2nd round: flush and put a new value in memtable. 
@@ -683,16 +713,21 @@ TEST_F(DBTest, ReadFromPersistedTier) { // Expect same result in multiget multiget_cfs.push_back(handles_[1]); multiget_keys.push_back("rocksdb"); - statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - ASSERT_TRUE(statuses[0].ok()); - ASSERT_EQ("first", multiget_values[0]); - ASSERT_TRUE(statuses[1].ok()); - ASSERT_EQ("one", multiget_values[1]); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[2].IsNotFound()); - } else { - ASSERT_OK(statuses[2]); + multiget_values.clear(); + + for (int i = 0; i < 2; i++) { + bool batched = i == 0; + auto statuses = check_multiget_func(ropt, multiget_cfs, multiget_keys, + multiget_values, batched); + ASSERT_TRUE(statuses[0].ok()); + ASSERT_EQ("first", multiget_values[0]); + ASSERT_TRUE(statuses[1].ok()); + ASSERT_EQ("one", multiget_values[1]); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[2].IsNotFound()); + } else { + ASSERT_OK(statuses[2]); + } } // 3rd round: delete and flush @@ -712,17 +747,21 @@ TEST_F(DBTest, ReadFromPersistedTier) { ASSERT_TRUE(db_->Get(ropt, handles_[1], "rocksdb", &value).ok()); ASSERT_EQ(value, "hello"); - statuses = - db_->MultiGet(ropt, multiget_cfs, multiget_keys, &multiget_values); - ASSERT_TRUE(statuses[0].IsNotFound()); - if (wopt.disableWAL) { - ASSERT_TRUE(statuses[1].ok()); - ASSERT_EQ("one", multiget_values[1]); - } else { - ASSERT_TRUE(statuses[1].IsNotFound()); + multiget_values.clear(); + for (int i = 0; i < 2; i++) { + bool batched = i == 0; + auto statuses = check_multiget_func(ropt, multiget_cfs, multiget_keys, + multiget_values, batched); + ASSERT_TRUE(statuses[0].IsNotFound()); + if (wopt.disableWAL) { + ASSERT_TRUE(statuses[1].ok()); + ASSERT_EQ("one", multiget_values[1]); + } else { + ASSERT_TRUE(statuses[1].IsNotFound()); + } + ASSERT_TRUE(statuses[2].ok()); + ASSERT_EQ("hello", multiget_values[2]); } - ASSERT_TRUE(statuses[2].ok()); - ASSERT_EQ("hello", multiget_values[2]); if (wopt.disableWAL == 0) { DestroyAndReopen(options); } From b927ba5936216861c2c35ab68f50ba4a78e65747 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Thu, 21 Sep 2023 15:31:29 -0700 Subject: [PATCH 144/386] Rollback other pending memtable flushes when a flush fails (#11865) Summary: when atomic_flush=false, there are certain cases where we try to install memtable results with already deleted SST files. This can happen when the following sequence events happen: ``` Start Flush0 for memtable M0 to SST0 Start Flush1 for memtable M1 to SST1 Flush 1 returns OK, but don't install to MANIFEST and let whoever flushes M0 to take care of it Flush0 finishes with a retryable IOError, it rollbacks M0, (incorrectly) does not rollback M1, and deletes SST0 and SST1 Starts Flush2 for M0, it does not pick up M1 since it thought M1 is flushed Flush2 writes SST2 and finishes OK, tries to install SST2 and SST1 Error opening SST1 since it's already deleted with an error message like the following: IO error: No such file or directory: While open a file for random read: /tmp/rocksdbtest-501/db_flush_test_3577_4230653031040984171/000011.sst: No such file or directory ``` This happens since: 1. We currently only rollback the memtables that we are flushing in a flush job when atomic_flush=false. 2. 
Pending output SSTs from previous flushes are deleted since a pending file number is released whenever a flush job is finished no matter of flush status: https://github.com/facebook/rocksdb/blob/f42e70bf561d4be9b6bbe7316d1c2c0c8a3818e6/db/db_impl/db_impl_compaction_flush.cc#L3161 This PR fixes the issue by rollback these pending flushes. There is another issue where if a new flush for new memtable starts and finishes after Flush0 finishes. Its output may also be deleted (see more in unit test). It is fixed by checking bg error status before installing a memtable result, and rollback if there is an error. There is a more efficient fix where we just don't release the pending file output number for flushes that delegate installation. It is more efficient since it does not have to rewrite the flush output file. With the fix in this PR, we can end up with a giant file if a lot of memtables are being flushed together. However, the more efficient fix is a bit more complicated to implement (requires associating such pending file numbers with flush job/memtables) and is more risky since it changes normal flush code path. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11865 Test Plan: * Added repro unit tests. Reviewed By: anand1976 Differential Revision: D49484922 Pulled By: cbi42 fbshipit-source-id: 25b536c08f4e02e7f1d0f86571663737d2b5d53d --- db/db_flush_test.cc | 186 ++++++++++++++++++ db/db_impl/db_impl_compaction_flush.cc | 24 ++- db/error_handler.cc | 1 + db/event_helpers.cc | 2 + db/flush_job.cc | 34 +++- db/flush_job.h | 7 +- db/memtable_list.cc | 49 ++++- db/memtable_list.h | 14 +- db/memtable_list_test.cc | 4 +- .../bug_fixes/100_rollback_pending_flush.md | 1 + 10 files changed, 297 insertions(+), 25 deletions(-) create mode 100644 unreleased_history/bug_fixes/100_rollback_pending_flush.md diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc index acf9723e9..d10bd3180 100644 --- a/db/db_flush_test.cc +++ b/db/db_flush_test.cc @@ -3193,6 +3193,192 @@ INSTANTIATE_TEST_CASE_P(DBFlushDirectIOTest, DBFlushDirectIOTest, INSTANTIATE_TEST_CASE_P(DBAtomicFlushTest, DBAtomicFlushTest, testing::Bool()); +TEST_F(DBFlushTest, NonAtomicFlushRollbackPendingFlushes) { + // Fix a bug in when atomic_flush=false. + // The bug can happen as follows: + // Start Flush0 for memtable M0 to SST0 + // Start Flush1 for memtable M1 to SST1 + // Flush1 returns OK, but don't install to MANIFEST and let whoever flushes + // M0 to take care of it + // Flush0 finishes with a retryable IOError + // - It rollbacks M0, (incorrectly) not M1 + // - Deletes SST1 and SST2 + // + // Auto-recovery will start Flush2 for M0, it does not pick up M1 since it + // thinks that M1 is flushed + // Flush2 writes SST3 and finishes OK, tries to install SST3 and SST2 + // Error opening SST2 since it's already deleted + // + // The fix is to let Flush0 also rollback M1. 
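+ //
+ // Test mechanics (summary of the SyncPoint choreography below): a callback
+ // on "FlushJob::WriteLevel0Table:s" injects a retryable IOError into the
+ // first flush only, and LoadDependency() parks that failing flush until the
+ // second flush (M1) has completely finished, so the failing flush is the
+ // one that must roll back both M0 and M1. After auto-recovery, both
+ // memtables are re-flushed into a single L0 file, which the final
+ // assertion checks.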
+ Options opts = CurrentOptions(); + opts.atomic_flush = false; + opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); + opts.max_write_buffer_number = 64; + opts.max_background_flushes = 4; + env_->SetBackgroundThreads(4, Env::HIGH); + DestroyAndReopen(opts); + std::atomic_int flush_count = 0; + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->SetCallBack( + "FlushJob::WriteLevel0Table:s", [&](void* s_ptr) { + int c = flush_count.fetch_add(1); + if (c == 0) { + Status* s = (Status*)(s_ptr); + IOStatus io_error = IOStatus::IOError("injected foobar"); + io_error.SetRetryable(true); + *s = io_error; + TEST_SYNC_POINT("Let mem1 flush start"); + TEST_SYNC_POINT("Wait for mem1 flush to finish"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"Let mem1 flush start", "Mem1 flush starts"}, + {"DBImpl::BGWorkFlush:done", "Wait for mem1 flush to finish"}, + {"RecoverFromRetryableBGIOError:RecoverSuccess", + "Wait for error recover"}}); + // Need first flush to wait for the second flush to finish + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "val1")); + // trigger bg flush mem0 + ASSERT_OK(Put(Key(2), "val2")); + TEST_SYNC_POINT("Mem1 flush starts"); + // trigger bg flush mem1 + ASSERT_OK(Put(Key(3), "val3")); + + TEST_SYNC_POINT("Wait for error recover"); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); +} + +TEST_F(DBFlushTest, AbortNonAtomicFlushWhenBGError) { + // Fix a bug in when atomic_flush=false. + // The bug can happen as follows: + // Start Flush0 for memtable M0 to SST0 + // Start Flush1 for memtable M1 to SST1 + // Flush1 returns OK, but doesn't install output MANIFEST and let whoever + // flushes M0 to take care of it + // Start Flush2 for memtable M2 to SST2 + // Flush0 finishes with a retryable IOError + // - It rollbacks M0 AND M1 + // - Deletes SST1 and SST2 + // Flush2 finishes, does not rollback M2, + // - releases the pending file number that keeps SST2 alive + // - deletes SST2 + // + // Then auto-recovery starts, error opening SST2 when try to install + // flush result + // + // The fix is to let Flush2 rollback M2 if it finds that + // there is a background error. 
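+ //
+ // Test mechanics: besides the injected IOError in the first flush, a second
+ // SyncPoint callback on "FlushJob::WriteLevel0Table" parks the flush of the
+ // memtable with ID 3 (M2 above) just as it starts writing its table until
+ // "RollbackMemtableFlush" has run, so by the time that flush tries to
+ // install its result the background error is already set and it rolls back
+ // M2 instead of attempting installation.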
+ Options opts = CurrentOptions(); + opts.atomic_flush = false; + opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); + opts.max_write_buffer_number = 64; + opts.max_background_flushes = 4; + env_->SetBackgroundThreads(4, Env::HIGH); + DestroyAndReopen(opts); + std::atomic_int flush_count = 0; + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->SetCallBack( + "FlushJob::WriteLevel0Table:s", [&](void* s_ptr) { + int c = flush_count.fetch_add(1); + if (c == 0) { + Status* s = (Status*)(s_ptr); + IOStatus io_error = IOStatus::IOError("injected foobar"); + io_error.SetRetryable(true); + *s = io_error; + TEST_SYNC_POINT("Let mem1 flush start"); + TEST_SYNC_POINT("Wait for mem1 flush to finish"); + + TEST_SYNC_POINT("Let mem2 flush start"); + TEST_SYNC_POINT("Wait for mem2 to start writing table"); + } + }); + + SyncPoint::GetInstance()->SetCallBack( + "FlushJob::WriteLevel0Table", [&](void* mems) { + autovector* mems_ptr = (autovector*)mems; + if ((*mems_ptr)[0]->GetID() == 3) { + TEST_SYNC_POINT("Mem2 flush starts writing table"); + TEST_SYNC_POINT("Mem2 flush waits until rollback"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"Let mem1 flush start", "Mem1 flush starts"}, + {"DBImpl::BGWorkFlush:done", "Wait for mem1 flush to finish"}, + {"Let mem2 flush start", "Mem2 flush starts"}, + {"Mem2 flush starts writing table", + "Wait for mem2 to start writing table"}, + {"RollbackMemtableFlush", "Mem2 flush waits until rollback"}, + {"RecoverFromRetryableBGIOError:RecoverSuccess", + "Wait for error recover"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_OK(Put(Key(1), "val1")); + // trigger bg flush mem0 + ASSERT_OK(Put(Key(2), "val2")); + TEST_SYNC_POINT("Mem1 flush starts"); + // trigger bg flush mem1 + ASSERT_OK(Put(Key(3), "val3")); + + TEST_SYNC_POINT("Mem2 flush starts"); + ASSERT_OK(Put(Key(4), "val4")); + + TEST_SYNC_POINT("Wait for error recover"); + // Recovery flush writes 3 memtables together into 1 file. 
+ ASSERT_EQ(1, NumTableFilesAtLevel(0)); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); +} + +TEST_F(DBFlushTest, NonAtomicNormalFlushAbortWhenBGError) { + Options opts = CurrentOptions(); + opts.atomic_flush = false; + opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); + opts.max_write_buffer_number = 64; + opts.max_background_flushes = 1; + env_->SetBackgroundThreads(2, Env::HIGH); + DestroyAndReopen(opts); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + std::atomic_int flush_write_table_count = 0; + SyncPoint::GetInstance()->SetCallBack( + "FlushJob::WriteLevel0Table:s", [&](void* s_ptr) { + int c = flush_write_table_count.fetch_add(1); + if (c == 0) { + Status* s = (Status*)(s_ptr); + IOStatus io_error = IOStatus::IOError("injected foobar"); + io_error.SetRetryable(true); + *s = io_error; + } + }); + + SyncPoint::GetInstance()->EnableProcessing(); + SyncPoint::GetInstance()->LoadDependency( + {{"RecoverFromRetryableBGIOError:RecoverSuccess", + "Wait for error recover"}}); + + ASSERT_OK(Put(Key(1), "val1")); + // trigger bg flush0 for mem0 + ASSERT_OK(Put(Key(2), "val2")); + dbfull()->TEST_WaitForFlushMemTable().PermitUncheckedError(); + + // trigger bg flush1 for mem1, should see bg error and abort + // before picking a memtable to flush + ASSERT_OK(Put(Key(3), "val3")); + + TEST_SYNC_POINT("Wait for error recover"); + // Recovery flush writes 2 memtables together into 1 file. + ASSERT_EQ(1, NumTableFilesAtLevel(0)); + // 1 for flush 0 and 1 for recovery flush + ASSERT_EQ(2, flush_write_table_count); + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 91e6e780c..8e4f70682 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -284,6 +284,20 @@ Status DBImpl::FlushMemTableToOutputFile( // If the log sync failed, we do not need to pick memtable. Otherwise, // num_flush_not_started_ needs to be rollback. TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"); + // Exit a flush due to bg error should not set bg error again. + bool skip_set_bg_error = false; + if (s.ok() && flush_reason != FlushReason::kErrorRecovery && + flush_reason != FlushReason::kErrorRecoveryRetryFlush && + !error_handler_.GetBGError().ok()) { + // Error recovery in progress, should not pick memtable which excludes + // them from being picked up by recovery flush. + // This ensures that when bg error is set, no new flush can pick + // memtables. + skip_set_bg_error = true; + s = error_handler_.GetBGError(); + assert(!s.ok()); + } + if (s.ok()) { flush_job.PickMemTable(); need_cancel = true; @@ -304,7 +318,8 @@ Status DBImpl::FlushMemTableToOutputFile( // is unlocked by the current thread. if (s.ok()) { s = flush_job.Run(&logs_with_prep_tracker_, &file_meta, - &switched_to_mempurge); + &switched_to_mempurge, &skip_set_bg_error, + &error_handler_); need_cancel = false; } @@ -345,7 +360,8 @@ Status DBImpl::FlushMemTableToOutputFile( } } - if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) { + if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() && + !skip_set_bg_error) { if (log_io_s.ok()) { // Error while writing to MANIFEST. 
// In fact, versions_->io_status() can also be the result of renaming @@ -634,8 +650,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( for (int i = 0; i != num_cfs; ++i) { if (exec_status[i].second.ok() && exec_status[i].first) { auto& mems = jobs[i]->GetMemTables(); - cfds[i]->imm()->RollbackMemtableFlush(mems, - file_meta[i].fd.GetNumber()); + cfds[i]->imm()->RollbackMemtableFlush( + mems, /*rollback_succeeding_memtables=*/false); } } } diff --git a/db/error_handler.cc b/db/error_handler.cc index 55451e42c..efadfbc80 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -655,6 +655,7 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( } recovery_in_prog_ = true; + TEST_SYNC_POINT("StartRecoverFromRetryableBGIOError::in_progress"); recovery_thread_.reset( new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); diff --git a/db/event_helpers.cc b/db/event_helpers.cc index d442a1ed7..700c5f22c 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -240,6 +240,8 @@ void EventHelpers::NotifyOnErrorRecoveryEnd( info.new_bg_error.PermitUncheckedError(); } db_mutex->Lock(); + } else { + old_bg_error.PermitUncheckedError(); } } diff --git a/db/flush_job.cc b/db/flush_job.cc index 0e6c66cac..046abcd10 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -215,7 +215,8 @@ void FlushJob::PickMemTable() { } Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, - bool* switched_to_mempurge) { + bool* switched_to_mempurge, bool* skipped_since_bg_error, + ErrorHandler* error_handler) { TEST_SYNC_POINT("FlushJob::Start"); db_mutex_->AssertHeld(); assert(pick_memtable_called); @@ -303,17 +304,31 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, } if (!s.ok()) { - cfd_->imm()->RollbackMemtableFlush(mems_, meta_.fd.GetNumber()); + cfd_->imm()->RollbackMemtableFlush( + mems_, /*rollback_succeeding_memtables=*/!db_options_.atomic_flush); } else if (write_manifest_) { - TEST_SYNC_POINT("FlushJob::InstallResults"); - // Replace immutable memtable with the generated Table - s = cfd_->imm()->TryInstallMemtableFlushResults( - cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_, - meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_, - log_buffer_, &committed_flush_jobs_info_, - !(mempurge_s.ok()) /* write_edit : true if no mempurge happened (or if aborted), + assert(!db_options_.atomic_flush); + if (!db_options_.atomic_flush && + flush_reason_ != FlushReason::kErrorRecovery && + flush_reason_ != FlushReason::kErrorRecoveryRetryFlush && + error_handler && !error_handler->GetBGError().ok()) { + cfd_->imm()->RollbackMemtableFlush( + mems_, /*rollback_succeeding_memtables=*/!db_options_.atomic_flush); + s = error_handler->GetBGError(); + if (skipped_since_bg_error) { + *skipped_since_bg_error = true; + } + } else { + TEST_SYNC_POINT("FlushJob::InstallResults"); + // Replace immutable memtable with the generated Table + s = cfd_->imm()->TryInstallMemtableFlushResults( + cfd_, mutable_cf_options_, mems_, prep_tracker, versions_, db_mutex_, + meta_.fd.GetNumber(), &job_context_->memtables_to_free, db_directory_, + log_buffer_, &committed_flush_jobs_info_, + !(mempurge_s.ok()) /* write_edit : true if no mempurge happened (or if aborted), but 'false' if mempurge successful: no new min log number or new level 0 file path to write to manifest. 
*/); + } } if (s.ok() && file_meta != nullptr) { @@ -965,6 +980,7 @@ Status FlushJob::WriteLevel0Table() { &table_properties_, write_hint, full_history_ts_low, blob_callback_, base_, &num_input_entries, &memtable_payload_bytes, &memtable_garbage_bytes); + TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:s", &s); // TODO: Cleanup io_status in BuildTable and table builders assert(!s.ok() || io_s.ok()); io_s.PermitUncheckedError(); diff --git a/db/flush_job.h b/db/flush_job.h index 43d10ffe9..db5dbd719 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -83,9 +83,14 @@ class FlushJob { // Require db_mutex held. // Once PickMemTable() is called, either Run() or Cancel() has to be called. void PickMemTable(); + // @param skip_since_bg_error If not nullptr and if atomic_flush=false, + // then it is set to true if flush installation is skipped and memtable + // is rolled back due to existing background error. Status Run(LogsWithPrepTracker* prep_tracker = nullptr, FileMetaData* file_meta = nullptr, - bool* switched_to_mempurge = nullptr); + bool* switched_to_mempurge = nullptr, + bool* skipped_since_bg_error = nullptr, + ErrorHandler* error_handler = nullptr); void Cancel(); const autovector& GetMemTables() const { return mems_; } diff --git a/db/memtable_list.cc b/db/memtable_list.cc index b665c19b8..da4721d13 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -434,21 +434,54 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id, } void MemTableList::RollbackMemtableFlush(const autovector& mems, - uint64_t /*file_number*/) { + bool rollback_succeeding_memtables) { + TEST_SYNC_POINT("RollbackMemtableFlush"); AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_MEMTABLE_ROLLBACK); assert(!mems.empty()); - - // If the flush was not successful, then just reset state. - // Maybe a succeeding attempt to flush will be successful. +#ifndef NDEBUG for (MemTable* m : mems) { assert(m->flush_in_progress_); assert(m->file_number_ == 0); + } +#endif + + if (rollback_succeeding_memtables && !mems.empty()) { + std::list& memlist = current_->memlist_; + auto it = memlist.rbegin(); + for (; *it != mems[0] && it != memlist.rend(); ++it) { + } + // mems should be in memlist + assert(*it == mems[0]); + if (*it == mems[0]) { + ++it; + } + while (it != memlist.rend()) { + MemTable* m = *it; + // Only rollback complete, not in-progress, + // in_progress can be flushes that are still writing SSTs + if (m->flush_completed_) { + m->flush_in_progress_ = false; + m->flush_completed_ = false; + m->edit_.Clear(); + m->file_number_ = 0; + num_flush_not_started_++; + ++it; + } else { + break; + } + } + } - m->flush_in_progress_ = false; - m->flush_completed_ = false; - m->edit_.Clear(); - num_flush_not_started_++; + for (MemTable* m : mems) { + if (m->flush_in_progress_) { + assert(m->file_number_ == 0); + m->file_number_ = 0; + m->flush_in_progress_ = false; + m->flush_completed_ = false; + m->edit_.Clear(); + num_flush_not_started_++; + } } imm_flush_needed.store(true, std::memory_order_release); } diff --git a/db/memtable_list.h b/db/memtable_list.h index e95493b6f..51d14dff7 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -271,8 +271,20 @@ class MemTableList { // Reset status of the given memtable list back to pending state so that // they can get picked up again on the next round of flush. + // + // @param rollback_succeeding_memtables If true, will rollback adjacent + // younger memtables whose flush is completed. 
Specifically, suppose the + // current immutable memtables are M_0,M_1...M_N ordered from youngest to + // oldest. Suppose that the youngest memtable in `mems` is M_K. We will try to + // rollback M_K-1, M_K-2... until the first memtable whose flush is + // not completed. These are the memtables that would have been installed + // by this flush job if it were to succeed. This flag is currently used + // by non atomic_flush rollback. + // Note that we also do rollback in `write_manifest_cb` by calling + // `RemoveMemTablesOrRestoreFlags()`. There we rollback the entire batch so + // it is similar to what we do here with rollback_succeeding_memtables=true. void RollbackMemtableFlush(const autovector& mems, - uint64_t file_number); + bool rollback_succeeding_memtables); // Try commit a successful flush in the manifest file. It might just return // Status::OK letting a concurrent flush to do the actual the recording. diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index dfa1dbfc7..12f7495b8 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -682,7 +682,7 @@ TEST_F(MemTableListTest, FlushPendingTest) { ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Revert flush - list.RollbackMemtableFlush(to_flush, 0); + list.RollbackMemtableFlush(to_flush, false); ASSERT_FALSE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); to_flush.clear(); @@ -732,7 +732,7 @@ TEST_F(MemTableListTest, FlushPendingTest) { ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire)); // Rollback first pick of tables - list.RollbackMemtableFlush(to_flush, 0); + list.RollbackMemtableFlush(to_flush, false); ASSERT_TRUE(list.IsFlushPending()); ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire)); to_flush.clear(); diff --git a/unreleased_history/bug_fixes/100_rollback_pending_flush.md b/unreleased_history/bug_fixes/100_rollback_pending_flush.md new file mode 100644 index 000000000..8ca2b1296 --- /dev/null +++ b/unreleased_history/bug_fixes/100_rollback_pending_flush.md @@ -0,0 +1 @@ +* Fix a bug where RocksDB (with atomic_flush=false) can delete output SST files of pending flushes when a previous concurrent flush fails (#11865). This can result in DB entering read-only state with error message like `IO error: No such file or directory: While open a file for random read: /tmp/rocksdbtest-501/db_flush_test_87732_4230653031040984171/000013.sst`. \ No newline at end of file From 269478ee4618283cd6d710fdfea9687157a259c1 Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 21 Sep 2023 20:30:53 -0700 Subject: [PATCH 145/386] Support compressed and local flash secondary cache stacking (#11812) Summary: This PR implements support for a three tier cache - primary block cache, compressed secondary cache, and a nvm (local flash) secondary cache. This allows more effective utilization of the nvm cache, and minimizes the number of reads from local flash by caching compressed blocks in the compressed secondary cache. The basic design is as follows - 1. A new secondary cache implementation, ```TieredSecondaryCache```, is introduced. It keeps the compressed and nvm secondary caches and manages the movement of blocks between them and the primary block cache. To setup a three tier cache, we allocate a ```CacheWithSecondaryAdapter```, with a ```TieredSecondaryCache``` instance as the secondary cache. 2. 
The table reader passes both the uncompressed and compressed block to ```FullTypedCacheInterface::InsertFull```, allowing the block cache to optionally store the compressed block. 3. When there's a miss, the block object is constructed and inserted in the primary cache, and the compressed block is inserted into the nvm cache by calling ```InsertSaved```. This avoids the overhead of recompressing the block, as well as avoiding putting more memory pressure on the compressed secondary cache. 4. When there's a hit in the nvm cache, we attempt to insert the block in the compressed secondary cache and the primary cache, subject to the admission policy of those caches (i.e admit on second access). Blocks/items evicted from any tier are simply discarded. We can easily implement additional admission policies if desired. Todo (In a subsequent PR): 1. Add to db_bench and run benchmarks 2. Add to db_stress Pull Request resolved: https://github.com/facebook/rocksdb/pull/11812 Reviewed By: pdillinger Differential Revision: D49461842 Pulled By: anand1976 fbshipit-source-id: b40ac1330ef7cd8c12efa0a3ca75128e602e3a0b --- CMakeLists.txt | 2 + Makefile | 3 + TARGETS | 7 + cache/cache.cc | 35 + cache/cache_bench_tool.cc | 3 +- cache/cache_helpers.cc | 3 +- cache/charged_cache.cc | 6 +- cache/charged_cache.h | 8 +- cache/compressed_secondary_cache.cc | 169 +++-- cache/compressed_secondary_cache.h | 9 + cache/compressed_secondary_cache_test.cc | 4 +- cache/lru_cache_test.cc | 41 +- cache/secondary_cache.cc | 33 - cache/secondary_cache_adapter.cc | 30 +- cache/secondary_cache_adapter.h | 10 +- cache/sharded_cache.h | 9 +- cache/tiered_secondary_cache.cc | 119 ++++ cache/tiered_secondary_cache.h | 155 +++++ cache/tiered_secondary_cache_test.cc | 642 ++++++++++++++++++ cache/typed_cache.h | 28 +- db/blob/blob_contents.h | 3 +- db/blob/blob_file_reader.cc | 3 +- db/db_basic_test.cc | 8 +- db/db_block_cache_test.cc | 10 +- db/db_test.cc | 5 +- db/db_test_util.cc | 12 +- db/db_test_util.h | 5 +- include/rocksdb/advanced_cache.h | 41 +- include/rocksdb/advanced_options.h | 3 +- include/rocksdb/cache.h | 17 +- include/rocksdb/secondary_cache.h | 87 ++- options/customizable_test.cc | 4 + src.mk | 4 +- .../block_based/block_based_table_builder.cc | 2 +- table/block_based/block_based_table_reader.cc | 92 ++- table/block_based/block_based_table_reader.h | 7 +- .../block_based_table_reader_sync_and_async.h | 6 +- table/block_based/block_cache.cc | 4 +- table/block_based/block_cache.h | 33 +- table/block_based/block_test.cc | 57 +- table/block_fetcher.cc | 5 +- table/block_fetcher.h | 4 + table/block_fetcher_test.cc | 8 +- test_util/secondary_cache_test_util.cc | 3 +- tools/db_bench_tool.cc | 8 +- .../new_features/secondary_cache_stacking.md | 1 + .../public_api_changes/new_tiered_cache.md | 1 + utilities/fault_injection_secondary_cache.h | 5 + utilities/simulator_cache/sim_cache.cc | 11 +- 49 files changed, 1527 insertions(+), 238 deletions(-) create mode 100644 cache/tiered_secondary_cache.cc create mode 100644 cache/tiered_secondary_cache.h create mode 100644 cache/tiered_secondary_cache_test.cc create mode 100644 unreleased_history/new_features/secondary_cache_stacking.md create mode 100644 unreleased_history/public_api_changes/new_tiered_cache.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a53fcba3..b475a2224 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -632,6 +632,7 @@ set(SOURCES cache/secondary_cache.cc cache/secondary_cache_adapter.cc cache/sharded_cache.cc + cache/tiered_secondary_cache.cc 
db/arena_wrapped_db_iter.cc db/blob/blob_contents.cc db/blob/blob_fetcher.cc @@ -1263,6 +1264,7 @@ if(WITH_TESTS) cache/cache_test.cc cache/compressed_secondary_cache_test.cc cache/lru_cache_test.cc + cache/tiered_secondary_cache_test.cc db/blob/blob_counting_iterator_test.cc db/blob/blob_file_addition_test.cc db/blob/blob_file_builder_test.cc diff --git a/Makefile b/Makefile index 08ad7e48a..71c96f284 100644 --- a/Makefile +++ b/Makefile @@ -1885,6 +1885,9 @@ compressed_secondary_cache_test: $(OBJ_DIR)/cache/compressed_secondary_cache_tes lru_cache_test: $(OBJ_DIR)/cache/lru_cache_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +tiered_secondary_cache_test: $(OBJ_DIR)/cache/tiered_secondary_cache_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + range_del_aggregator_test: $(OBJ_DIR)/db/range_del_aggregator_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) diff --git a/TARGETS b/TARGETS index ad0da6100..86d62e2c5 100644 --- a/TARGETS +++ b/TARGETS @@ -21,6 +21,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "cache/secondary_cache.cc", "cache/secondary_cache_adapter.cc", "cache/sharded_cache.cc", + "cache/tiered_secondary_cache.cc", "db/arena_wrapped_db_iter.cc", "db/blob/blob_contents.cc", "db/blob/blob_fetcher.cc", @@ -5475,6 +5476,12 @@ cpp_unittest_wrapper(name="tiered_compaction_test", extra_compiler_flags=[]) +cpp_unittest_wrapper(name="tiered_secondary_cache_test", + srcs=["cache/tiered_secondary_cache_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) + + cpp_unittest_wrapper(name="timer_queue_test", srcs=["util/timer_queue_test.cc"], deps=[":rocksdb_test_lib"], diff --git a/cache/cache.cc b/cache/cache.cc index a65f5ec4f..3dbea128e 100644 --- a/cache/cache.cc +++ b/cache/cache.cc @@ -66,6 +66,41 @@ static std::unordered_map OptionTypeFlags::kMutable}}, }; +namespace { +static void NoopDelete(Cache::ObjectPtr /*obj*/, + MemoryAllocator* /*allocator*/) { + assert(false); +} + +static size_t SliceSize(Cache::ObjectPtr obj) { + return static_cast(obj)->size(); +} + +static Status SliceSaveTo(Cache::ObjectPtr from_obj, size_t from_offset, + size_t length, char* out) { + const Slice& slice = *static_cast(from_obj); + std::memcpy(out, slice.data() + from_offset, length); + return Status::OK(); +} + +static Status NoopCreate(const Slice& /*data*/, CompressionType /*type*/, + CacheTier /*source*/, Cache::CreateContext* /*ctx*/, + MemoryAllocator* /*allocator*/, + Cache::ObjectPtr* /*out_obj*/, + size_t* /*out_charge*/) { + assert(false); + return Status::NotSupported(); +} + +static Cache::CacheItemHelper kBasicCacheItemHelper(CacheEntryRole::kMisc, + &NoopDelete); +} // namespace + +const Cache::CacheItemHelper kSliceCacheItemHelper{ + CacheEntryRole::kMisc, &NoopDelete, &SliceSize, + &SliceSaveTo, &NoopCreate, &kBasicCacheItemHelper, +}; + Status SecondaryCache::CreateFromString( const ConfigOptions& config_options, const std::string& value, std::shared_ptr* result) { diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index e33e36162..b75835348 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -290,7 +290,8 @@ Status SaveToFn(Cache::ObjectPtr from_obj, size_t /*from_offset*/, return Status::OK(); } -Status CreateFn(const Slice& data, Cache::CreateContext* /*context*/, +Status CreateFn(const Slice& data, CompressionType /*type*/, + CacheTier /*source*/, Cache::CreateContext* /*context*/, MemoryAllocator* /*allocator*/, Cache::ObjectPtr* out_obj, size_t* out_charge) { *out_obj = new char[data.size()]; diff --git a/cache/cache_helpers.cc 
b/cache/cache_helpers.cc index 22597bf6d..bceb6f3c0 100644 --- a/cache/cache_helpers.cc +++ b/cache/cache_helpers.cc @@ -25,7 +25,8 @@ Status WarmInCache(Cache* cache, const Slice& key, const Slice& saved, assert(helper->create_cb); Cache::ObjectPtr value; size_t charge; - Status st = helper->create_cb(saved, create_context, + Status st = helper->create_cb(saved, CompressionType::kNoCompression, + CacheTier::kVolatileTier, create_context, cache->memory_allocator(), &value, &charge); if (st.ok()) { st = diff --git a/cache/charged_cache.cc b/cache/charged_cache.cc index e44288ecd..6a21bacfb 100644 --- a/cache/charged_cache.cc +++ b/cache/charged_cache.cc @@ -19,8 +19,10 @@ ChargedCache::ChargedCache(std::shared_ptr cache, Status ChargedCache::Insert(const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, size_t charge, - Handle** handle, Priority priority) { - Status s = target_->Insert(key, obj, helper, charge, handle, priority); + Handle** handle, Priority priority, + const Slice& compressed_val, CompressionType type) { + Status s = target_->Insert(key, obj, helper, charge, handle, priority, + compressed_val, type); if (s.ok()) { // Insert may cause the cache entry eviction if the cache is full. So we // directly call the reservation manager to update the total memory used diff --git a/cache/charged_cache.h b/cache/charged_cache.h index f2eacb9ed..a59c178ab 100644 --- a/cache/charged_cache.h +++ b/cache/charged_cache.h @@ -22,9 +22,11 @@ class ChargedCache : public CacheWrapper { ChargedCache(std::shared_ptr cache, std::shared_ptr block_cache); - Status Insert(const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, - size_t charge, Handle** handle = nullptr, - Priority priority = Priority::LOW) override; + Status Insert( + const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, + size_t charge, Handle** handle = nullptr, + Priority priority = Priority::LOW, const Slice& compressed_val = Slice(), + CompressionType type = CompressionType::kNoCompression) override; Cache::Handle* Lookup(const Slice& key, const CacheItemHelper* helper, CreateContext* create_context, diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index 4a35b8d1e..3a35945e2 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -11,6 +11,7 @@ #include "memory/memory_allocator_impl.h" #include "monitoring/perf_context_imp.h" +#include "util/coding.h" #include "util/compression.h" #include "util/string_util.h" @@ -54,40 +55,65 @@ std::unique_ptr CompressedSecondaryCache::Lookup( CacheAllocationPtr* ptr{nullptr}; CacheAllocationPtr merged_value; size_t handle_value_charge{0}; + const char* data_ptr = nullptr; + CacheTier source = CacheTier::kVolatileCompressedTier; + CompressionType type = cache_options_.compression_type; if (cache_options_.enable_custom_split_merge) { CacheValueChunk* value_chunk_ptr = reinterpret_cast(handle_value); merged_value = MergeChunksIntoValue(value_chunk_ptr, handle_value_charge); ptr = &merged_value; + data_ptr = ptr->get(); } else { + uint32_t type_32 = static_cast(type); + uint32_t source_32 = static_cast(source); ptr = reinterpret_cast(handle_value); handle_value_charge = cache_->GetCharge(lru_handle); + data_ptr = ptr->get(); + data_ptr = GetVarint32Ptr(data_ptr, data_ptr + 1, + static_cast(&type_32)); + type = static_cast(type_32); + data_ptr = GetVarint32Ptr(data_ptr, data_ptr + 1, + static_cast(&source_32)); + source = static_cast(source_32); + handle_value_charge -= (data_ptr - ptr->get()); } 
MemoryAllocator* allocator = cache_options_.memory_allocator.get(); Status s; Cache::ObjectPtr value{nullptr}; size_t charge{0}; - if (cache_options_.compression_type == kNoCompression || - cache_options_.do_not_compress_roles.Contains(helper->role)) { - s = helper->create_cb(Slice(ptr->get(), handle_value_charge), - create_context, allocator, &value, &charge); - } else { - UncompressionContext uncompression_context(cache_options_.compression_type); - UncompressionInfo uncompression_info(uncompression_context, - UncompressionDict::GetEmptyDict(), - cache_options_.compression_type); - - size_t uncompressed_size{0}; - CacheAllocationPtr uncompressed = UncompressData( - uncompression_info, (char*)ptr->get(), handle_value_charge, - &uncompressed_size, cache_options_.compress_format_version, allocator); - - if (!uncompressed) { - cache_->Release(lru_handle, /*erase_if_last_ref=*/true); - return nullptr; + if (source == CacheTier::kVolatileCompressedTier) { + if (cache_options_.compression_type == kNoCompression || + cache_options_.do_not_compress_roles.Contains(helper->role)) { + s = helper->create_cb(Slice(data_ptr, handle_value_charge), + kNoCompression, CacheTier::kVolatileTier, + create_context, allocator, &value, &charge); + } else { + UncompressionContext uncompression_context( + cache_options_.compression_type); + UncompressionInfo uncompression_info(uncompression_context, + UncompressionDict::GetEmptyDict(), + cache_options_.compression_type); + + size_t uncompressed_size{0}; + CacheAllocationPtr uncompressed = + UncompressData(uncompression_info, (char*)data_ptr, + handle_value_charge, &uncompressed_size, + cache_options_.compress_format_version, allocator); + + if (!uncompressed) { + cache_->Release(lru_handle, /*erase_if_last_ref=*/true); + return nullptr; + } + s = helper->create_cb(Slice(uncompressed.get(), uncompressed_size), + kNoCompression, CacheTier::kVolatileTier, + create_context, allocator, &value, &charge); } - s = helper->create_cb(Slice(uncompressed.get(), uncompressed_size), + } else { + // The item was not compressed by us. Let the helper create_cb + // uncompress it + s = helper->create_cb(Slice(data_ptr, handle_value_charge), type, source, create_context, allocator, &value, &charge); } @@ -112,45 +138,56 @@ std::unique_ptr CompressedSecondaryCache::Lookup( return handle; } -Status CompressedSecondaryCache::Insert(const Slice& key, - Cache::ObjectPtr value, - const Cache::CacheItemHelper* helper, - bool force_insert) { - if (value == nullptr) { - return Status::InvalidArgument(); +bool CompressedSecondaryCache::MaybeInsertDummy(const Slice& key) { + auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge); + Cache::Handle* lru_handle = cache_->Lookup(key); + if (lru_handle == nullptr) { + PERF_COUNTER_ADD(compressed_sec_cache_insert_dummy_count, 1); + // Insert a dummy handle if the handle is evicted for the first time. 
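+ // Net effect: the first time a key reaches this secondary cache only a
+ // zero-charge placeholder is inserted and the caller skips storing the
+ // real payload; the payload is admitted only if the key is seen again
+ // while the dummy is still resident (an "admit on second access" policy).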
+ cache_->Insert(key, /*obj=*/nullptr, internal_helper, /*charge=*/0) + .PermitUncheckedError(); + return true; + } else { + cache_->Release(lru_handle, /*erase_if_last_ref=*/false); } - if (disable_cache_) { + return false; +} + +Status CompressedSecondaryCache::InsertInternal( + const Slice& key, Cache::ObjectPtr value, + const Cache::CacheItemHelper* helper, CompressionType type, + CacheTier source) { + if (source != CacheTier::kVolatileCompressedTier && + cache_options_.enable_custom_split_merge) { + // We don't support custom split/merge for the tiered case return Status::OK(); } auto internal_helper = GetHelper(cache_options_.enable_custom_split_merge); - if (!force_insert) { - Cache::Handle* lru_handle = cache_->Lookup(key); - if (lru_handle == nullptr) { - PERF_COUNTER_ADD(compressed_sec_cache_insert_dummy_count, 1); - // Insert a dummy handle if the handle is evicted for the first time. - return cache_->Insert(key, /*obj=*/nullptr, internal_helper, - /*charge=*/0); - } else { - cache_->Release(lru_handle, /*erase_if_last_ref=*/false); - } - } - - size_t size = (*helper->size_cb)(value); + char header[10]; + char* payload = header; + payload = EncodeVarint32(payload, static_cast(type)); + payload = EncodeVarint32(payload, static_cast(source)); + + size_t header_size = payload - header; + size_t data_size = (*helper->size_cb)(value); + size_t total_size = data_size + header_size; CacheAllocationPtr ptr = - AllocateBlock(size, cache_options_.memory_allocator.get()); + AllocateBlock(total_size, cache_options_.memory_allocator.get()); + char* data_ptr = ptr.get() + header_size; - Status s = (*helper->saveto_cb)(value, 0, size, ptr.get()); + Status s = (*helper->saveto_cb)(value, 0, data_size, data_ptr); if (!s.ok()) { return s; } - Slice val(ptr.get(), size); + Slice val(data_ptr, data_size); std::string compressed_val; if (cache_options_.compression_type != kNoCompression && + type == kNoCompression && !cache_options_.do_not_compress_roles.Contains(helper->role)) { - PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, size); + PERF_COUNTER_ADD(compressed_sec_cache_uncompressed_bytes, data_size); CompressionOptions compression_opts; CompressionContext compression_context(cache_options_.compression_type, compression_opts); @@ -168,12 +205,14 @@ Status CompressedSecondaryCache::Insert(const Slice& key, } val = Slice(compressed_val); - size = compressed_val.size(); - PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes, size); + data_size = compressed_val.size(); + total_size = header_size + data_size; + PERF_COUNTER_ADD(compressed_sec_cache_compressed_bytes, data_size); if (!cache_options_.enable_custom_split_merge) { - ptr = AllocateBlock(size, cache_options_.memory_allocator.get()); - memcpy(ptr.get(), compressed_val.data(), size); + ptr = AllocateBlock(total_size, cache_options_.memory_allocator.get()); + data_ptr = ptr.get() + header_size; + memcpy(data_ptr, compressed_val.data(), data_size); } } @@ -184,9 +223,43 @@ Status CompressedSecondaryCache::Insert(const Slice& key, SplitValueIntoChunks(val, cache_options_.compression_type, charge); return cache_->Insert(key, value_chunks_head, internal_helper, charge); } else { + std::memcpy(ptr.get(), header, header_size); CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr)); - return cache_->Insert(key, buf, internal_helper, size); + return cache_->Insert(key, buf, internal_helper, total_size); + } +} + +Status CompressedSecondaryCache::Insert(const Slice& key, + Cache::ObjectPtr value, + const Cache::CacheItemHelper* 
helper, + bool force_insert) { + if (value == nullptr) { + return Status::InvalidArgument(); + } + + if (!force_insert && MaybeInsertDummy(key)) { + return Status::OK(); } + + return InsertInternal(key, value, helper, kNoCompression, + CacheTier::kVolatileCompressedTier); +} + +Status CompressedSecondaryCache::InsertSaved( + const Slice& key, const Slice& saved, CompressionType type = kNoCompression, + CacheTier source = CacheTier::kVolatileTier) { + if (type == kNoCompression) { + return Status::OK(); + } + + auto slice_helper = &kSliceCacheItemHelper; + if (MaybeInsertDummy(key)) { + return Status::OK(); + } + + return InsertInternal( + key, static_cast(const_cast(&saved)), + slice_helper, type, source); } void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); } diff --git a/cache/compressed_secondary_cache.h b/cache/compressed_secondary_cache.h index 773a25bf9..32e6fd0df 100644 --- a/cache/compressed_secondary_cache.h +++ b/cache/compressed_secondary_cache.h @@ -80,6 +80,9 @@ class CompressedSecondaryCache : public SecondaryCache { const Cache::CacheItemHelper* helper, bool force_insert) override; + Status InsertSaved(const Slice& key, const Slice& saved, CompressionType type, + CacheTier source) override; + std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase, @@ -130,6 +133,12 @@ class CompressedSecondaryCache : public SecondaryCache { CacheAllocationPtr MergeChunksIntoValue(const void* chunks_head, size_t& charge); + bool MaybeInsertDummy(const Slice& key); + + Status InsertInternal(const Slice& key, Cache::ObjectPtr value, + const Cache::CacheItemHelper* helper, + CompressionType type, CacheTier source); + // TODO: clean up to use cleaner interfaces in typed_cache.h const Cache::CacheItemHelper* GetHelper(bool enable_custom_split_merge) const; std::shared_ptr cache_; diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index 54727a2fc..eb31738f7 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -992,7 +992,7 @@ class CompressedSecCacheTestWithTiered /*_capacity=*/70 << 20, /*_estimated_entry_charge=*/256 << 10, /*_num_shard_bits=*/0); - TieredVolatileCacheOptions opts; + TieredCacheOptions opts; lru_opts.capacity = 70 << 20; lru_opts.num_shard_bits = 0; lru_opts.high_pri_pool_ratio = 0; @@ -1006,7 +1006,7 @@ class CompressedSecCacheTestWithTiered ; opts.comp_cache_opts.capacity = 30 << 20; opts.comp_cache_opts.num_shard_bits = 0; - cache_ = NewTieredVolatileCache(opts); + cache_ = NewTieredCache(opts); cache_res_mgr_ = std::make_shared>( cache_); diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 047f5b80b..27fd5cc85 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -983,13 +983,14 @@ class TestSecondaryCache : public SecondaryCache { using ResultMap = std::unordered_map; - explicit TestSecondaryCache(size_t capacity) + explicit TestSecondaryCache(size_t capacity, bool insert_saved = false) : cache_(NewLRUCache(capacity, 0, false, 0.5 /* high_pri_pool_ratio */, nullptr, kDefaultToAdaptiveMutex, kDontChargeCacheMetadata)), num_inserts_(0), num_lookups_(0), - inject_failure_(false) {} + inject_failure_(false), + insert_saved_(insert_saved) {} const char* Name() const override { return "TestSecondaryCache"; } @@ -1020,6 +1021,17 @@ class TestSecondaryCache : public SecondaryCache { return cache_.Insert(key, buf, size); } + Status 
InsertSaved(const Slice& key, const Slice& saved, + CompressionType /*type*/ = kNoCompression, + CacheTier /*source*/ = CacheTier::kVolatileTier) override { + if (insert_saved_) { + return Insert(key, const_cast(&saved), &kSliceCacheItemHelper, + /*force_insert=*/true); + } else { + return Status::OK(); + } + } + std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool /*wait*/, @@ -1048,7 +1060,8 @@ class TestSecondaryCache : public SecondaryCache { char* ptr = cache_.Value(handle); size_t size = DecodeFixed64(ptr); ptr += sizeof(uint64_t); - s = helper->create_cb(Slice(ptr, size), create_context, + s = helper->create_cb(Slice(ptr, size), kNoCompression, + CacheTier::kVolatileTier, create_context, /*alloc*/ nullptr, &value, &charge); } if (s.ok()) { @@ -1137,6 +1150,7 @@ class TestSecondaryCache : public SecondaryCache { uint32_t num_inserts_; uint32_t num_lookups_; bool inject_failure_; + bool insert_saved_; std::string ckey_prefix_; ResultMap result_map_; }; @@ -1167,7 +1181,7 @@ INSTANTIATE_TEST_CASE_P(DBSecondaryCacheTest, DBSecondaryCacheTest, TEST_P(BasicSecondaryCacheTest, BasicTest) { std::shared_ptr secondary_cache = - std::make_shared(4096); + std::make_shared(4096, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -1224,7 +1238,7 @@ TEST_P(BasicSecondaryCacheTest, BasicTest) { TEST_P(BasicSecondaryCacheTest, StatsTest) { std::shared_ptr secondary_cache = - std::make_shared(4096); + std::make_shared(4096, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -1278,7 +1292,7 @@ TEST_P(BasicSecondaryCacheTest, StatsTest) { TEST_P(BasicSecondaryCacheTest, BasicFailTest) { std::shared_ptr secondary_cache = - std::make_shared(2048); + std::make_shared(2048, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -1320,7 +1334,7 @@ TEST_P(BasicSecondaryCacheTest, BasicFailTest) { TEST_P(BasicSecondaryCacheTest, SaveFailTest) { std::shared_ptr secondary_cache = - std::make_shared(2048); + std::make_shared(2048, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -1361,7 +1375,7 @@ TEST_P(BasicSecondaryCacheTest, SaveFailTest) { TEST_P(BasicSecondaryCacheTest, CreateFailTest) { std::shared_ptr secondary_cache = - std::make_shared(2048); + std::make_shared(2048, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -1402,7 +1416,7 @@ TEST_P(BasicSecondaryCacheTest, CreateFailTest) { TEST_P(BasicSecondaryCacheTest, FullCapacityTest) { for (bool strict_capacity_limit : {false, true}) { std::shared_ptr secondary_cache = - std::make_shared(2048); + std::make_shared(2048, true); std::shared_ptr cache = NewCache(1024 /* capacity */, 0 /* num_shard_bits */, strict_capacity_limit, secondary_cache); @@ -2021,8 +2035,9 @@ class CacheWithStats : public CacheWrapper { Status Insert(const Slice& key, Cache::ObjectPtr value, const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { + Handle** handle = nullptr, Priority priority = Priority::LOW, + const Slice& /*compressed*/ = Slice(), + CompressionType /*type*/ = kNoCompression) override { 
insert_count_++; return target_->Insert(key, value, helper, charge, handle, priority); } @@ -2115,7 +2130,7 @@ TEST_P(DBSecondaryCacheTest, LRUCacheDumpLoadBasic) { // we have a new cache it is empty, then, before we do the Get, we do the // dumpload std::shared_ptr secondary_cache = - std::make_shared(2048 * 1024); + std::make_shared(2048 * 1024, true); // This time with secondary cache base_cache = NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); @@ -2271,7 +2286,7 @@ TEST_P(DBSecondaryCacheTest, LRUCacheDumpLoadWithFilter) { // we have a new cache it is empty, then, before we do the Get, we do the // dumpload std::shared_ptr secondary_cache = - std::make_shared(2048 * 1024); + std::make_shared(2048 * 1024, true); // This time with secondary_cache base_cache = NewCache(1024 * 1024 /* capacity */, 0 /* num_shard_bits */, false /* strict_capacity_limit */, secondary_cache); diff --git a/cache/secondary_cache.cc b/cache/secondary_cache.cc index 5fecc0a6e..4439869f1 100644 --- a/cache/secondary_cache.cc +++ b/cache/secondary_cache.cc @@ -9,37 +9,4 @@ namespace ROCKSDB_NAMESPACE { -namespace { - -void NoopDelete(Cache::ObjectPtr, MemoryAllocator*) {} - -size_t SliceSize(Cache::ObjectPtr obj) { - return static_cast(obj)->size(); -} - -Status SliceSaveTo(Cache::ObjectPtr from_obj, size_t from_offset, size_t length, - char* out) { - const Slice& slice = *static_cast(from_obj); - std::memcpy(out, slice.data() + from_offset, length); - return Status::OK(); -} - -Status FailCreate(const Slice&, Cache::CreateContext*, MemoryAllocator*, - Cache::ObjectPtr*, size_t*) { - return Status::NotSupported("Only for dumping data into SecondaryCache"); -} - -} // namespace - -Status SecondaryCache::InsertSaved(const Slice& key, const Slice& saved) { - static Cache::CacheItemHelper helper_no_secondary{CacheEntryRole::kMisc, - &NoopDelete}; - static Cache::CacheItemHelper helper{ - CacheEntryRole::kMisc, &NoopDelete, &SliceSize, - &SliceSaveTo, &FailCreate, &helper_no_secondary}; - // NOTE: depends on Insert() being synchronous, not keeping pointer `&saved` - return Insert(key, const_cast(&saved), &helper, - /*force_insert=*/true); -} - } // namespace ROCKSDB_NAMESPACE diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index 7f5968bb7..dba68e8d2 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -5,6 +5,7 @@ #include "cache/secondary_cache_adapter.h" +#include "cache/tiered_secondary_cache.h" #include "monitoring/perf_context_imp.h" #include "util/cast_util.h" @@ -111,7 +112,7 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { size_t sec_capacity = 0; Status s = secondary_cache_->GetCapacity(sec_capacity); assert(s.ok()); - assert(pri_cache_res_->GetTotalReservedCacheSize() == sec_capacity); + assert(pri_cache_res_->GetTotalMemoryUsed() == sec_capacity); } #endif // NDEBUG } @@ -119,7 +120,8 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { bool CacheWithSecondaryAdapter::EvictionHandler(const Slice& key, Handle* handle, bool was_hit) { auto helper = GetCacheItemHelper(handle); - if (helper->IsSecondaryCacheCompatible()) { + if (helper->IsSecondaryCacheCompatible() && + adm_policy_ != TieredAdmissionPolicy::kAdmPolicyThreeQueue) { auto obj = target_->Value(handle); // Ignore dummy entry if (obj != kDummyObj) { @@ -225,7 +227,9 @@ Cache::Handle* CacheWithSecondaryAdapter::Promote( Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, const 
CacheItemHelper* helper, size_t charge, Handle** handle, - Priority priority) { + Priority priority, + const Slice& compressed_value, + CompressionType type) { Status s = target_->Insert(key, value, helper, charge, handle, priority); if (s.ok() && value == nullptr && distribute_cache_res_) { size_t sec_charge = static_cast(charge * (sec_cache_res_ratio_)); @@ -234,6 +238,12 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false); assert(s.ok()); } + // Warm up the secondary cache with the compressed block. The secondary + // cache may choose to ignore it based on the admission policy. + if (value != nullptr && !compressed_value.empty()) { + Status status = secondary_cache_->InsertSaved(key, compressed_value, type); + assert(status.ok()); + } return s; } @@ -411,8 +421,7 @@ const char* CacheWithSecondaryAdapter::Name() const { return target_->Name(); } -std::shared_ptr NewTieredVolatileCache( - TieredVolatileCacheOptions& opts) { +std::shared_ptr NewTieredCache(TieredCacheOptions& opts) { if (!opts.cache_opts) { return nullptr; } @@ -440,6 +449,17 @@ std::shared_ptr NewTieredVolatileCache( std::shared_ptr sec_cache; sec_cache = NewCompressedSecondaryCache(opts.comp_cache_opts); + if (opts.nvm_sec_cache) { + if (opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyThreeQueue || + opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyAuto) { + sec_cache = std::make_shared( + sec_cache, opts.nvm_sec_cache, + TieredAdmissionPolicy::kAdmPolicyThreeQueue); + } else { + return nullptr; + } + } + return std::make_shared( cache, sec_cache, opts.adm_policy, /*distribute_cache_res=*/true); } diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 4ef048de5..c9b846df6 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -20,10 +20,12 @@ class CacheWithSecondaryAdapter : public CacheWrapper { ~CacheWithSecondaryAdapter() override; - Status Insert(const Slice& key, ObjectPtr value, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override; + Status Insert( + const Slice& key, ObjectPtr value, const CacheItemHelper* helper, + size_t charge, Handle** handle = nullptr, + Priority priority = Priority::LOW, + const Slice& compressed_value = Slice(), + CompressionType type = CompressionType::kNoCompression) override; Handle* Lookup(const Slice& key, const CacheItemHelper* helper, CreateContext* create_context, diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h index 5c42194d8..39042137f 100644 --- a/cache/sharded_cache.h +++ b/cache/sharded_cache.h @@ -170,9 +170,12 @@ class ShardedCache : public ShardedCacheBase { [s_c_l](CacheShard* cs) { cs->SetStrictCapacityLimit(s_c_l); }); } - Status Insert(const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, - size_t charge, Handle** handle = nullptr, - Priority priority = Priority::LOW) override { + Status Insert( + const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, + size_t charge, Handle** handle = nullptr, + Priority priority = Priority::LOW, + const Slice& /*compressed_value*/ = Slice(), + CompressionType /*type*/ = CompressionType::kNoCompression) override { assert(helper); HashVal hash = CacheShard::ComputeHash(key, hash_seed_); auto h_out = reinterpret_cast(handle); diff --git a/cache/tiered_secondary_cache.cc b/cache/tiered_secondary_cache.cc new file mode 100644 index 000000000..493e69572 --- /dev/null +++ 
b/cache/tiered_secondary_cache.cc @@ -0,0 +1,119 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "cache/tiered_secondary_cache.h" + +namespace ROCKSDB_NAMESPACE { + +// Creation callback for use in the lookup path. It calls the upper layer +// create_cb to create the object, and optionally calls the compressed +// secondary cache InsertSaved to save the compressed block. If +// advise_erase is set, it means the primary cache wants the block to be +// erased in the secondary cache, so we skip calling InsertSaved. +// +// For the time being, we assume that all blocks in the nvm tier belong to +// the primary block cache (i.e CacheTier::kVolatileTier). That can be changed +// if we implement demotion from the compressed secondary cache to the nvm +// cache in the future. +Status TieredSecondaryCache::MaybeInsertAndCreate( + const Slice& data, CompressionType type, CacheTier source, + Cache::CreateContext* ctx, MemoryAllocator* allocator, + Cache::ObjectPtr* out_obj, size_t* out_charge) { + TieredSecondaryCache::CreateContext* context = + static_cast(ctx); + assert(source == CacheTier::kVolatileTier); + if (!context->advise_erase && type != kNoCompression) { + // Attempt to insert into compressed secondary cache + // TODO: Don't hardcode the source + context->comp_sec_cache->InsertSaved(*context->key, data, type, source) + .PermitUncheckedError(); + } + // Primary cache will accept the object, so call its helper to create + // the object + return context->helper->create_cb(data, type, source, context->inner_ctx, + allocator, out_obj, out_charge); +} + +// The lookup first looks up in the compressed secondary cache. If its a miss, +// then the nvm cache lookup is called. The cache item helper and create +// context are wrapped in order to intercept the creation callback to make +// the decision on promoting to the compressed secondary cache. +std::unique_ptr TieredSecondaryCache::Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool advise_erase, + bool& kept_in_sec_cache) { + bool dummy = false; + std::unique_ptr result = + target()->Lookup(key, helper, create_context, wait, advise_erase, + /*kept_in_sec_cache=*/dummy); + // We never want the item to spill back into the secondary cache + kept_in_sec_cache = true; + if (result) { + assert(result->IsReady()); + return result; + } + + // If wait is true, then we can be a bit more efficient and avoid a memory + // allocation for the CReateContext. + const Cache::CacheItemHelper* outer_helper = + TieredSecondaryCache::GetHelper(); + if (wait) { + TieredSecondaryCache::CreateContext ctx; + ctx.key = &key; + ctx.advise_erase = advise_erase; + ctx.helper = helper; + ctx.inner_ctx = create_context; + ctx.comp_sec_cache = target(); + + return nvm_sec_cache_->Lookup(key, outer_helper, &ctx, wait, advise_erase, + kept_in_sec_cache); + } + + // If wait is false, i.e its an async lookup, we have to allocate a result + // handle for tracking purposes. Embed the CreateContext inside the handle + // so we need only allocate memory once instead of twice. 
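Editor's aside, not part of the diff: the synchronous/asynchronous split described in the comments above is easiest to see from the caller's side. The sketch below drives both paths through the public SecondaryCache interface, using only the Lookup(), WaitAll(), IsReady() and Value() calls that appear in this patch; the function name LookupBothWays and the surrounding setup are illustrative assumptions, not RocksDB code. The asynchronous branch of TieredSecondaryCache::Lookup() continues immediately below.

#include <cassert>
#include <memory>
#include <vector>

#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"

using namespace ROCKSDB_NAMESPACE;

// Illustrative only: one synchronous and one asynchronous lookup against any
// SecondaryCache implementation (such as the TieredSecondaryCache above).
void LookupBothWays(SecondaryCache* sec_cache, const Slice& key,
                    const Cache::CacheItemHelper* helper,
                    Cache::CreateContext* create_context) {
  bool kept_in_sec_cache = false;

  // Synchronous path: wait=true, so a returned handle is ready immediately.
  std::unique_ptr<SecondaryCacheResultHandle> sync_handle = sec_cache->Lookup(
      key, helper, create_context, /*wait=*/true, /*advise_erase=*/false,
      kept_in_sec_cache);
  if (sync_handle) {
    assert(sync_handle->IsReady());
    Cache::ObjectPtr obj = sync_handle->Value();
    (void)obj;  // the caller would consume the created object here
  }

  // Asynchronous path: wait=false; pending handles are completed via WaitAll.
  std::unique_ptr<SecondaryCacheResultHandle> async_handle = sec_cache->Lookup(
      key, helper, create_context, /*wait=*/false, /*advise_erase=*/false,
      kept_in_sec_cache);
  if (async_handle && !async_handle->IsReady()) {
    std::vector<SecondaryCacheResultHandle*> pending{async_handle.get()};
    sec_cache->WaitAll(pending);
    assert(async_handle->IsReady());
  }
}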
+ std::unique_ptr handle(new ResultHandle()); + handle->ctx()->key = &key; + handle->ctx()->advise_erase = advise_erase; + handle->ctx()->helper = helper; + handle->ctx()->inner_ctx = create_context; + handle->ctx()->comp_sec_cache = target(); + handle->SetInnerHandle(nvm_sec_cache_->Lookup( + key, outer_helper, handle->ctx(), wait, advise_erase, kept_in_sec_cache)); + if (!handle->inner_handle()) { + handle.reset(); + } else { + result.reset(handle.release()); + } + + return result; +} + +// Call the nvm cache WaitAll to complete the lookups +void TieredSecondaryCache::WaitAll( + std::vector handles) { + std::vector nvm_handles; + std::vector my_handles; + nvm_handles.reserve(handles.size()); + for (auto handle : handles) { + // The handle could belong to the compressed secondary cache. Skip if + // that's the case. + if (handle->IsReady()) { + continue; + } + ResultHandle* hdl = static_cast(handle); + nvm_handles.push_back(hdl->inner_handle()); + my_handles.push_back(hdl); + } + nvm_sec_cache_->WaitAll(nvm_handles); + for (auto handle : my_handles) { + assert(handle->IsReady()); + auto nvm_handle = handle->inner_handle(); + handle->SetSize(nvm_handle->Size()); + handle->SetValue(nvm_handle->Value()); + } +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/tiered_secondary_cache.h b/cache/tiered_secondary_cache.h new file mode 100644 index 000000000..6e0536436 --- /dev/null +++ b/cache/tiered_secondary_cache.h @@ -0,0 +1,155 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/cache.h" +#include "rocksdb/secondary_cache.h" + +namespace ROCKSDB_NAMESPACE { + +// A SecondaryCache that implements stacking of a compressed secondary cache +// and a non-volatile (local flash) cache. It implements an admission +// policy of warming the bottommost tier (local flash) with compressed +// blocks from the SST on misses, and on hits in the bottommost tier, +// promoting to the compressed and/or primary block cache. The admission +// policies of the primary block cache and compressed secondary cache remain +// unchanged - promote on second access. There is no demotion ofablocks +// evicted from a tier. They are just discarded. +// +// In order to properly handle compressed blocks directly read from SSTs, and +// to allow writeback of blocks compressed by the compressed secondary +// cache in the future, we make use of the compression type and source +// cache tier arguments in InsertSaved. +class TieredSecondaryCache : public SecondaryCacheWrapper { + public: + TieredSecondaryCache(std::shared_ptr comp_sec_cache, + std::shared_ptr nvm_sec_cache, + TieredAdmissionPolicy adm_policy) + : SecondaryCacheWrapper(comp_sec_cache), nvm_sec_cache_(nvm_sec_cache) { +#ifndef NDEBUG + assert(adm_policy == TieredAdmissionPolicy::kAdmPolicyThreeQueue); +#else + (void)adm_policy; +#endif + } + + ~TieredSecondaryCache() override {} + + const char* Name() const override { return "TieredSecondaryCache"; } + + // This is a no-op as we currently don't allow demotion (i.e + // insertion by the upper layer) of evicted blocks. 
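Editor's aside, not part of the header: for orientation, this is roughly how the three-tier stack described in the class comment above is expected to be assembled, based on the TieredCacheOptions and NewTieredCache() changes elsewhere in this patch. The capacities and my_nvm_cache are placeholder assumptions; any SecondaryCache implementation can serve as the non-volatile tier. The class members, beginning with the no-op Insert() just described, follow below.

#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"

using namespace ROCKSDB_NAMESPACE;

// Illustrative only: build a primary LRU cache + compressed secondary cache
// + non-volatile (local flash) tier, managed with the three-queue policy.
std::shared_ptr<Cache> MakeThreeTierCache(
    std::shared_ptr<SecondaryCache> my_nvm_cache /* hypothetical nvm tier */) {
  LRUCacheOptions lru_opts;
  lru_opts.capacity = 1 << 30;  // uncompressed primary tier
  lru_opts.num_shard_bits = 6;
  lru_opts.high_pri_pool_ratio = 0.5;

  TieredCacheOptions opts;
  opts.cache_opts = &lru_opts;
  opts.cache_type = PrimaryCacheType::kCacheTypeLRU;
  // Three-queue policy: primary, compressed secondary, and nvm tiers are
  // each managed as an independent queue.
  opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyThreeQueue;
  opts.comp_cache_opts.capacity = 256 << 20;  // compressed secondary tier
  opts.nvm_sec_cache = my_nvm_cache;          // bottommost (local flash) tier

  // NewTieredCache() wraps the compressed secondary cache and the nvm cache
  // in a TieredSecondaryCache and attaches the result to the primary cache.
  return NewTieredCache(opts);
}

Note that, per the adapter changes in this patch, NewTieredCache() only accepts an nvm_sec_cache together with kAdmPolicyThreeQueue (or kAdmPolicyAuto, which it maps to the three-queue policy); otherwise it returns nullptr.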
+ virtual Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*obj*/, + const Cache::CacheItemHelper* /*helper*/, + bool /*force_insert*/) override { + return Status::OK(); + } + + // Warm up the nvm tier directly + virtual Status InsertSaved( + const Slice& key, const Slice& saved, + CompressionType type = CompressionType::kNoCompression, + CacheTier source = CacheTier::kVolatileTier) override { + return nvm_sec_cache_->InsertSaved(key, saved, type, source); + } + + virtual std::unique_ptr Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool advise_erase, + bool& kept_in_sec_cache) override; + + virtual void WaitAll( + std::vector handles) override; + + private: + struct CreateContext : public Cache::CreateContext { + const Slice* key; + bool advise_erase; + const Cache::CacheItemHelper* helper; + Cache::CreateContext* inner_ctx; + std::shared_ptr inner_handle; + SecondaryCache* comp_sec_cache; + }; + + class ResultHandle : public SecondaryCacheResultHandle { + public: + ~ResultHandle() override {} + + bool IsReady() override { + return !inner_handle_ || inner_handle_->IsReady(); + } + + void Wait() override { + inner_handle_->Wait(); + Complete(); + } + + size_t Size() override { return size_; } + + Cache::ObjectPtr Value() override { return value_; } + + void Complete() { + assert(IsReady()); + size_ = inner_handle_->Size(); + value_ = inner_handle_->Value(); + inner_handle_.reset(); + } + + void SetInnerHandle(std::unique_ptr&& handle) { + inner_handle_ = std::move(handle); + } + + void SetSize(size_t size) { size_ = size; } + + void SetValue(Cache::ObjectPtr val) { value_ = val; } + + CreateContext* ctx() { return &ctx_; } + + SecondaryCacheResultHandle* inner_handle() { return inner_handle_.get(); } + + private: + std::unique_ptr inner_handle_; + CreateContext ctx_; + size_t size_; + Cache::ObjectPtr value_; + }; + + static void NoopDelete(Cache::ObjectPtr /*obj*/, + MemoryAllocator* /*allocator*/) { + assert(false); + } + static size_t ZeroSize(Cache::ObjectPtr /*obj*/) { + assert(false); + return 0; + } + static Status NoopSaveTo(Cache::ObjectPtr /*from_obj*/, + size_t /*from_offset*/, size_t /*length*/, + char* /*out_buf*/) { + assert(false); + return Status::OK(); + } + static Status MaybeInsertAndCreate(const Slice& data, CompressionType type, + CacheTier source, + Cache::CreateContext* ctx, + MemoryAllocator* allocator, + Cache::ObjectPtr* out_obj, + size_t* out_charge); + + static const Cache::CacheItemHelper* GetHelper() { + const static Cache::CacheItemHelper basic_helper(CacheEntryRole::kMisc, + &NoopDelete); + const static Cache::CacheItemHelper maybe_insert_and_create_helper{ + CacheEntryRole::kMisc, &NoopDelete, &ZeroSize, + &NoopSaveTo, &MaybeInsertAndCreate, &basic_helper, + }; + return &maybe_insert_and_create_helper; + } + + std::shared_ptr comp_sec_cache_; + std::shared_ptr nvm_sec_cache_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/cache/tiered_secondary_cache_test.cc b/cache/tiered_secondary_cache_test.cc new file mode 100644 index 000000000..67b0e3955 --- /dev/null +++ b/cache/tiered_secondary_cache_test.cc @@ -0,0 +1,642 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
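Editor's illustration, not part of the new test file: the TestSecondaryCache stub defined below stores each saved block behind a small header of the form [uint64 payload size][uint16 CompressionType][uint16 CacheTier][payload bytes]. The following self-contained sketch shows that round trip with the same EncodeFixed*/DecodeFixed* helpers the test uses; the two function names are made up for illustration.

#include <cstring>
#include <string>

#include "rocksdb/advanced_options.h"   // CacheTier
#include "rocksdb/compression_type.h"
#include "rocksdb/slice.h"
#include "util/coding.h"                // EncodeFixed64/16, DecodeFixed64/16

namespace ROCKSDB_NAMESPACE {

// Serialize a saved block with the header layout used by the test stub.
inline std::string EncodeSavedBlock(const Slice& saved, CompressionType type,
                                    CacheTier source) {
  std::string out;
  out.resize(saved.size() + sizeof(uint64_t) + 2 * sizeof(uint16_t));
  char* p = &out[0];
  EncodeFixed64(p, saved.size());
  p += sizeof(uint64_t);
  EncodeFixed16(p, static_cast<uint16_t>(type));
  p += sizeof(uint16_t);
  EncodeFixed16(p, static_cast<uint16_t>(source));
  p += sizeof(uint16_t);
  std::memcpy(p, saved.data(), saved.size());
  return out;
}

// Parse the header back out. The returned Slice aliases `buf`, exactly as
// the test's Lookup() hands an aliasing Slice to the create callback.
inline Slice DecodeSavedBlock(const char* buf, CompressionType* type,
                              CacheTier* source) {
  uint64_t size = DecodeFixed64(buf);
  buf += sizeof(uint64_t);
  *type = static_cast<CompressionType>(DecodeFixed16(buf));
  buf += sizeof(uint16_t);
  *source = static_cast<CacheTier>(DecodeFixed16(buf));
  buf += sizeof(uint16_t);
  return Slice(buf, static_cast<size_t>(size));
}

}  // namespace ROCKSDB_NAMESPACE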
+// +#include "db/db_test_util.h" +#include "rocksdb/cache.h" +#include "rocksdb/secondary_cache.h" +#include "typed_cache.h" +#include "util/random.h" + +namespace ROCKSDB_NAMESPACE { + +class TestSecondaryCache : public SecondaryCache { + public: + explicit TestSecondaryCache(size_t capacity) + : cache_(NewLRUCache(capacity, 0, false, 0.5 /* high_pri_pool_ratio */, + nullptr, kDefaultToAdaptiveMutex, + kDontChargeCacheMetadata)), + num_insert_saved_(0), + num_hits_(0), + num_misses_(0) {} + + const char* Name() const override { return "TestSecondaryCache"; } + + Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/, + const Cache::CacheItemHelper* /*helper*/, + bool /*force_insert*/) override { + assert(false); + return Status::NotSupported(); + } + + Status InsertSaved(const Slice& key, const Slice& saved, + CompressionType type = kNoCompression, + CacheTier source = CacheTier::kVolatileTier) override { + CheckCacheKeyCommonPrefix(key); + size_t size; + char* buf; + Status s; + + num_insert_saved_++; + size = saved.size(); + buf = new char[size + sizeof(uint64_t) + 2 * sizeof(uint16_t)]; + EncodeFixed64(buf, size); + buf += sizeof(uint64_t); + EncodeFixed16(buf, type); + buf += sizeof(uint16_t); + EncodeFixed16(buf, (uint16_t)source); + buf += sizeof(uint16_t); + memcpy(buf, saved.data(), size); + buf -= sizeof(uint64_t) + 2 * sizeof(uint16_t); + if (!s.ok()) { + delete[] buf; + return s; + } + return cache_.Insert(key, buf, size); + } + + std::unique_ptr Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool /*advise_erase*/, + bool& kept_in_sec_cache) override { + std::string key_str = key.ToString(); + TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str); + + std::unique_ptr secondary_handle; + kept_in_sec_cache = false; + + TypedHandle* handle = cache_.Lookup(key); + if (handle) { + num_hits_++; + Cache::ObjectPtr value = nullptr; + size_t charge = 0; + Status s; + char* ptr = cache_.Value(handle); + CompressionType type; + CacheTier source; + size_t size = DecodeFixed64(ptr); + ptr += sizeof(uint64_t); + type = static_cast(DecodeFixed16(ptr)); + ptr += sizeof(uint16_t); + source = static_cast(DecodeFixed16(ptr)); + assert(source == CacheTier::kVolatileTier); + ptr += sizeof(uint16_t); + s = helper->create_cb(Slice(ptr, size), type, source, create_context, + /*alloc*/ nullptr, &value, &charge); + if (s.ok()) { + secondary_handle.reset(new TestSecondaryCacheResultHandle( + cache_.get(), handle, value, charge, /*ready=*/wait)); + kept_in_sec_cache = true; + } else { + cache_.Release(handle); + } + } else { + num_misses_++; + } + return secondary_handle; + } + + bool SupportForceErase() const override { return false; } + + void Erase(const Slice& /*key*/) override {} + + void WaitAll(std::vector handles) override { + for (SecondaryCacheResultHandle* handle : handles) { + TestSecondaryCacheResultHandle* sec_handle = + static_cast(handle); + EXPECT_FALSE(sec_handle->IsReady()); + sec_handle->SetReady(); + } + } + + std::string GetPrintableOptions() const override { return ""; } + + uint32_t num_insert_saved() { return num_insert_saved_; } + + uint32_t num_hits() { return num_hits_; } + + uint32_t num_misses() { return num_misses_; } + + void CheckCacheKeyCommonPrefix(const Slice& key) { + Slice current_prefix(key.data(), OffsetableCacheKey::kCommonPrefixSize); + if (ckey_prefix_.empty()) { + ckey_prefix_ = current_prefix.ToString(); + } else { + EXPECT_EQ(ckey_prefix_, current_prefix.ToString()); + } 
+ } + + private: + class TestSecondaryCacheResultHandle : public SecondaryCacheResultHandle { + public: + TestSecondaryCacheResultHandle(Cache* cache, Cache::Handle* handle, + Cache::ObjectPtr value, size_t size, + bool ready) + : cache_(cache), + handle_(handle), + value_(value), + size_(size), + is_ready_(ready) {} + + ~TestSecondaryCacheResultHandle() override { cache_->Release(handle_); } + + bool IsReady() override { return is_ready_; } + + void Wait() override {} + + Cache::ObjectPtr Value() override { + assert(is_ready_); + return value_; + } + + size_t Size() override { return Value() ? size_ : 0; } + + void SetReady() { is_ready_ = true; } + + private: + Cache* cache_; + Cache::Handle* handle_; + Cache::ObjectPtr value_; + size_t size_; + bool is_ready_; + }; + + using SharedCache = + BasicTypedSharedCacheInterface; + using TypedHandle = SharedCache::TypedHandle; + SharedCache cache_; + uint32_t num_insert_saved_; + uint32_t num_hits_; + uint32_t num_misses_; + std::string ckey_prefix_; +}; + +class DBTieredSecondaryCacheTest : public DBTestBase { + public: + DBTieredSecondaryCacheTest() + : DBTestBase("db_tiered_secondary_cache_test", /*env_do_fsync=*/true) {} + + std::shared_ptr NewCache(size_t pri_capacity, + size_t compressed_capacity, + size_t nvm_capacity) { + LRUCacheOptions lru_opts; + TieredCacheOptions opts; + lru_opts.capacity = pri_capacity; + lru_opts.num_shard_bits = 0; + lru_opts.high_pri_pool_ratio = 0; + opts.cache_opts = &lru_opts; + opts.cache_type = PrimaryCacheType::kCacheTypeLRU; + opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyThreeQueue; + opts.comp_cache_opts.capacity = compressed_capacity; + opts.comp_cache_opts.num_shard_bits = 0; + nvm_sec_cache_.reset(new TestSecondaryCache(nvm_capacity)); + opts.nvm_sec_cache = nvm_sec_cache_; + cache_ = NewTieredCache(opts); + assert(cache_ != nullptr); + +#if 0 + CacheWithSecondaryAdapter* adapter_cache_ = + static_cast(cache_.get()); + TieredSecondaryCache* tiered_cache_ = + static_cast( + adapter_cache_->TEST_GetSecondaryCache()); +#endif + + return cache_; + } + + TestSecondaryCache* nvm_sec_cache() { return nvm_sec_cache_.get(); } + + private: + std::shared_ptr cache_; + std::shared_ptr nvm_sec_cache_; +}; + +// In this test, the block size is set to 4096. Each value is 1007 bytes, so +// each data block contains exactly 4 KV pairs. Metadata blocks are not +// cached, so we can accurately estimate the cache usage. +TEST_F(DBTieredSecondaryCacheTest, BasicTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + // We want a block cache of size 5KB, and a compressed secondary cache of + // size 5KB. However, we specify a block cache size of 256KB here in order + // to take into account the cache reservation in the block cache on + // behalf of the compressed cache. The unit of cache reservation is 256KB. + // The effective block cache capacity will be calculated as 256 + 5 = 261KB, + // and 256KB will be reserved for the compressed cache, leaving 5KB for + // the primary block cache. We only have to worry about this here because + // the cache size is so small. 
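Editor's sketch, not part of the test: the sizing arithmetic from the comment above, spelled out. The 256 KB reservation granularity is taken from that comment, and the test body continues right after.

// Sizing check for the NewCache(256 KB, 5 KB, 256 KB) call below.
constexpr size_t kReservationUnit = 256 * 1024;   // reservation granularity
constexpr size_t kRequestedPrimary = 256 * 1024;  // first NewCache() argument
constexpr size_t kCompressed = 5 * 1024;          // second NewCache() argument
// Effective primary capacity = requested primary + compressed capacity.
constexpr size_t kEffectivePrimary = kRequestedPrimary + kCompressed;  // 261 KB
// One full reservation unit is charged against the primary on behalf of the
// compressed tier, leaving roughly 5 KB of usable primary space.
static_assert(kEffectivePrimary - kReservationUnit == 5 * 1024,
              "usable primary space matches the comment above");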
+ table_options.block_cache = NewCache(256 * 1024, 5 * 1024, 256 * 1024); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + // Disable paranoid_file_checks so that flush will not read back the newly + // written file + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + // The first 2 Gets, for keys 0 and 5, will load the corresponding data + // blocks as they will be cache misses. The nvm secondary cache will be + // warmed up with the compressed blocks + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 1u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 1u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + + // At this point, the nvm cache is warmed up with the data blocks for 0 + // and 5. The next Get will lookup the block in nvm and will be a hit. + // It will be created as a standalone entry in memory, and a placeholder + // will be inserted in the primary and compressed caches. + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 1u); + + // For this Get, the primary and compressed only have placeholders for + // the required data block. So we will lookup the nvm cache and find the + // block there. This time, the block will be promoted to the primary + // block cache. No promotion to the compressed secondary cache happens, + // and it will retain the placeholder. + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 2u); + + // This Get will find the data block in the primary cache. + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 2u); + + // We repeat the sequence for key 5. This will end up evicting the block + // for 0 from the in-memory cache. + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 3u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 4u); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 4u); + + // This Get for key 0 will find the data block in nvm. Since the compressed + // cache still has the placeholder, the block (compressed) will be + // admitted. It is theh inserted into the primary as a standalone entry. 
+ v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 5u); + + // This Get for key 0 will find the data block in the compressed secondary + // cache. + v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 2u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 5u); + + Destroy(options); +} + +// This test is very similar to BasicTest, except it calls MultiGet rather +// than Get, in order to exercise the async lookup and WaitAll path. +TEST_F(DBTieredSecondaryCacheTest, BasicMultiGetTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + table_options.block_cache = NewCache(260 * 1024, 10 * 1024, 256 * 1024); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + std::vector keys; + std::vector values; + + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 3u); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 6u); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + 
ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 6u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 9u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 12u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 12u); + + Destroy(options); +} + +TEST_F(DBTieredSecondaryCacheTest, WaitAllTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + table_options.block_cache = NewCache(250 * 1024, 20 * 1024, 256 * 1024); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + std::vector keys; + std::vector values; + + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + + // Insert placeholders for 4 in primary and compressed + std::string val = Get(Key(4)); + + // Force placeholder 4 out of primary + keys.clear(); + values.clear(); + keys.push_back(Key(24)); + keys.push_back(Key(28)); + keys.push_back(Key(32)); + keys.push_back(Key(36)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + 
ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 10u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 10u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 1u); + + // Now read 4 again. This will create a placeholder in primary, and insert + // in compressed secondary since it already has a placeholder + val = Get(Key(4)); + + // Now read 0, 4 and 8. While 4 is already in the compressed secondary + // cache, 0 and 8 will be read asynchronously from the nvm tier. The + // WaitAll will be called for all 3 blocks. + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 10u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 10u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 4u); + + Destroy(options); +} + +// This test is for iteration. It iterates through a set of keys in two +// passes. First pass loads the compressed blocks into the nvm tier, and +// the second pass should hit all of those blocks. +TEST_F(DBTieredSecondaryCacheTest, IterateTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + table_options.block_cache = NewCache(250 * 1024, 10 * 1024, 256 * 1024); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + ReadOptions ro; + ro.readahead_size = 256 * 1024; + auto iter = dbfull()->NewIterator(ro); + iter->SeekToFirst(); + for (int i = 0; i < 31; ++i) { + ASSERT_EQ(Key(i), iter->key().ToString()); + ASSERT_EQ(1007, iter->value().size()); + iter->Next(); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + delete iter; + + iter = dbfull()->NewIterator(ro); + iter->SeekToFirst(); + for (int i = 0; i < 31; ++i) { + ASSERT_EQ(Key(i), iter->key().ToString()); + ASSERT_EQ(1007, iter->value().size()); + iter->Next(); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 8u); + delete iter; + + Destroy(options); +} + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/cache/typed_cache.h b/cache/typed_cache.h index e42aa4c26..125bfa0f5 100644 --- a/cache/typed_cache.h +++ b/cache/typed_cache.h @@ -234,15 +234,19 @@ class FullTypedCacheHelperFns : public BasicTypedCacheHelperFns { return Status::OK(); } - static Status Create(const Slice& data, CreateContext* context, + static Status Create(const Slice& data, CompressionType type, + CacheTier source, CreateContext* context, MemoryAllocator* allocator, ObjectPtr* out_obj, size_t* out_charge) { std::unique_ptr value = nullptr; + if (source != CacheTier::kVolatileTier) { + return 
Status::InvalidArgument(); + } if constexpr (sizeof(TCreateContext) > 0) { TCreateContext* tcontext = static_cast(context); - tcontext->Create(&value, out_charge, data, allocator); + tcontext->Create(&value, out_charge, data, type, allocator); } else { - TCreateContext::Create(&value, out_charge, data, allocator); + TCreateContext::Create(&value, out_charge, data, type, allocator); } *out_obj = UpCastValue(value.release()); return Status::OK(); @@ -301,13 +305,15 @@ class FullTypedCacheInterface inline Status InsertFull( const Slice& key, TValuePtr value, size_t charge, TypedHandle** handle = nullptr, Priority priority = Priority::LOW, - CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) { + CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier, + const Slice& compressed = Slice(), + CompressionType type = CompressionType::kNoCompression) { auto untyped_handle = reinterpret_cast(handle); - auto helper = lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier + auto helper = lowest_used_cache_tier > CacheTier::kVolatileTier ? GetFullHelper() : GetBasicHelper(); return this->cache_->Insert(key, UpCastValue(value), helper, charge, - untyped_handle, priority); + untyped_handle, priority, compressed, type); } // Like SecondaryCache::InsertSaved, with SecondaryCache compatibility @@ -319,9 +325,9 @@ class FullTypedCacheInterface size_t* out_charge = nullptr) { ObjectPtr value; size_t charge; - Status st = GetFullHelper()->create_cb(data, create_context, - this->cache_->memory_allocator(), - &value, &charge); + Status st = GetFullHelper()->create_cb( + data, kNoCompression, CacheTier::kVolatileTier, create_context, + this->cache_->memory_allocator(), &value, &charge); if (out_charge) { *out_charge = charge; } @@ -340,7 +346,7 @@ class FullTypedCacheInterface const Slice& key, TCreateContext* create_context = nullptr, Priority priority = Priority::LOW, Statistics* stats = nullptr, CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) { - if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) { + if (lowest_used_cache_tier > CacheTier::kVolatileTier) { return reinterpret_cast(this->cache_->Lookup( key, GetFullHelper(), create_context, priority, stats)); } else { @@ -352,7 +358,7 @@ class FullTypedCacheInterface inline void StartAsyncLookupFull( TypedAsyncLookupHandle& async_handle, CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier) { - if (lowest_used_cache_tier == CacheTier::kNonVolatileBlockTier) { + if (lowest_used_cache_tier > CacheTier::kVolatileTier) { async_handle.helper = GetFullHelper(); this->cache_->StartAsyncLookup(async_handle); } else { diff --git a/db/blob/blob_contents.h b/db/blob/blob_contents.h index 15a672a0a..40b94d51f 100644 --- a/db/blob/blob_contents.h +++ b/db/blob/blob_contents.h @@ -46,7 +46,8 @@ class BlobContents { class BlobContentsCreator : public Cache::CreateContext { public: static void Create(std::unique_ptr* out, size_t* out_charge, - const Slice& contents, MemoryAllocator* alloc) { + const Slice& contents, CompressionType /*type*/, + MemoryAllocator* alloc) { auto raw = new BlobContents(AllocateAndCopyBlock(contents, alloc), contents.size()); out->reset(raw); diff --git a/db/blob/blob_file_reader.cc b/db/blob/blob_file_reader.cc index bdab3ae68..0c30efbc1 100644 --- a/db/blob/blob_file_reader.cc +++ b/db/blob/blob_file_reader.cc @@ -585,7 +585,8 @@ Status BlobFileReader::UncompressBlobIfNeeded( assert(result); if (compression_type == kNoCompression) { - 
BlobContentsCreator::Create(result, nullptr, value_slice, allocator); + BlobContentsCreator::Create(result, nullptr, value_slice, kNoCompression, + allocator); return Status::OK(); } diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 1cb78e62b..3d5794149 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -3654,10 +3654,12 @@ class DBBasicTestMultiGet : public DBTestBase { Status Insert(const Slice& key, Cache::ObjectPtr value, const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { + Handle** handle = nullptr, Priority priority = Priority::LOW, + const Slice& compressed = Slice(), + CompressionType type = kNoCompression) override { num_inserts_++; - return target_->Insert(key, value, helper, charge, handle, priority); + return target_->Insert(key, value, helper, charge, handle, priority, + compressed, type); } Handle* Lookup(const Slice& key, const CacheItemHelper* helper, diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 512a48149..3335626b6 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -294,7 +294,9 @@ class ReadOnlyCacheWrapper : public CacheWrapper { Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/, const CacheItemHelper* /*helper*/, size_t /*charge*/, - Handle** /*handle*/, Priority /*priority*/) override { + Handle** /*handle*/, Priority /*priority*/, + const Slice& /*compressed*/, + CompressionType /*type*/) override { return Status::NotSupported(); } }; @@ -628,13 +630,15 @@ class MockCache : public LRUCache { Status Insert(const Slice& key, Cache::ObjectPtr value, const Cache::CacheItemHelper* helper, size_t charge, - Handle** handle, Priority priority) override { + Handle** handle, Priority priority, const Slice& compressed, + CompressionType type) override { if (priority == Priority::LOW) { low_pri_insert_count++; } else { high_pri_insert_count++; } - return LRUCache::Insert(key, value, helper, charge, handle, priority); + return LRUCache::Insert(key, value, helper, charge, handle, priority, + compressed, type); } }; diff --git a/db/db_test.cc b/db/db_test.cc index 714f42608..c59951d78 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -7031,8 +7031,9 @@ TEST_F(DBTest, RowCache) { using CacheWrapper::CacheWrapper; const char* Name() const override { return "FailInsertionCache"; } Status Insert(const Slice&, Cache::ObjectPtr, const CacheItemHelper*, - size_t, Handle** = nullptr, - Priority = Priority::LOW) override { + size_t, Handle** = nullptr, Priority = Priority::LOW, + const Slice& /*compressed*/ = Slice(), + CompressionType /*type*/ = kNoCompression) override { return Status::MemoryLimit(); } }; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 01d934bd1..189002270 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1716,12 +1716,12 @@ TargetCacheChargeTrackingCache::TargetCacheChargeTrackingCache( cache_charge_increments_sum_(0) {} template -Status TargetCacheChargeTrackingCache::Insert(const Slice& key, - ObjectPtr value, - const CacheItemHelper* helper, - size_t charge, Handle** handle, - Priority priority) { - Status s = target_->Insert(key, value, helper, charge, handle, priority); +Status TargetCacheChargeTrackingCache::Insert( + const Slice& key, ObjectPtr value, const CacheItemHelper* helper, + size_t charge, Handle** handle, Priority priority, const Slice& compressed, + CompressionType type) { + Status s = target_->Insert(key, value, helper, charge, handle, priority, + compressed, type); if (helper 
== kCrmHelper) { if (last_peak_tracked_) { cache_charge_peak_ = 0; diff --git a/db/db_test_util.h b/db/db_test_util.h index 52e856cb3..1e1225812 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -936,8 +936,9 @@ class TargetCacheChargeTrackingCache : public CacheWrapper { Status Insert(const Slice& key, ObjectPtr value, const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override; + Handle** handle = nullptr, Priority priority = Priority::LOW, + const Slice& compressed = Slice(), + CompressionType type = kNoCompression) override; using Cache::Release; bool Release(Handle* handle, bool erase_if_last_ref = false) override; diff --git a/include/rocksdb/advanced_cache.h b/include/rocksdb/advanced_cache.h index ec4a5b296..a5a19d3a0 100644 --- a/include/rocksdb/advanced_cache.h +++ b/include/rocksdb/advanced_cache.h @@ -13,7 +13,9 @@ #include #include "rocksdb/cache.h" +#include "rocksdb/compression_type.h" #include "rocksdb/memory_allocator.h" +#include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" @@ -109,13 +111,18 @@ class Cache { // pointer into static data). using DeleterFn = void (*)(ObjectPtr obj, MemoryAllocator* allocator); - // The CreateCallback is takes in a buffer from the NVM cache and constructs - // an object using it. The callback doesn't have ownership of the buffer and + // The CreateCallback is takes in a buffer from the secondary cache and + // constructs an object using it. The buffer could be compressed or + // uncompressed, as indicated by the type argument. If compressed, + // the callback is responsible for uncompressing it using information + // from the context, such as compression dictionary. + // The callback doesn't have ownership of the buffer and // should copy the contents into its own buffer. The CreateContext* is // provided by Lookup and may be used to follow DB- or CF-specific settings. // In case of some error, non-OK is returned and the caller should ignore // any result in out_obj. (The implementation must clean up after itself.) - using CreateCallback = Status (*)(const Slice& data, CreateContext* context, + using CreateCallback = Status (*)(const Slice& data, CompressionType type, + CacheTier source, CreateContext* context, MemoryAllocator* allocator, ObjectPtr* out_obj, size_t* out_charge); @@ -242,12 +249,19 @@ class Cache { // the item is only inserted into the primary cache. It may // defer the insertion to the secondary cache as it sees fit. // + // Along with the object pointer, the caller may pass a Slice pointing to + // the compressed serialized data of the object. If compressed is + // non-empty, then the caller must pass the type indicating the compression + // algorithm used. The cache may, optionally, also insert the compressed + // block into one or more cache tiers. + // // When the inserted entry is no longer needed, it will be destroyed using // helper->del_cb (if non-nullptr). 
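Editor's sketch, not part of the header; the Insert() declaration that this comment documents follows immediately below. A hedged example of a caller supplying the optional compressed form; the helper function name and its parameters are illustrative, not RocksDB code.

#include "rocksdb/advanced_cache.h"

using namespace ROCKSDB_NAMESPACE;

// Illustrative only: insert an uncompressed object together with its
// compressed serialized form so that lower cache tiers can be warmed with it.
Status InsertWithCompressedCopy(Cache* cache, const Slice& key,
                                Cache::ObjectPtr block_obj,
                                const Cache::CacheItemHelper* block_helper,
                                size_t charge, const Slice& compressed_block,
                                CompressionType type) {
  return cache->Insert(key, block_obj, block_helper, charge,
                       /*handle=*/nullptr, Cache::Priority::LOW,
                       /*compressed=*/compressed_block, /*type=*/type);
}

Within this patch, the real call site is BlockBasedTable::PutDataBlockToCache(), which forwards compressed_block_contents.data and the block's compression type through BlockCacheInterface::InsertFull().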
- virtual Status Insert(const Slice& key, ObjectPtr obj, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) = 0; + virtual Status Insert( + const Slice& key, ObjectPtr obj, const CacheItemHelper* helper, + size_t charge, Handle** handle = nullptr, + Priority priority = Priority::LOW, const Slice& compressed = Slice(), + CompressionType type = CompressionType::kNoCompression) = 0; // Similar to Insert, but used for creating cache entries that cannot // be found with Lookup, such as for memory charging purposes. The @@ -536,11 +550,14 @@ class CacheWrapper : public Cache { // Only function that derived class must provide // const char* Name() const override { ... } - Status Insert(const Slice& key, ObjectPtr value, - const CacheItemHelper* helper, size_t charge, - Handle** handle = nullptr, - Priority priority = Priority::LOW) override { - return target_->Insert(key, value, helper, charge, handle, priority); + Status Insert( + const Slice& key, ObjectPtr value, const CacheItemHelper* helper, + size_t charge, Handle** handle = nullptr, + Priority priority = Priority::LOW, + const Slice& compressed_value = Slice(), + CompressionType type = CompressionType::kNoCompression) override { + return target_->Insert(key, value, helper, charge, handle, priority, + compressed_value, type); } Handle* CreateStandalone(const Slice& key, ObjectPtr obj, diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 4f481a8ee..794d70be5 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -275,7 +275,8 @@ struct CompactionOptionsFIFO { // In the future, we may add more caching layers. enum class CacheTier : uint8_t { kVolatileTier = 0, - kNonVolatileBlockTier = 0x01, + kVolatileCompressedTier = 0x01, + kNonVolatileBlockTier = 0x02, }; enum UpdateStatus { // Return status For inplace update callback diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index a85595e4f..061ebbb36 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -481,22 +481,33 @@ enum TieredAdmissionPolicy { // Same as kAdmPolicyPlaceholder, but also if an entry in the primary cache // was a hit, then force insert it into the compressed secondary cache kAdmPolicyAllowCacheHits, + // An admission policy for three cache tiers - primary uncompressed, + // compressed secondary, and a compressed local flash (non-volatile) cache. + // Each tier is managed as an independent queue. + kAdmPolicyThreeQueue, kAdmPolicyMax, }; +// EXPERIMENTAL +// The following feature is experimental, and the API is subject to change +// // A 2-tier cache with a primary block cache, and a compressed secondary // cache. The returned cache instance will internally allocate a primary // uncompressed cache of the specified type, and a compressed secondary // cache. Any cache memory reservations, such as WriteBufferManager // allocations costed to the block cache, will be distributed // proportionally across both the primary and secondary. -struct TieredVolatileCacheOptions { +struct TieredCacheOptions { ShardedCacheOptions* cache_opts; PrimaryCacheType cache_type; TieredAdmissionPolicy adm_policy; CompressedSecondaryCacheOptions comp_cache_opts; + // An optional secondary cache that will serve as the persistent cache + // tier. If present, compressed blocks will be written to this + // secondary cache. 
+ std::shared_ptr nvm_sec_cache; }; -extern std::shared_ptr NewTieredVolatileCache( - TieredVolatileCacheOptions& cache_opts); +// EXPERIMENTAL +extern std::shared_ptr NewTieredCache(TieredCacheOptions& cache_opts); } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/secondary_cache.h b/include/rocksdb/secondary_cache.h index 292c0ffe0..49792ca67 100644 --- a/include/rocksdb/secondary_cache.h +++ b/include/rocksdb/secondary_cache.h @@ -11,6 +11,7 @@ #include "rocksdb/advanced_cache.h" #include "rocksdb/customizable.h" +#include "rocksdb/options.h" #include "rocksdb/slice.h" #include "rocksdb/statistics.h" #include "rocksdb/status.h" @@ -83,15 +84,19 @@ class SecondaryCache : public Customizable { bool force_insert) = 0; // Insert a value from its saved/persistable data (typically uncompressed - // block), as if generated by SaveToCallback/SizeCallback. This can be used - // in "warming up" the cache from some auxiliary source, and like Insert() - // may or may not write it to cache depending on the admission control - // policy, even if the return status is success. + // block), as if generated by SaveToCallback/SizeCallback. The data can be + // compressed, in which case the type argument should specify the + // compression algorithm used. Additionally, the source argument should + // be set to the appropriate tier that will be responsible for + // uncompressing the data. // - // The default implementation only assumes the entry helper's create_cb is - // called at Lookup() time and not Insert() time, so should work for all - // foreseeable implementations. - virtual Status InsertSaved(const Slice& key, const Slice& saved); + // This method can be used in "warming up" the cache from some auxiliary + // source, and like Insert() may or may not write it to cache depending on + // the admission control policy, even if the return status is success. + virtual Status InsertSaved( + const Slice& key, const Slice& saved, + CompressionType type = CompressionType::kNoCompression, + CacheTier source = CacheTier::kVolatileTier) = 0; // Lookup the data for the given key in this cache. The create_cb // will be used to create the object. The handle returned may not be @@ -148,4 +153,70 @@ class SecondaryCache : public Customizable { virtual Status Inflate(size_t /*increase*/) { return Status::NotSupported(); } }; +// A wrapper around a SecondaryCache object. A derived class may selectively +// override methods to implement a different behavior. 
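Editor's sketch, not part of the header; the SecondaryCacheWrapper class introduced by the comment above is declared immediately below. A minimal, hypothetical example of the selective-override pattern: a wrapper that counts InsertSaved() calls and forwards everything else unchanged.

#include <atomic>
#include <memory>

#include "rocksdb/secondary_cache.h"

namespace ROCKSDB_NAMESPACE {

// Hypothetical example only: count InsertSaved() calls and delegate the
// actual work (and every other method) to the wrapped secondary cache.
class CountingSecondaryCache : public SecondaryCacheWrapper {
 public:
  explicit CountingSecondaryCache(std::shared_ptr<SecondaryCache> target)
      : SecondaryCacheWrapper(std::move(target)) {}

  const char* Name() const override { return "CountingSecondaryCache"; }

  Status InsertSaved(const Slice& key, const Slice& saved,
                     CompressionType type = CompressionType::kNoCompression,
                     CacheTier source = CacheTier::kVolatileTier) override {
    num_insert_saved_.fetch_add(1, std::memory_order_relaxed);
    return SecondaryCacheWrapper::InsertSaved(key, saved, type, source);
  }

  uint64_t num_insert_saved() const {
    return num_insert_saved_.load(std::memory_order_relaxed);
  }

 private:
  std::atomic<uint64_t> num_insert_saved_{0};
};

}  // namespace ROCKSDB_NAMESPACE

TieredSecondaryCache in this patch follows the same pattern, overriding Insert(), InsertSaved(), Lookup() and WaitAll() while inheriting the remaining pass-through methods from the wrapper.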
+class SecondaryCacheWrapper : public SecondaryCache { + public: + explicit SecondaryCacheWrapper(std::shared_ptr target) + : target_(std::move(target)) {} + + virtual Status Insert(const Slice& key, Cache::ObjectPtr obj, + const Cache::CacheItemHelper* helper, + bool force_insert) override { + return target()->Insert(key, obj, helper, force_insert); + } + + virtual Status InsertSaved( + const Slice& key, const Slice& saved, + CompressionType type = CompressionType::kNoCompression, + CacheTier source = CacheTier::kVolatileTier) override { + return target()->InsertSaved(key, saved, type, source); + } + + virtual std::unique_ptr Lookup( + const Slice& key, const Cache::CacheItemHelper* helper, + Cache::CreateContext* create_context, bool wait, bool advise_erase, + bool& kept_in_sec_cache) override { + return target()->Lookup(key, helper, create_context, wait, advise_erase, + kept_in_sec_cache); + } + + virtual bool SupportForceErase() const override { + return target()->SupportForceErase(); + } + + virtual void Erase(const Slice& key) override { target()->Erase(key); } + + virtual void WaitAll( + std::vector handles) override { + target()->WaitAll(handles); + } + + virtual Status SetCapacity(size_t capacity) override { + return target()->SetCapacity(capacity); + } + + virtual Status GetCapacity(size_t& capacity) override { + return target()->GetCapacity(capacity); + } + + virtual Status Deflate(size_t decrease) override { + return target()->Deflate(decrease); + } + + virtual Status Inflate(size_t increase) override { + return target()->Inflate(increase); + } + + protected: + SecondaryCache* target() const { return target_.get(); } + + private: + std::shared_ptr target_; +}; + +// Useful for cache entries that just need to be copied into a +// secondary cache, such as compressed blocks +extern const Cache::CacheItemHelper kSliceCacheItemHelper; + } // namespace ROCKSDB_NAMESPACE diff --git a/options/customizable_test.cc b/options/customizable_test.cc index 125a5aabe..0e614ed16 100644 --- a/options/customizable_test.cc +++ b/options/customizable_test.cc @@ -1234,6 +1234,10 @@ class TestSecondaryCache : public SecondaryCache { bool /*force_insert*/) override { return Status::NotSupported(); } + Status InsertSaved(const Slice& /*key*/, const Slice& /*saved*/, + CompressionType /*type*/, CacheTier /*source*/) override { + return Status::OK(); + } std::unique_ptr Lookup( const Slice& /*key*/, const Cache::CacheItemHelper* /*helper*/, Cache::CreateContext* /*create_context*/, bool /*wait*/, diff --git a/src.mk b/src.mk index 2992f5c4a..edc8bdef5 100644 --- a/src.mk +++ b/src.mk @@ -12,6 +12,7 @@ LIB_SOURCES = \ cache/secondary_cache.cc \ cache/secondary_cache_adapter.cc \ cache/sharded_cache.cc \ + cache/tiered_secondary_cache.cc \ db/arena_wrapped_db_iter.cc \ db/blob/blob_contents.cc \ db/blob/blob_fetcher.cc \ @@ -432,8 +433,9 @@ BENCH_MAIN_SOURCES = \ TEST_MAIN_SOURCES = \ cache/cache_test.cc \ cache/cache_reservation_manager_test.cc \ - cache/lru_cache_test.cc \ cache/compressed_secondary_cache_test.cc \ + cache/lru_cache_test.cc \ + cache/tiered_secondary_cache_test.cc \ db/blob/blob_counting_iterator_test.cc \ db/blob/blob_file_addition_test.cc \ db/blob/blob_file_builder_test.cc \ diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 051f9d87b..cc4f17413 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -488,7 +488,7 @@ struct BlockBasedTableBuilder::Rep { 
flush_block_policy( table_options.flush_block_policy_factory->NewFlushBlockPolicy( table_options, data_block)), - create_context(&table_options, ioptions.stats, + create_context(&table_options, &ioptions, ioptions.stats, compression_type == kZSTD || compression_type == kZSTDNotFinalCompression, tbo.moptions.block_protection_bytes_per_key, diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 528b87bb9..c53aa0fb9 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -683,7 +683,7 @@ Status BlockBasedTable::Open( rep->table_properties->compression_name == CompressionTypeToString(kZSTDNotFinalCompression)); rep->create_context = BlockCreateContext( - &rep->table_options, rep->ioptions.stats, + &rep->table_options, &rep->ioptions, rep->ioptions.stats, blocks_definitely_zstd_compressed, block_protection_bytes_per_key, rep->internal_comparator.user_comparator(), rep->index_value_is_full, rep->index_has_first_key); @@ -1303,8 +1303,8 @@ Cache::Priority BlockBasedTable::GetCachePriority() const { template WithBlocklikeCheck BlockBasedTable::GetDataBlockFromCache( const Slice& cache_key, BlockCacheInterface block_cache, - CachableEntry* out_parsed_block, - GetContext* get_context) const { + CachableEntry* out_parsed_block, GetContext* get_context, + const UncompressionDict* dict) const { assert(out_parsed_block); assert(out_parsed_block->IsEmpty()); @@ -1313,10 +1313,12 @@ WithBlocklikeCheck BlockBasedTable::GetDataBlockFromCache( // Lookup uncompressed cache first if (block_cache) { + BlockCreateContext create_ctx = rep_->create_context; + create_ctx.dict = dict; assert(!cache_key.empty()); auto cache_handle = block_cache.LookupFull( - cache_key, &rep_->create_context, GetCachePriority(), - statistics, rep_->ioptions.lowest_used_cache_tier); + cache_key, &create_ctx, GetCachePriority(), statistics, + rep_->ioptions.lowest_used_cache_tier); // Avoid updating metrics here if the handle is not complete yet. This // happens with MultiGet and secondary cache. 
So update the metrics only @@ -1343,8 +1345,9 @@ WithBlocklikeCheck BlockBasedTable::GetDataBlockFromCache( template WithBlocklikeCheck BlockBasedTable::PutDataBlockToCache( const Slice& cache_key, BlockCacheInterface block_cache, - CachableEntry* out_parsed_block, BlockContents&& block_contents, - CompressionType block_comp_type, + CachableEntry* out_parsed_block, + BlockContents&& uncompressed_block_contents, + BlockContents&& compressed_block_contents, CompressionType block_comp_type, const UncompressionDict& uncompression_dict, MemoryAllocator* memory_allocator, GetContext* get_context) const { const ImmutableOptions& ioptions = rep_->ioptions; @@ -1356,23 +1359,22 @@ WithBlocklikeCheck BlockBasedTable::PutDataBlockToCache( Statistics* statistics = ioptions.stats; std::unique_ptr block_holder; - if (block_comp_type != kNoCompression) { + if (block_comp_type != kNoCompression && + uncompressed_block_contents.data.empty()) { + assert(compressed_block_contents.data.data()); // Retrieve the uncompressed contents into a new buffer - BlockContents uncompressed_block_contents; UncompressionContext context(block_comp_type); UncompressionInfo info(context, uncompression_dict, block_comp_type); - s = UncompressBlockData(info, block_contents.data.data(), - block_contents.data.size(), + s = UncompressBlockData(info, compressed_block_contents.data.data(), + compressed_block_contents.data.size(), &uncompressed_block_contents, format_version, ioptions, memory_allocator); if (!s.ok()) { return s; } - rep_->create_context.Create(&block_holder, - std::move(uncompressed_block_contents)); - } else { - rep_->create_context.Create(&block_holder, std::move(block_contents)); } + rep_->create_context.Create(&block_holder, + std::move(uncompressed_block_contents)); // insert into uncompressed block cache if (block_cache && block_holder->own_bytes()) { @@ -1380,7 +1382,8 @@ WithBlocklikeCheck BlockBasedTable::PutDataBlockToCache( BlockCacheTypedHandle* cache_handle = nullptr; s = block_cache.InsertFull(cache_key, block_holder.get(), charge, &cache_handle, GetCachePriority(), - rep_->ioptions.lowest_used_cache_tier); + rep_->ioptions.lowest_used_cache_tier, + compressed_block_contents.data, block_comp_type); if (s.ok()) { assert(cache_handle != nullptr); @@ -1500,7 +1503,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( if (!contents) { if (use_block_cache_for_lookup) { s = GetDataBlockFromCache(key, block_cache, out_parsed_block, - get_context); + get_context, &uncompression_dict); // Value could still be null at this point, so check the cache handle // and update the read pattern for prefetching if (out_parsed_block->GetValue() || @@ -1531,14 +1534,26 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( TBlocklike::kBlockType != BlockType::kFilter && TBlocklike::kBlockType != BlockType::kCompressionDictionary && rep_->blocks_maybe_compressed; + // This flag, if true, tells BlockFetcher to return the uncompressed + // block when ReadBlockContents() is called. const bool do_uncompress = maybe_compressed; CompressionType contents_comp_type; // Maybe serialized or uncompressed BlockContents tmp_contents; + BlockContents uncomp_contents; + BlockContents comp_contents; if (!contents) { Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; StopWatch sw(rep_->ioptions.clock, statistics, histogram); + // Setting do_uncompress to false may cause an extra mempcy in the + // following cases - + // 1. Compression is enabled, but block is not actually compressed + // 2. 
Compressed block is in the prefetch buffer + // 3. Direct IO + // + // It would also cause a memory allocation to be used rather than + // stack if the compressed block size is < 5KB BlockFetcher block_fetcher( rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, &tmp_contents, rep_->ioptions, do_uncompress, maybe_compressed, @@ -1559,7 +1574,6 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( } contents_comp_type = block_fetcher.get_compression_type(); - contents = &tmp_contents; if (get_context) { switch (TBlocklike::kBlockType) { case BlockType::kIndex: @@ -1573,17 +1587,43 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( break; } } + if (s.ok()) { + if (do_uncompress && contents_comp_type != kNoCompression) { + comp_contents = BlockContents(block_fetcher.GetCompressedBlock()); + uncomp_contents = std::move(tmp_contents); + } else if (contents_comp_type != kNoCompression) { + // do_uncompress must be false, so output of BlockFetcher is + // compressed + comp_contents = std::move(tmp_contents); + } else { + uncomp_contents = std::move(tmp_contents); + } + + // If filling cache is allowed and a cache is configured, try to put + // the block to the cache. Do this here while block_fetcher is in + // scope, since comp_contents will be a reference to the compressed + // block in block_fetcher + s = PutDataBlockToCache( + key, block_cache, out_parsed_block, std::move(uncomp_contents), + std::move(comp_contents), contents_comp_type, uncompression_dict, + GetMemoryAllocator(rep_->table_options), get_context); + } } else { contents_comp_type = GetBlockCompressionType(*contents); - } + if (contents_comp_type != kNoCompression) { + comp_contents = std::move(*contents); + } else { + uncomp_contents = std::move(*contents); + } - if (s.ok()) { - // If filling cache is allowed and a cache is configured, try to put the - // block to the cache. - s = PutDataBlockToCache( - key, block_cache, out_parsed_block, std::move(*contents), - contents_comp_type, uncompression_dict, - GetMemoryAllocator(rep_->table_options), get_context); + if (s.ok()) { + // If filling cache is allowed and a cache is configured, try to put + // the block to the cache. + s = PutDataBlockToCache( + key, block_cache, out_parsed_block, std::move(uncomp_contents), + std::move(comp_contents), contents_comp_type, uncompression_dict, + GetMemoryAllocator(rep_->table_options), get_context); + } } } } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 6162c5889..32c5f6852 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -410,7 +410,8 @@ class BlockBasedTable : public TableReader { template WithBlocklikeCheck GetDataBlockFromCache( const Slice& cache_key, BlockCacheInterface block_cache, - CachableEntry* block, GetContext* get_context) const; + CachableEntry* block, GetContext* get_context, + const UncompressionDict* dict) const; // Put a maybe compressed block to the corresponding block caches. 
// This method will perform decompression against block_contents if needed @@ -425,7 +426,9 @@ class BlockBasedTable : public TableReader { template WithBlocklikeCheck PutDataBlockToCache( const Slice& cache_key, BlockCacheInterface block_cache, - CachableEntry* cached_block, BlockContents&& block_contents, + CachableEntry* cached_block, + BlockContents&& uncompressed_block_contents, + BlockContents&& compressed_block_contents, CompressionType block_comp_type, const UncompressionDict& uncompression_dict, MemoryAllocator* memory_allocator, GetContext* get_context) const; diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 8ee594db6..e7621909c 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -402,6 +402,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) BCI block_cache{rep_->table_options.block_cache.get()}; std::array async_handles; + BlockCreateContext create_ctx = rep_->create_context; std::array cache_keys; size_t cache_lookup_count = 0; @@ -448,6 +449,9 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) sst_file_range.SkipKey(miter); continue; } + create_ctx.dict = uncompression_dict.GetValue() + ? uncompression_dict.GetValue() + : &UncompressionDict::GetEmptyDict(); if (v.handle.offset() == prev_offset) { // This key can reuse the previous block (later on). @@ -475,7 +479,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) GetCacheKey(rep_->base_cache_key, v.handle); async_handle.key = cache_keys[cache_lookup_count].AsSlice(); // NB: StartAsyncLookupFull populates async_handle.helper - async_handle.create_context = &rep_->create_context; + async_handle.create_context = &create_ctx; async_handle.priority = GetCachePriority(); async_handle.stats = rep_->ioptions.statistics.get(); diff --git a/table/block_based/block_cache.cc b/table/block_based/block_cache.cc index a252899d2..08f5d2158 100644 --- a/table/block_based/block_cache.cc +++ b/table/block_based/block_cache.cc @@ -5,6 +5,8 @@ #include "table/block_based/block_cache.h" +#include "table/block_based/block_based_table_reader.h" + namespace ROCKSDB_NAMESPACE { void BlockCreateContext::Create(std::unique_ptr* parsed_out, @@ -96,7 +98,7 @@ const std::array CacheTier::kVolatileTier) { return kCacheItemFullHelperForBlockType[static_cast(block_type)]; } else { return kCacheItemBasicHelperForBlockType[static_cast(block_type)]; diff --git a/table/block_based/block_cache.h b/table/block_based/block_cache.h index 00eaface3..06ba50566 100644 --- a/table/block_based/block_cache.h +++ b/table/block_based/block_cache.h @@ -70,24 +70,28 @@ class Block_kMetaIndex : public Block { struct BlockCreateContext : public Cache::CreateContext { BlockCreateContext() {} BlockCreateContext(const BlockBasedTableOptions* _table_options, - Statistics* _statistics, bool _using_zstd, - uint8_t _protection_bytes_per_key, + const ImmutableOptions* _ioptions, Statistics* _statistics, + bool _using_zstd, uint8_t _protection_bytes_per_key, const Comparator* _raw_ucmp, bool _index_value_is_full = false, bool _index_has_first_key = false) : table_options(_table_options), + ioptions(_ioptions), statistics(_statistics), + raw_ucmp(_raw_ucmp), using_zstd(_using_zstd), protection_bytes_per_key(_protection_bytes_per_key), - raw_ucmp(_raw_ucmp), index_value_is_full(_index_value_is_full), index_has_first_key(_index_has_first_key) {} const BlockBasedTableOptions* table_options 
= nullptr; + const ImmutableOptions* ioptions = nullptr; Statistics* statistics = nullptr; + const Comparator* raw_ucmp = nullptr; + const UncompressionDict* dict = nullptr; + uint32_t format_version; bool using_zstd = false; uint8_t protection_bytes_per_key = 0; - const Comparator* raw_ucmp = nullptr; bool index_value_is_full; bool index_has_first_key; @@ -95,9 +99,24 @@ struct BlockCreateContext : public Cache::CreateContext { template inline void Create(std::unique_ptr* parsed_out, size_t* charge_out, const Slice& data, - MemoryAllocator* alloc) { - Create(parsed_out, - BlockContents(AllocateAndCopyBlock(data, alloc), data.size())); + CompressionType type, MemoryAllocator* alloc) { + BlockContents uncompressed_block_contents; + if (type != CompressionType::kNoCompression) { + assert(dict != nullptr); + UncompressionContext context(type); + UncompressionInfo info(context, *dict, type); + Status s = UncompressBlockData( + info, data.data(), data.size(), &uncompressed_block_contents, + table_options->format_version, *ioptions, alloc); + if (!s.ok()) { + parsed_out->reset(); + return; + } + } else { + uncompressed_block_contents = + BlockContents(AllocateAndCopyBlock(data, alloc), data.size()); + } + Create(parsed_out, std::move(uncompressed_block_contents)); *charge_out = parsed_out->get()->ApproximateMemoryUsage(); } diff --git a/table/block_based/block_test.cc b/table/block_based/block_test.cc index 3264371c1..9082a08e9 100644 --- a/table/block_based/block_test.cc +++ b/table/block_based/block_test.cc @@ -848,9 +848,12 @@ TEST_F(BlockPerKVChecksumTest, EmptyBlock) { Options options = Options(); BlockBasedTableOptions tbo; uint8_t protection_bytes_per_key = 8; - BlockCreateContext create_context{ - &tbo, nullptr /* statistics */, false /* using_zstd */, - protection_bytes_per_key, options.comparator}; + BlockCreateContext create_context{&tbo, + nullptr, + nullptr /* statistics */, + false /* using_zstd */, + protection_bytes_per_key, + options.comparator}; create_context.Create(&data_block, std::move(contents)); std::unique_ptr biter{data_block->NewDataIterator( options.comparator, kDisableGlobalSequenceNumber)}; @@ -885,9 +888,12 @@ TEST_F(BlockPerKVChecksumTest, InitializeProtectionInfo) { Options options = Options(); BlockBasedTableOptions tbo; uint8_t protection_bytes_per_key = 8; - BlockCreateContext create_context{ - &tbo, nullptr /* statistics */, false /* using_zstd */, - protection_bytes_per_key, options.comparator}; + BlockCreateContext create_context{&tbo, + nullptr /* ioptions */, + nullptr /* statistics */, + false /* using_zstd */, + protection_bytes_per_key, + options.comparator}; { std::string invalid_content = "1"; @@ -949,14 +955,19 @@ TEST_F(BlockPerKVChecksumTest, ApproximateMemory) { uint8_t protection_bytes_per_key = 8; BlockCreateContext with_checksum_create_context{ &tbo, + nullptr /* ioptions */, nullptr /* statistics */, false /* using_zstd */, protection_bytes_per_key, options.comparator, true /* index_value_is_full */}; - BlockCreateContext create_context{ - &tbo, nullptr /* statistics */, false /* using_zstd */, - 0, options.comparator, true /* index_value_is_full */}; + BlockCreateContext create_context{&tbo, + nullptr /* ioptions */, + nullptr /* statistics */, + false /* using_zstd */, + 0, + options.comparator, + true /* index_value_is_full */}; { std::unique_ptr data_block; @@ -1045,8 +1056,11 @@ class DataBlockKVChecksumTest std::vector &keys, std::vector &values, int num_record) { BlockBasedTableOptions tbo; - BlockCreateContext create_context{&tbo, 
nullptr /* statistics */, - false /* using_zstd */, GetChecksumLen(), + BlockCreateContext create_context{&tbo, + nullptr /* statistics */, + nullptr /* ioptions */, + false /* using_zstd */, + GetChecksumLen(), Options().comparator}; builder_ = std::make_unique( static_cast(GetRestartInterval()), @@ -1172,6 +1186,7 @@ class IndexBlockKVChecksumTest uint8_t protection_bytes_per_key = GetChecksumLen(); BlockCreateContext create_context{ &tbo, + nullptr /* ioptions */, nullptr /* statistics */, false /* _using_zstd */, protection_bytes_per_key, @@ -1312,9 +1327,12 @@ class MetaIndexBlockKVChecksumTest Options options = Options(); BlockBasedTableOptions tbo; uint8_t protection_bytes_per_key = GetChecksumLen(); - BlockCreateContext create_context{ - &tbo, nullptr /* statistics */, false /* using_zstd */, - protection_bytes_per_key, options.comparator}; + BlockCreateContext create_context{&tbo, + nullptr /* ioptions */, + nullptr /* statistics */, + false /* using_zstd */, + protection_bytes_per_key, + options.comparator}; builder_ = std::make_unique(static_cast(GetRestartInterval())); // add a bunch of records to a block @@ -1344,9 +1362,12 @@ TEST_P(MetaIndexBlockKVChecksumTest, ChecksumConstructionAndVerification) { Options options = Options(); BlockBasedTableOptions tbo; uint8_t protection_bytes_per_key = GetChecksumLen(); - BlockCreateContext create_context{ - &tbo, nullptr /* statistics */, false /* using_zstd */, - protection_bytes_per_key, options.comparator}; + BlockCreateContext create_context{&tbo, + nullptr /* ioptions */, + nullptr /* statistics */, + false /* using_zstd */, + protection_bytes_per_key, + options.comparator}; std::vector num_restart_intervals = {1, 16}; for (const auto num_restart_interval : num_restart_intervals) { const int kNumRecords = num_restart_interval * GetRestartInterval(); @@ -1680,4 +1701,4 @@ int main(int argc, char **argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 34d3e23e9..257a1a42e 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -336,9 +336,11 @@ IOStatus BlockFetcher::ReadBlockContents() { #ifndef NDEBUG num_heap_buf_memcpy_++; #endif - compression_type_ = kNoCompression; + // Save the compressed block without trailer + slice_ = Slice(slice_.data(), block_size_); } else { GetBlockContents(); + slice_ = Slice(); } InsertUncompressedBlockToPersistentCacheIfNeeded(); @@ -387,7 +389,6 @@ IOStatus BlockFetcher::ReadAsyncBlockContents() { #ifndef NDEBUG num_heap_buf_memcpy_++; #endif - compression_type_ = kNoCompression; } else { GetBlockContents(); } diff --git a/table/block_fetcher.h b/table/block_fetcher.h index da6c352d0..e5a51e3eb 100644 --- a/table/block_fetcher.h +++ b/table/block_fetcher.h @@ -79,6 +79,10 @@ class BlockFetcher { inline size_t GetBlockSizeWithTrailer() const { return block_size_with_trailer_; } + inline Slice& GetCompressedBlock() { + assert(compression_type_ != kNoCompression); + return slice_; + } #ifndef NDEBUG int TEST_GetNumStackBufMemcpy() const { return num_stack_buf_memcpy_; } diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index 18109811d..d738fa3df 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -299,7 +299,7 @@ class BlockFetcherTest : public testing::Test { MemoryAllocator* heap_buf_allocator, MemoryAllocator* compressed_buf_allocator, BlockContents* contents, MemcpyStats* 
stats, - CompressionType* compresstion_type) { + CompressionType* compression_type) { ImmutableOptions ioptions(options_); ReadOptions roptions; PersistentCacheOptions persistent_cache_options; @@ -318,7 +318,11 @@ class BlockFetcherTest : public testing::Test { stats->num_compressed_buf_memcpy = fetcher->TEST_GetNumCompressedBufMemcpy(); - *compresstion_type = fetcher->get_compression_type(); + if (do_uncompress) { + *compression_type = kNoCompression; + } else { + *compression_type = fetcher->get_compression_type(); + } } // NOTE: expected_compression_type is the expected compression diff --git a/test_util/secondary_cache_test_util.cc b/test_util/secondary_cache_test_util.cc index 1c62dc4ad..6f0bd3849 100644 --- a/test_util/secondary_cache_test_util.cc +++ b/test_util/secondary_cache_test_util.cc @@ -37,7 +37,8 @@ Status SaveToCallbackFail(Cache::ObjectPtr /*obj*/, size_t /*offset*/, return Status::NotSupported(); } -Status CreateCallback(const Slice& data, Cache::CreateContext* context, +Status CreateCallback(const Slice& data, CompressionType /*type*/, + CacheTier /*source*/, Cache::CreateContext* context, MemoryAllocator* /*allocator*/, Cache::ObjectPtr* out_obj, size_t* out_charge) { auto t = static_cast(context); diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index f6662c09f..340a8a3a1 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -3060,12 +3060,12 @@ class Benchmark { FLAGS_cache_numshardbits); opts.hash_seed = GetCacheHashSeed(); if (use_tiered_cache) { - TieredVolatileCacheOptions tiered_opts; + TieredCacheOptions tiered_opts; opts.capacity += secondary_cache_opts.capacity; tiered_opts.cache_type = PrimaryCacheType::kCacheTypeHCC; tiered_opts.cache_opts = &opts; tiered_opts.comp_cache_opts = secondary_cache_opts; - return NewTieredVolatileCache(tiered_opts); + return NewTieredCache(tiered_opts); } else { return opts.MakeSharedCache(); } @@ -3093,12 +3093,12 @@ class Benchmark { } if (use_tiered_cache) { - TieredVolatileCacheOptions tiered_opts; + TieredCacheOptions tiered_opts; opts.capacity += secondary_cache_opts.capacity; tiered_opts.cache_type = PrimaryCacheType::kCacheTypeLRU; tiered_opts.cache_opts = &opts; tiered_opts.comp_cache_opts = secondary_cache_opts; - return NewTieredVolatileCache(tiered_opts); + return NewTieredCache(tiered_opts); } else { return opts.MakeSharedCache(); } diff --git a/unreleased_history/new_features/secondary_cache_stacking.md b/unreleased_history/new_features/secondary_cache_stacking.md new file mode 100644 index 000000000..05e1bb242 --- /dev/null +++ b/unreleased_history/new_features/secondary_cache_stacking.md @@ -0,0 +1 @@ +RocksDB now allows the block cache to be stacked on top of a compressed secondary cache and a non-volatile secondary cache, thus creating a three-tier cache. To set it up, use the `NewTieredCache()` API in rocksdb/cache.h.. diff --git a/unreleased_history/public_api_changes/new_tiered_cache.md b/unreleased_history/public_api_changes/new_tiered_cache.md new file mode 100644 index 000000000..935e6a7ad --- /dev/null +++ b/unreleased_history/public_api_changes/new_tiered_cache.md @@ -0,0 +1 @@ +The `NewTieredVolatileCache()` API in rocksdb/cache.h has been renamed to `NewTieredCache()`. 
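For illustration, here is a minimal sketch (not part of the patch itself) of how a three-tier cache might be assembled with the renamed NewTieredCache() API, following the db_bench wiring above. The MakeThreeTierCache helper name, the choice of HyperClockCacheOptions and CompressedSecondaryCacheOptions as the concrete option types, and the capacities are assumptions for the example rather than a definitive recipe; the TieredCacheOptions fields and the nvm_sec_cache member come from this patch series.

```
// Sketch only: a block cache stacked on a compressed secondary cache plus an
// optional non-volatile secondary cache, mirroring the db_bench changes above.
// Option types, capacities, and the helper name are illustrative assumptions.
#include <memory>
#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"

using namespace ROCKSDB_NAMESPACE;

std::shared_ptr<Cache> MakeThreeTierCache(
    std::shared_ptr<SecondaryCache> nvm_tier /* may be nullptr */) {
  // Primary (uncompressed) tier: a HyperClockCache.
  HyperClockCacheOptions primary_opts(1 << 30 /* capacity */,
                                      0 /* estimated_entry_charge */);
  // Second tier: compressed secondary cache.
  CompressedSecondaryCacheOptions comp_opts;
  comp_opts.capacity = 256 << 20;
  // As in db_bench above, fold the compressed tier's capacity into the total.
  primary_opts.capacity += comp_opts.capacity;

  TieredCacheOptions tiered_opts;
  tiered_opts.cache_type = PrimaryCacheType::kCacheTypeHCC;
  tiered_opts.cache_opts = &primary_opts;
  tiered_opts.comp_cache_opts = comp_opts;
  // Third (non-volatile) tier, if provided, via the new nvm_sec_cache field.
  tiered_opts.nvm_sec_cache = std::move(nvm_tier);
  return NewTieredCache(tiered_opts);
}
```

This matches the three-tier setup described in the new_features note above (block cache over a compressed secondary cache over a non-volatile secondary cache).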
diff --git a/utilities/fault_injection_secondary_cache.h b/utilities/fault_injection_secondary_cache.h index 60488dcfb..dd73ac156 100644 --- a/utilities/fault_injection_secondary_cache.h +++ b/utilities/fault_injection_secondary_cache.h @@ -35,6 +35,11 @@ class FaultInjectionSecondaryCache : public SecondaryCache { const Cache::CacheItemHelper* helper, bool force_insert) override; + Status InsertSaved(const Slice& /*key*/, const Slice& /*saved*/, + CompressionType /*type*/, CacheTier /*source*/) override { + return Status::OK(); + } + std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, diff --git a/utilities/simulator_cache/sim_cache.cc b/utilities/simulator_cache/sim_cache.cc index d58c3b34f..ff9d52dca 100644 --- a/utilities/simulator_cache/sim_cache.cc +++ b/utilities/simulator_cache/sim_cache.cc @@ -169,7 +169,8 @@ class SimCacheImpl : public SimCache { Status Insert(const Slice& key, Cache::ObjectPtr value, const CacheItemHelper* helper, size_t charge, Handle** handle, - Priority priority) override { + Priority priority, const Slice& compressed = {}, + CompressionType type = kNoCompression) override { // The handle and value passed in are for real cache, so we pass nullptr // to key_only_cache_ for both instead. Also, the deleter function pointer // will be called by user to perform some external operation which should @@ -178,8 +179,9 @@ class SimCacheImpl : public SimCache { Handle* h = key_only_cache_->Lookup(key); if (h == nullptr) { // TODO: Check for error here? - auto s = key_only_cache_->Insert(key, nullptr, &kNoopCacheItemHelper, - charge, nullptr, priority); + auto s = + key_only_cache_->Insert(key, nullptr, &kNoopCacheItemHelper, charge, + nullptr, priority, compressed, type); s.PermitUncheckedError(); } else { key_only_cache_->Release(h); @@ -189,7 +191,8 @@ class SimCacheImpl : public SimCache { if (!target_) { return Status::OK(); } - return target_->Insert(key, value, helper, charge, handle, priority); + return target_->Insert(key, value, helper, charge, handle, priority, + compressed, type); } Handle* Lookup(const Slice& key, const CacheItemHelper* helper, From f6cb763409dce499259b9f5aebb73751b06abf0a Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 22 Sep 2023 13:47:31 -0700 Subject: [PATCH 146/386] Fix major performance bug in AutoHCC growth phase (#11871) Summary: ## The Problem Mark Callaghan found a performance bug in yet-unreleased AutoHCC (which should have been found in my own testing). The observed behavior is very slow insertion performance as the table is growing into a very large structure. The root cause is the precarious combination of linear hashing (indexing into the table while allowing growth) and linear probing (for finding an empty slot to insert into). Naively combined, this is a disaster because in linear hashing, part of the table is twice as dense as first probing location as the rest. Thus, even a modest load factor like 0.6 could cause the dense part of the table to degrade to linear search. The code had a correction for this imbalance, which works in steady-state operation, but failed to account for the concentrating effect of table growth. Specifically, newly-added slots were underpopulated which allowed old slots to become over-populated and degrade to linear search, even in single-threaded operation. 
Here's an example: ``` ./cache_bench -cache_type=auto_hyper_clock_cache -threads=1 -populate_cache=0 -value_bytes=500 -cache_size=3000000000 -histograms=0 -report_problems -ops_per_thread=20000000 -resident_ratio=0.6 ``` AutoHCC: Complete in 774.213 s; Rough parallel ops/sec = 25832 FixedHCC: Complete in 19.630 s; Rough parallel ops/sec = 1018840 LRUCache: Complete in 25.842 s; Rough parallel ops/sec = 773947 ## The Fix One small change is apparently sufficient to fix the problem, but I wanted to re-optimize the whole "finding a good empty slot" algorithm to improve safety margins for good performance and to improve typical case performance. The small change is to track the newly-added slot from Grow in Insert, when applicable, and use that slot for insertion if (a) the home slot is already occupied, and (b) the newly-added slot is empty. This appears to sufficiently load new slots while avoiding over-population of either old or new slots. See `likely_empty_slot`. However I've also made the logic much more resilient to parts of the table becoming over-populated. I tested a variant that used double hashing instead of linear probing and found that hurt steady-state average-case performance, presumably due to loss of locality in the chains. And even conventional double hashing might not be ideally robust against density skew in the table (still present because of home location bias), because double hashing might choose a small increment that could take a long time to iterate to the under-populated part of the table. The compromise that seems to bring the best of each approach is this: do linear probing (+1 at a time) within a small bound (chosen bound of 4 based on performance testing) and then fall back on a double-hashing variant if no slot has been found. The double-hashing variant uses a probing increment that is always close to the golden ratio, relative to the table size, so that any under-populated regions of the table can be found relatively quickly, without introducing any additional skew. And the increment is varied slightly to avoid clustering effects that could happen with a fixed increment (regardless of how big it is). And that leaves us with one remaining problem: the double hashing increment might not be relatively prime to the table size, so the probing sequence might be a cycle that does not cover the full set of slots. To solve this we can use a technique I developed many years ago (probably also developed by others) that simply adds one (in modular arithmetic) whenever we finish a (potentially incomplete) cycle. This is a simple and reasonably efficient way to iterate over all the slots without repetition, regardless of whether the increment is not relatively prime to the table size, or even zero. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11871 Test Plan: existing correctness tests, especially ClockCacheTest.ClockTableFull Intended follow-up: make ClockTableFull test more complete for AutoHCC ## Performance Ignoring old AutoHCC performance, as we established above it could be terrible. FixedHCC and LRUCache are unaffected by this change. All tests below include this change. ### Getting up to size, single thread (same cache_bench command as above, all three run at same time) AutoHCC: Complete in 26.724 s; Rough parallel ops/sec = 748400 FixedHCC: Complete in 19.987 s; Rough parallel ops/sec = 1000631 LRUCache: Complete in 28.291 s; Rough parallel ops/sec = 706939 Single-threaded faster than LRUCache (often / sometimes) is good. 
FixedHCC has an obvious advantage because it starts at full size. ### Multiple threads, steady state, high hit rate ~95% Using `-threads=10 -populate_cache=1 -ops_per_thread=10000000` and still `-resident_ratio=0.6` AutoHCC: Complete in 48.778 s; Rough parallel ops/sec = 2050119 FixedHCC: Complete in 46.569 s; Rough parallel ops/sec = 2147329 LRUCache: Complete in 50.537 s; Rough parallel ops/sec = 1978735 ### Multiple threads, steady state, low hit rate ~50% Change to `-resident_ratio=0.2` AutoHCC: Complete in 49.264 s; Rough parallel ops/sec = 2029884 FixedHCC: Complete in 49.750 s; Rough parallel ops/sec = 2010041 LRUCache: Complete in 53.002 s; Rough parallel ops/sec = 1886713 Don't expect AutoHCC to be consistently faster than FixedHCC, but they are at least similar in these benchmarks. Reviewed By: jowlyzhang Differential Revision: D49548534 Pulled By: pdillinger fbshipit-source-id: 263e4f4d71d0e9a7d91db3795b48fad75408822b --- cache/clock_cache.cc | 158 ++++++++++++++++++++++++------------------- cache/clock_cache.h | 1 + 2 files changed, 91 insertions(+), 68 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index c7a8cf393..32d415816 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -2218,6 +2218,9 @@ bool AutoHyperClockTable::Grow(InsertState& state) { // forward" due to length_info_ being out-of-date. CatchUpLengthInfoNoWait(grow_home); + // See usage in DoInsert() + state.likely_empty_slot = grow_home; + // Success return true; } @@ -2847,14 +2850,15 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( // We could go searching through the chain for any duplicate, but that's // not typically helpful, except for the REDUNDANT block cache stats. // (Inferior duplicates will age out with eviction.) However, we do skip - // insertion if the home slot already has a match (already_matches below), - // so that we keep better CPU cache locality when we can. + // insertion if the home slot (or some other we happen to probe) already + // has a match (already_matches below). This helps to keep better locality + // when we can. // // And we can do that as part of searching for an available slot to // insert the new entry, because our preferred location and first slot // checked will be the home slot. // - // As the table initially grows to size few entries will be in the same + // As the table initially grows to size, few entries will be in the same // cache line as the chain head. However, churn in the cache relatively // quickly improves the proportion of entries sharing that cache line with // the chain head. Data: @@ -2877,12 +2881,19 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( size_t idx = home; bool already_matches = false; - if (!TryInsert(proto, arr[idx], initial_countdown, take_ref, - &already_matches)) { - if (already_matches) { - return nullptr; - } - + if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + &already_matches)) { + assert(idx == home); + } else if (already_matches) { + return nullptr; + // Here we try to populate newly-opened slots in the table, but not + // when we can add something to its home slot. This makes the structure + // more performant more quickly on (initial) growth. + } else if (UNLIKELY(state.likely_empty_slot > 0) && + TryInsert(proto, arr[state.likely_empty_slot], initial_countdown, + take_ref, &already_matches)) { + idx = state.likely_empty_slot; + } else { // We need to search for an available slot outside of the home. 
// Linear hashing provides nice resizing but does typically mean // that some heads (home locations) have (in expectation) twice as @@ -2892,54 +2903,28 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( // // This means that if we just use linear probing (by a small constant) // to find an available slot, part of the structure could easily fill up - // and resot to linear time operations even when the overall load factor + // and resort to linear time operations even when the overall load factor // is only modestly high, like 70%. Even though each slot has its own CPU - // cache line, there is likely a small locality benefit (e.g. TLB and - // paging) to iterating one by one, but obviously not with the linear - // hashing imbalance. + // cache line, there appears to be a small locality benefit (e.g. TLB and + // paging) to iterating one by one, as long as we don't afoul of the + // linear hashing imbalance. // // In a traditional non-concurrent structure, we could keep a "free list" // to ensure immediate access to an available slot, but maintaining such // a structure could require more cross-thread coordination to ensure // all entries are eventually available to all threads. // - // The way we solve this problem is to use linear probing but try to - // correct for the linear hashing imbalance (when probing beyond the - // home slot). If the home is high load (minimum shift) we choose an - // alternate location, uniformly among all slots, to linear probe from. - // - // Supporting data: we can use FixedHyperClockCache to get a baseline - // of near-ideal distribution of occupied slots, with its uniform - // distribution and double hashing. - // $ ./cache_bench -cache_type=fixed_hyper_clock_cache -histograms=0 - // -cache_size=1300000000 - // ... - // Slot occupancy stats: Overall 59% (156629/262144), - // Min/Max/Window = 47%/70%/500, MaxRun{Pos/Neg} = 22/15 - // - // Now we can try various sizes between powers of two with AutoHCC to see - // how bad the MaxRun can be. - // $ for I in `seq 8 15`; do - // ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 - // -cache_size=${I}00000000 2>&1 | grep clock_cache.cc; done - // where the worst case MaxRun was with I=11: - // Slot occupancy stats: Overall 59% (132528/221094), - // Min/Max/Window = 44%/73%/500, MaxRun{Pos/Neg} = 64/19 - // - // The large table size offers a large sample size to be confident that - // this is an acceptable level of clustering (max ~3x probe length) - // compared to no clustering. Increasing the max load factor to 0.7 - // increases the MaxRun above 100, potentially much closer to a tipping - // point. - - // TODO? remember a freed entry from eviction, possibly in thread local - - size_t start = home; - if (orig_home_shift == LengthInfoToMinShift(state.saved_length_info)) { - start = FastRange64(proto.hashed_key[0], used_length); - } - idx = start; - for (int cycles = 0;;) { + // The way we solve this problem is to use unit-increment linear probing + // with a small bound, and then fall back on big jumps to have a good + // chance of finding a slot in an under-populated region quickly if that + // doesn't work. 
+ size_t i = 0; + constexpr size_t kMaxLinearProbe = 4; + for (; i < kMaxLinearProbe; i++) { + idx++; + if (idx >= used_length) { + idx -= used_length; + } if (TryInsert(proto, arr[idx], initial_countdown, take_ref, &already_matches)) { break; @@ -2947,26 +2932,59 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( if (already_matches) { return nullptr; } - ++idx; - if (idx >= used_length) { - // In case the structure has grown, double-check - StartInsert(state); - used_length = LengthInfoToUsedLength(state.saved_length_info); + } + if (i == kMaxLinearProbe) { + // Keep searching, but change to a search method that should quickly + // find any under-populated region. Switching to an increment based + // on the golden ratio helps with that, but we also inject some minor + // variation (less than 2%, 1 in 2^6) to avoid clustering effects on + // this larger increment (if it were a fixed value in steady state + // operation). Here we are primarily using upper bits of hashed_key[1] + // while home is based on lowest bits. + uint64_t incr_ratio = 0x9E3779B185EBCA87U + (proto.hashed_key[1] >> 6); + size_t incr = FastRange64(incr_ratio, used_length); + assert(incr > 0); + size_t start = idx; + for (;; i++) { + idx += incr; if (idx >= used_length) { - idx = 0; + // Wrap around (faster than %) + idx -= used_length; } - } - if (idx == start) { - // Cycling back should not happen unless there is enough random churn - // in parallel that we happen to hit each slot at a time that it's - // occupied, which is really only feasible for small structures, though - // with linear probing to find empty slots, "small" here might be - // larger than for double hashing. - assert(used_length <= 256); - ++cycles; - if (cycles > 2) { - // Fall back on standalone insert in case something goes awry to - // cause this + if (idx == start) { + // We have just completed a cycle that might not have covered all + // slots. (incr and used_length could have common factors.) + // Increment for the next cycle, which eventually ensures complete + // iteration over the set of slots before repeating. + idx++; + if (idx >= used_length) { + idx -= used_length; + } + start++; + if (start >= used_length) { + start -= used_length; + } + if (i >= used_length) { + used_length = LengthInfoToUsedLength( + length_info_.load(std::memory_order_acquire)); + if (i >= used_length * 2) { + // Cycling back should not happen unless there is enough random + // churn in parallel that we happen to hit each slot at a time + // that it's occupied, which is really only feasible for small + // structures, though with linear probing to find empty slots, + // "small" here might be larger than for double hashing. + assert(used_length <= 256); + // Fall back on standalone insert in case something goes awry to + // cause this + return nullptr; + } + } + } + if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + &already_matches)) { + break; + } + if (already_matches) { return nullptr; } } @@ -3481,6 +3499,10 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, for (HandleImpl* h : to_finish_eviction) { TrackAndReleaseEvictedEntry(h, data); + // NOTE: setting likely_empty_slot here can cause us to reduce the + // portion of "at home" entries, probably because an evicted entry + // is more likely to come back than a random new entry and would be + // unable to go into its home slot. 
} to_finish_eviction.clear(); diff --git a/cache/clock_cache.h b/cache/clock_cache.h index eb0253199..908e64f1a 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -822,6 +822,7 @@ class AutoHyperClockTable : public BaseClockTable { // For BaseClockTable::Insert struct InsertState { uint64_t saved_length_info = 0; + size_t likely_empty_slot = 0; }; void StartInsert(InsertState& state); From 12d9386a4ff2e5efdedb2677a70f648197c4ece3 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 22 Sep 2023 13:49:19 -0700 Subject: [PATCH 147/386] Return a special OK status when the number of merge operands exceeds a threshold (#11870) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11870 Having a large number of merge operands applied at query time can have a significant effect on performance; therefore, applications might want to limit the number of deltas for any given key. However, there is currently no way to establish the number of operands for certain types of queries. The ticker `READ_NUM_MERGE_OPERANDS` only provides aggregate (not per-read) information. The `PerfContext` counters `internal_merge_count` and `internal_merge_point_lookup_count` can be used to get this information on a per-query basis for iterators and single point lookups; however, there is no per-key breakdown for `MultiGet` type APIs. The patch addresses this issue by introducing a special kind of OK status which signals that an application-defined threshold on the number of merge operands has been exceeded for a given key. The threshold can be specified on a per-query basis using a new field in `ReadOptions`. Reviewed By: jaykorean Differential Revision: D49522786 fbshipit-source-id: 4265b3848d1be5ff313a3e8fb604ddf56411dd2c --- db/db_impl/db_impl.cc | 24 ++++- db/db_merge_operator_test.cc | 94 ++++++++++++++++++- include/rocksdb/db.h | 9 +- include/rocksdb/options.h | 7 ++ include/rocksdb/status.h | 16 ++++ .../merge_operand_count_threshold.md | 1 + util/status.cc | 1 + 7 files changed, 142 insertions(+), 10 deletions(-) create mode 100644 unreleased_history/new_features/merge_operand_count_threshold.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0b23c3db0..6e2375ebe 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -748,7 +748,6 @@ void DBImpl::PrintStatistics() { } Status DBImpl::StartPeriodicTaskScheduler() { - #ifndef NDEBUG // It only used by test to disable scheduler bool disable_scheduler = false; @@ -2010,7 +2009,6 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, assert(get_impl_options.column_family); - if (read_options.timestamp) { const Status s = FailIfTsMismatchCf(get_impl_options.column_family, *(read_options.timestamp)); @@ -2220,6 +2218,12 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key, RecordTick(stats_, NUMBER_KEYS_READ); size_t size = 0; if (s.ok()) { + const auto& merge_threshold = read_options.merge_operand_count_threshold; + if (merge_threshold.has_value() && + merge_context.GetNumOperands() > merge_threshold.value()) { + s = Status::OkMergeOperandThresholdExceeded(); + } + if (get_impl_options.get_value) { if (get_impl_options.value) { size = get_impl_options.value->size(); @@ -2489,8 +2493,15 @@ std::vector DBImpl::MultiGet( } if (s.ok()) { + const auto& merge_threshold = read_options.merge_operand_count_threshold; + if (merge_threshold.has_value() && + merge_context.GetNumOperands() > merge_threshold.value()) { + s = Status::OkMergeOperandThresholdExceeded(); + } + bytes_read
+= value->size(); num_found++; + curr_value_size += value->size(); if (curr_value_size > read_options.value_size_soft_limit) { while (++keys_read < num_keys) { @@ -3175,6 +3186,12 @@ Status DBImpl::MultiGetImpl( assert(key->s); if (key->s->ok()) { + const auto& merge_threshold = read_options.merge_operand_count_threshold; + if (merge_threshold.has_value() && + key->merge_context.GetNumOperands() > merge_threshold) { + *(key->s) = Status::OkMergeOperandThresholdExceeded(); + } + if (key->value) { bytes_read += key->value->size(); } else { @@ -4088,7 +4105,6 @@ Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, return s; } - const std::string& DBImpl::GetName() const { return dbname_; } Env* DBImpl::GetEnv() const { return env_; } @@ -4106,7 +4122,6 @@ SystemClock* DBImpl::GetSystemClock() const { return immutable_db_options_.clock; } - Status DBImpl::StartIOTrace(const TraceOptions& trace_options, std::unique_ptr&& trace_writer) { assert(trace_writer != nullptr); @@ -4119,7 +4134,6 @@ Status DBImpl::EndIOTrace() { return Status::OK(); } - Options DBImpl::GetOptions(ColumnFamilyHandle* column_family) const { InstrumentedMutexLock l(&mutex_); auto cfh = static_cast_with_check(column_family); diff --git a/db/db_merge_operator_test.cc b/db/db_merge_operator_test.cc index aa1253a0b..5a7028e77 100644 --- a/db/db_merge_operator_test.cc +++ b/db/db_merge_operator_test.cc @@ -9,6 +9,7 @@ #include "db/forward_iterator.h" #include "port/stack_trace.h" #include "rocksdb/merge_operator.h" +#include "rocksdb/snapshot.h" #include "util/random.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend2.h" @@ -202,7 +203,6 @@ TEST_F(DBMergeOperatorTest, MergeErrorOnIteration) { VerifyDBInternal({{"k1", "v1"}, {"k2", "corrupted"}, {"k2", "v2"}}); } - TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) { // This is like a mini-stress test dedicated to `OpFailureScope::kMustMerge`. // Some or most of it might be deleted upon adding that option to the actual @@ -358,6 +358,98 @@ TEST_F(DBMergeOperatorTest, MergeOperatorFailsWithMustMerge) { } } +TEST_F(DBMergeOperatorTest, MergeOperandThresholdExceeded) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.merge_operator = MergeOperators::CreatePutOperator(); + options.env = env_; + Reopen(options); + + std::vector keys{"foo", "bar", "baz"}; + + // Write base values. + for (const auto& key : keys) { + ASSERT_OK(Put(key, key.ToString() + "0")); + } + + // Write merge operands. Note that the first key has 1 merge operand, the + // second one has 2 merge operands, and the third one has 3 merge operands. + // Also, we'll take some snapshots to make sure the merge operands are + // preserved during flush. + std::vector snapshots; + snapshots.reserve(3); + + for (size_t i = 0; i < keys.size(); ++i) { + snapshots.emplace_back(db_); + + const std::string suffix = std::to_string(i + 1); + + for (size_t j = i; j < keys.size(); ++j) { + ASSERT_OK(Merge(keys[j], keys[j].ToString() + suffix)); + } + } + + // Verify the results and status codes of various types of point lookups. 
+ auto verify = [&](const std::optional& threshold) { + ReadOptions read_options; + read_options.merge_operand_count_threshold = threshold; + + // Check Get() + { + for (size_t i = 0; i < keys.size(); ++i) { + PinnableSlice value; + const Status status = + db_->Get(read_options, db_->DefaultColumnFamily(), keys[i], &value); + ASSERT_OK(status); + ASSERT_EQ(status.IsOkMergeOperandThresholdExceeded(), + threshold.has_value() && i + 1 > threshold.value()); + ASSERT_EQ(value, keys[i].ToString() + std::to_string(i + 1)); + } + } + + // Check old-style MultiGet() + { + std::vector values; + std::vector statuses = db_->MultiGet(read_options, keys, &values); + + for (size_t i = 0; i < keys.size(); ++i) { + ASSERT_OK(statuses[i]); + ASSERT_EQ(statuses[i].IsOkMergeOperandThresholdExceeded(), + threshold.has_value() && i + 1 > threshold.value()); + ASSERT_EQ(values[i], keys[i].ToString() + std::to_string(i + 1)); + } + } + + // Check batched MultiGet() + { + std::vector values(keys.size()); + std::vector statuses(keys.size()); + db_->MultiGet(read_options, db_->DefaultColumnFamily(), keys.size(), + keys.data(), values.data(), statuses.data()); + + for (size_t i = 0; i < keys.size(); ++i) { + ASSERT_OK(statuses[i]); + ASSERT_EQ(statuses[i].IsOkMergeOperandThresholdExceeded(), + threshold.has_value() && i + 1 > threshold.value()); + ASSERT_EQ(values[i], keys[i].ToString() + std::to_string(i + 1)); + } + } + }; + + // Test the case when the feature is disabled as well as various thresholds. + verify(std::nullopt); + for (size_t i = 0; i < 5; ++i) { + verify(i); + } + + // Flush and try again to test the case when results are served from SSTs. + ASSERT_OK(Flush()); + verify(std::nullopt); + for (size_t i = 0; i < 5; ++i) { + verify(i); + } +} + TEST_F(DBMergeOperatorTest, DataBlockBinaryAndHash) { // Basic test to check that merge operator works with data block index type // DataBlockBinaryAndHash. diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 4e764cb6a..4cd4b09c4 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -135,6 +135,11 @@ struct IngestExternalFileArg { }; struct GetMergeOperandsOptions { + // A limit on the number of merge operands returned by the GetMergeOperands() + // API. In contrast with ReadOptions::merge_operand_count_threshold, this is a hard + // limit: when it is exceeded, no merge operands will be returned and the + // query will fail with an Incomplete status. See also the + // DB::GetMergeOperands() API below.
int expected_max_number_of_operands = 0; }; @@ -1841,7 +1846,6 @@ class DB { virtual Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); } - // Returns the unique ID which is read from IDENTITY file during the opening // of database by setting in the identity variable // Returns Status::OK if identity could be set properly @@ -1857,7 +1861,6 @@ class DB { // Returns default column family handle virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0; - virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, TablePropertiesCollection* props) = 0; virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) { @@ -1923,7 +1926,6 @@ class DB { return Status::NotSupported("NewDefaultReplayer() is not implemented."); } - // Needed for StackableDB virtual DB* GetRootDB() { return this; } @@ -2023,5 +2025,4 @@ Status RepairDB(const std::string& dbname, const DBOptions& db_options, // families encountered during the repair Status RepairDB(const std::string& dbname, const Options& options); - } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 2d164eb30..9e49e59fb 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -1552,6 +1553,12 @@ struct ReadOptions { // soft limit then all the remaining keys are returned with status Aborted. uint64_t value_size_soft_limit = std::numeric_limits::max(); + // When the number of merge operands applied exceeds this threshold + // during a successful query, the operation will return a special OK + // Status with subcode kMergeOperandThresholdExceeded. Currently only applies + // to point lookups and is disabled by default. + std::optional merge_operand_count_threshold; + // If true, all data read from underlying storage will be // verified against corresponding checksums. bool verify_checksums = true; diff --git a/include/rocksdb/status.h b/include/rocksdb/status.h index 447c3b9fe..8b3054545 100644 --- a/include/rocksdb/status.h +++ b/include/rocksdb/status.h @@ -114,6 +114,7 @@ class Status { kTxnNotPrepared = 13, kIOFenced = 14, kMergeOperatorFailed = 15, + kMergeOperandThresholdExceeded = 16, kMaxSubCode }; @@ -159,6 +160,14 @@ class Status { // changing public APIs. static Status OkOverwritten() { return Status(kOk, kOverwritten); } + // Successful, though the number of operands merged during the query exceeded + // the threshold. Note: using variants of OK status for program logic is + // discouraged, but it can be useful for communicating statistical information + // without changing public APIs. + static Status OkMergeOperandThresholdExceeded() { + return Status(kOk, kMergeOperandThresholdExceeded); + } + // Return error status of an appropriate type. static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) { return Status(kNotFound, msg, msg2); @@ -301,6 +310,13 @@ class Status { return code() == kOk && subcode() == kOverwritten; } + // Returns true iff the status indicates success *with* the number of operands + // merged exceeding the threshold + bool IsOkMergeOperandThresholdExceeded() const { + MarkChecked(); + return code() == kOk && subcode() == kMergeOperandThresholdExceeded; + } + // Returns true iff the status indicates a NotFound error. 
bool IsNotFound() const { MarkChecked(); diff --git a/unreleased_history/new_features/merge_operand_count_threshold.md b/unreleased_history/new_features/merge_operand_count_threshold.md new file mode 100644 index 000000000..cd73d71f2 --- /dev/null +++ b/unreleased_history/new_features/merge_operand_count_threshold.md @@ -0,0 +1 @@ +Added a new read option `merge_operand_count_threshold`. When the number of merge operands applied during a successful point lookup exceeds this threshold, the query will return a special OK status with a new subcode `kMergeOperandThresholdExceeded`. Applications might use this signal to take action to reduce the number of merge operands for the affected key(s), for example by running a compaction. diff --git a/util/status.cc b/util/status.cc index ead315848..160755d54 100644 --- a/util/status.cc +++ b/util/status.cc @@ -45,6 +45,7 @@ static const char* msgs[static_cast(Status::kMaxSubCode)] = { "Txn not prepared", // kTxnNotPrepared "IO fenced off", // kIOFenced "Merge operator failed", // kMergeOperatorFailed + "Number of operands merged exceeded threshold", // kMergeOperandThresholdExceeded }; Status::Status(Code _code, SubCode _subcode, const Slice& msg, From 6afde142663aca7081daf44fa01484fe92e70e13 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 22 Sep 2023 14:33:47 -0700 Subject: [PATCH 148/386] Add changelog entry for wide-column full merge (#11874) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11874 Add a changelog entry for https://github.com/facebook/rocksdb/pull/11858 . Reviewed By: jaykorean Differential Revision: D49557350 fbshipit-source-id: 44fcd08e9847407d9f18dd3d9363d233f4591c84 --- unreleased_history/new_features/wide_column_full_merge.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 unreleased_history/new_features/wide_column_full_merge.md diff --git a/unreleased_history/new_features/wide_column_full_merge.md b/unreleased_history/new_features/wide_column_full_merge.md new file mode 100644 index 000000000..480750820 --- /dev/null +++ b/unreleased_history/new_features/wide_column_full_merge.md @@ -0,0 +1 @@ +Added a new wide-column aware full merge API called `FullMergeV3` to `MergeOperator`. `FullMergeV3` supports wide columns both as base value and merge result, which enables the application to perform more general transformations during merges. For backward compatibility, the default implementation implements the earlier logic of applying the merge operation to the default column of any wide-column entities. Specifically, if there is no base value or the base value is a plain key-value, the default implementation falls back to `FullMergeV2`. If the base value is a wide-column entity, the default implementation invokes `FullMergeV2` to perform the merge on the default column, and leaves any other columns unchanged. From 77a1d6eafb95b2ff7dc84b9e8b46faa746f67315 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 22 Sep 2023 16:42:52 -0700 Subject: [PATCH 149/386] Fix assertion failure in AutoHCC (#11877) Summary: Example crash seen in crash test: ``` db_stress: cache/clock_cache.cc:237: bool rocksdb::clock_cache::{anonymous}::BeginSlotInsert(const rocksdb::clock_cache::ClockHandleBasicData&, rocksdb::clock_cache::ClockHandle&, uint64_t, bool*): Assertion `*already_matches == false' failed. ``` I was intentionally ignoring `already_matches` without resetting it to false for the next call. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11877 Test Plan: Reproducer no longer reproduces: ``` while ./cache_bench -cache_type=auto_hyper_clock_cache -threads=32 -populate_cache=0 -histograms=0 -report_problems -insert_percent=87 -lookup_insert_percent=2 -skew=10 -ops_per_thread=100 -cache_size=1000000; do echo hi; done ``` Reviewed By: cbi42 Differential Revision: D49562065 Pulled By: pdillinger fbshipit-source-id: 941062e6eac7a4b56157925b1cf2a0b15ff9cc9d --- cache/clock_cache.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 32d415816..e8bce7a5b 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -2881,6 +2881,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( size_t idx = home; bool already_matches = false; + bool already_matches_ignore = false; if (TryInsert(proto, arr[idx], initial_countdown, take_ref, &already_matches)) { assert(idx == home); @@ -2888,10 +2889,12 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( return nullptr; // Here we try to populate newly-opened slots in the table, but not // when we can add something to its home slot. This makes the structure - // more performant more quickly on (initial) growth. + // more performant more quickly on (initial) growth. We ignore "already + // matches" in this case because it is unlikely and difficult to + // incorporate logic for here cleanly and efficiently. } else if (UNLIKELY(state.likely_empty_slot > 0) && TryInsert(proto, arr[state.likely_empty_slot], initial_countdown, - take_ref, &already_matches)) { + take_ref, &already_matches_ignore)) { idx = state.likely_empty_slot; } else { // We need to search for an available slot outside of the home. From 00868096017a1808e1f7f2e5ca022553c6888380 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 22 Sep 2023 16:43:50 -0700 Subject: [PATCH 150/386] Fix a bug with atomic_flush that causes DB to get stuck after a flush failure (#11872) Summary: With atomic_flush=true, a flush job with younger memtables waits for older memtables to be installed before installing its memtables. If the flush for older memtables failed, auto-recovery starts a resume thread which can become stuck waiting for all background work to finish (including the flush for younger memtables). If a non-recovery flush starts now and tries to flush, it can make the situation worse since it will fail due to background error but never rollback its memtable: https://github.com/facebook/rocksdb/blob/269478ee4618283cd6d710fdfea9687157a259c1/db/db_impl/db_impl_compaction_flush.cc#L725 This prevents any future flush from picking old memtables. A more detailed repro is in the unit test. This PR fixes this issue by 1. Ensure we rollback memtables if an atomic flush fails due to background error 2. When there is a background error, abort atomic flushes that are waiting for older memtables to be installed 3. Do not schedule non-recovery flushes when there is a background error that stops background work There was another issue with atomic_flush=true where DB can hang during DB close, see more in #11867. The fix in this PR, specifically fix 2 above, should be enough to resolve it too. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11872 Test Plan: new unit test.
Reviewed By: jowlyzhang Differential Revision: D49556867 Pulled By: cbi42 fbshipit-source-id: 4a0210ff28a8552a99ece7fbb0f574fd24b4da3f --- db/db_flush_test.cc | 89 ++++++++++++++- db/db_impl/db_impl.cc | 26 ++++- db/db_impl/db_impl_compaction_flush.cc | 104 +++++++++++++++--- db/error_handler.cc | 1 + db/flush_job.cc | 5 +- db/memtable_list.cc | 5 +- include/rocksdb/listener.h | 3 + .../100_atomic_flush_db_stuck_fix.md | 1 + 8 files changed, 210 insertions(+), 24 deletions(-) create mode 100644 unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc index d10bd3180..8537af84d 100644 --- a/db/db_flush_test.cc +++ b/db/db_flush_test.cc @@ -3358,18 +3358,24 @@ TEST_F(DBFlushTest, NonAtomicNormalFlushAbortWhenBGError) { SyncPoint::GetInstance()->EnableProcessing(); SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:RecoverSuccess", + {{"Let error recovery start", + "RecoverFromRetryableBGIOError:BeforeStart"}, + {"RecoverFromRetryableBGIOError:RecoverSuccess", "Wait for error recover"}}); ASSERT_OK(Put(Key(1), "val1")); // trigger bg flush0 for mem0 ASSERT_OK(Put(Key(2), "val2")); + // Not checking status since this wait can finish before flush starts. dbfull()->TEST_WaitForFlushMemTable().PermitUncheckedError(); // trigger bg flush1 for mem1, should see bg error and abort // before picking a memtable to flush ASSERT_OK(Put(Key(3), "val3")); + ASSERT_NOK(dbfull()->TEST_WaitForFlushMemTable()); + ASSERT_EQ(0, NumTableFilesAtLevel(0)); + TEST_SYNC_POINT("Let error recovery start"); TEST_SYNC_POINT("Wait for error recover"); // Recovery flush writes 2 memtables together into 1 file. ASSERT_EQ(1, NumTableFilesAtLevel(0)); @@ -3379,6 +3385,87 @@ TEST_F(DBFlushTest, NonAtomicNormalFlushAbortWhenBGError) { SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBFlushTest, DBStuckAfterAtomicFlushError) { + // Test for a bug with atomic flush where DB can become stuck + // after a flush error. A repro timeline: + // + // Start Flush0 for mem0 + // Start Flush1 for mem1 + // Now Flush1 will wait for Flush0 to install mem0 + // Flush0 finishes with retryable IOError, rollbacks mem0 + // Resume starts and waits for background job to finish, i.e., Flush1 + // Fill memtable again, trigger Flush2 for mem0 + // Flush2 will get error status, and not rollback mem0, see code in + // https://github.com/facebook/rocksdb/blob/b927ba5936216861c2c35ab68f50ba4a78e65747/db/db_impl/db_impl_compaction_flush.cc#L725 + // + // DB is stuck since mem0 can never be picked now + // + // The fix is to rollback mem0 in Flush2, and let Flush1 also abort upon + // background error besides waiting for older memtables to be installed. + // The recovery flush in this case should pick up all memtables + // and write them to a single L0 file. 
+ Options opts = CurrentOptions(); + opts.atomic_flush = true; + opts.memtable_factory.reset(test::NewSpecialSkipListFactory(1)); + opts.max_write_buffer_number = 64; + opts.max_background_flushes = 4; + env_->SetBackgroundThreads(4, Env::HIGH); + DestroyAndReopen(opts); + + std::atomic_int flush_count = 0; + SyncPoint::GetInstance()->ClearAllCallBacks(); + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->SetCallBack( + "FlushJob::WriteLevel0Table:s", [&](void* s_ptr) { + int c = flush_count.fetch_add(1); + if (c == 0) { + Status* s = (Status*)(s_ptr); + IOStatus io_error = IOStatus::IOError("injected foobar"); + io_error.SetRetryable(true); + *s = io_error; + TEST_SYNC_POINT("Let flush for mem1 start"); + // Wait for Flush1 to start waiting to install flush result + TEST_SYNC_POINT("Wait for flush for mem1"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"Let flush for mem1 start", "Flush for mem1"}, + {"DBImpl::AtomicFlushMemTablesToOutputFiles:WaitCV", + "Wait for flush for mem1"}, + {"RecoverFromRetryableBGIOError:BeforeStart", + "Wait for resume to start"}, + {"Recovery should continue here", + "RecoverFromRetryableBGIOError:BeforeStart2"}, + {"RecoverFromRetryableBGIOError:RecoverSuccess", + "Wait for error recover"}}); + SyncPoint::GetInstance()->EnableProcessing(); + ASSERT_OK(Put(Key(1), "val1")); + // trigger Flush0 for mem0 + ASSERT_OK(Put(Key(2), "val2")); + + // trigger Flush1 for mem1 + TEST_SYNC_POINT("Flush for mem1"); + ASSERT_OK(Put(Key(3), "val3")); + + // Wait until resume started to schedule another flush + TEST_SYNC_POINT("Wait for resume to start"); + // This flush should not be scheduled due to bg error + ASSERT_OK(Put(Key(4), "val4")); + + // TEST_WaitForBackgroundWork() returns background error + // after all background work is done. + ASSERT_NOK(dbfull()->TEST_WaitForBackgroundWork()); + // Flush should abort and not writing any table + ASSERT_EQ(0, NumTableFilesAtLevel(0)); + + // Wait until this flush is done. + TEST_SYNC_POINT("Recovery should continue here"); + TEST_SYNC_POINT("Wait for error recover"); + // error recovery can schedule new flushes, but should not + // encounter error + ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork()); + ASSERT_EQ(1, NumTableFilesAtLevel(0)); +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 6e2375ebe..ace2899e9 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -454,10 +454,30 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { s = Status::ShutdownInProgress(); } if (s.ok()) { - for (auto cfd : *versions_->GetColumnFamilySet()) { - SchedulePendingCompaction(cfd); + // Since we drop all non-recovery flush requests during recovery, + // and new memtable may fill up during recovery, + // schedule one more round of flush. + FlushOptions flush_opts; + flush_opts.allow_write_stall = false; + flush_opts.wait = false; + Status status = FlushAllColumnFamilies( + flush_opts, FlushReason::kCatchUpAfterErrorRecovery); + if (!status.ok()) { + // FlushAllColumnFamilies internally should take care of setting + // background error if needed. + ROCKS_LOG_INFO(immutable_db_options_.info_log, + "The catch up flush after successful recovery failed [%s]", + s.ToString().c_str()); + } + // FlushAllColumnFamilies releases and re-acquires mutex. 
+ if (shutdown_initiated_) { + s = Status::ShutdownInProgress(); + } else { + for (auto cfd : *versions_->GetColumnFamilySet()) { + SchedulePendingCompaction(cfd); + } + MaybeScheduleFlushOrCompaction(); } - MaybeScheduleFlushOrCompaction(); } // Wake up any waiters - in this case, it could be the shutdown thread diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 8e4f70682..b610b8d33 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -286,9 +286,10 @@ Status DBImpl::FlushMemTableToOutputFile( TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"); // Exit a flush due to bg error should not set bg error again. bool skip_set_bg_error = false; - if (s.ok() && flush_reason != FlushReason::kErrorRecovery && - flush_reason != FlushReason::kErrorRecoveryRetryFlush && - !error_handler_.GetBGError().ok()) { + if (s.ok() && !error_handler_.GetBGError().ok() && + error_handler_.IsBGWorkStopped() && + flush_reason != FlushReason::kErrorRecovery && + flush_reason != FlushReason::kErrorRecoveryRetryFlush) { // Error recovery in progress, should not pick memtable which excludes // them from being picked up by recovery flush. // This ensures that when bg error is set, no new flush can pick @@ -296,6 +297,9 @@ Status DBImpl::FlushMemTableToOutputFile( skip_set_bg_error = true; s = error_handler_.GetBGError(); assert(!s.ok()); + ROCKS_LOG_BUFFER(log_buffer, + "[JOB %d] Skip flush due to background error %s", + job_context->job_id, s.ToString().c_str()); } if (s.ok()) { @@ -573,6 +577,21 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( pick_status.push_back(false); } + bool flush_for_recovery = + bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery || + bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecoveryRetryFlush; + bool skip_set_bg_error = false; + + if (s.ok() && !error_handler_.GetBGError().ok() && + error_handler_.IsBGWorkStopped() && !flush_for_recovery) { + s = error_handler_.GetBGError(); + skip_set_bg_error = true; + assert(!s.ok()); + ROCKS_LOG_BUFFER(log_buffer, + "[JOB %d] Skip flush due to background error %s", + job_context->job_id, s.ToString().c_str()); + } + if (s.ok()) { for (int i = 0; i != num_cfs; ++i) { jobs[i]->PickMemTable(); @@ -637,7 +656,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( } } } - } else { + } else if (!skip_set_bg_error) { + // When `skip_set_bg_error` is true, no memtable is picked so + // there is no need to call Cancel() or RollbackMemtableFlush(). + // // Need to undo atomic flush if something went wrong, i.e. s is not OK and // it is not because of CF drop. 
// Have to cancel the flush jobs that have NOT executed because we need to @@ -693,10 +715,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( }; bool resuming_from_bg_err = - error_handler_.IsDBStopped() || - (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery || - bg_flush_args[0].flush_reason_ == - FlushReason::kErrorRecoveryRetryFlush); + error_handler_.IsDBStopped() || flush_for_recovery; while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) { std::pair res = wait_to_install_func(); @@ -707,15 +726,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( s = res.first; break; } else if (!res.second) { + // we are the oldest immutable memtable + break; + } + // We are not the oldest immutable memtable + TEST_SYNC_POINT_CALLBACK( + "DBImpl::AtomicFlushMemTablesToOutputFiles:WaitCV", &res); + // + // If bg work is stopped, recovery thread first calls + // WaitForBackgroundWork() before proceeding to flush for recovery. This + // flush can block WaitForBackgroundWork() while waiting for recovery + // flush to install result. To avoid this deadlock, we should abort here + // if there is background error. + if (!flush_for_recovery && error_handler_.IsBGWorkStopped() && + !error_handler_.GetBGError().ok()) { + s = error_handler_.GetBGError(); + assert(!s.ok()); break; } atomic_flush_install_cv_.Wait(); - resuming_from_bg_err = - error_handler_.IsDBStopped() || - (bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery || - bg_flush_args[0].flush_reason_ == - FlushReason::kErrorRecoveryRetryFlush); + resuming_from_bg_err = error_handler_.IsDBStopped() || flush_for_recovery; } if (!resuming_from_bg_err) { @@ -731,6 +762,17 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( // installation. s = error_handler_.GetRecoveryError(); } + // Since we are not installing these memtables, need to rollback + // to allow future flush job to pick up these memtables. + if (!s.ok()) { + for (int i = 0; i != num_cfs; ++i) { + assert(exec_status[i].first); + assert(exec_status[i].second.ok()); + auto& mems = jobs[i]->GetMemTables(); + cfds[i]->imm()->RollbackMemtableFlush( + mems, /*rollback_succeeding_memtables=*/false); + } + } } if (s.ok()) { @@ -834,7 +876,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( // Need to undo atomic flush if something went wrong, i.e. s is not OK and // it is not because of CF drop. - if (!s.ok() && !s.IsColumnFamilyDropped()) { + if (!s.ok() && !s.IsColumnFamilyDropped() && !skip_set_bg_error) { if (log_io_s.ok()) { // Error while writing to MANIFEST. // In fact, versions_->io_status() can also be the result of renaming @@ -2233,9 +2275,13 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, WaitForPendingWrites(); if (flush_reason != FlushReason::kErrorRecoveryRetryFlush && + flush_reason != FlushReason::kCatchUpAfterErrorRecovery && (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) { // Note that, when flush reason is kErrorRecoveryRetryFlush, during the // auto retry resume, we want to avoid creating new small memtables. + // If flush reason is kCatchUpAfterErrorRecovery, we try to flush any new + // memtable that filled up during recovery, and we also want to avoid + // switching memtable to create small memtables. // Therefore, SwitchMemtable will not be called. 
Also, since ResumeImpl // will iterate through all the CFs and call FlushMemtable during auto // retry resume, it is possible that in some CFs, @@ -2426,7 +2472,8 @@ Status DBImpl::AtomicFlushMemTables( for (auto cfd : cfds) { if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) || - flush_reason == FlushReason::kErrorRecoveryRetryFlush) { + flush_reason == FlushReason::kErrorRecoveryRetryFlush || + flush_reason == FlushReason::kCatchUpAfterErrorRecovery) { continue; } cfd->Ref(); @@ -2694,6 +2741,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() { // There has been a hard error and this call is not part of the recovery // sequence. Bail out here so we don't get into an endless loop of // scheduling BG work which will again call this function + // + // Note that a non-recovery flush can still be scheduled if + // error_handler_.IsRecoveryInProgress() returns true. We rely on + // BackgroundCallFlush() to check flush reason and drop non-recovery + // flushes. return; } else if (shutting_down_.load(std::memory_order_acquire)) { // DB is being deleted; no more background compactions @@ -3023,6 +3075,24 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context, // This cfd is already referenced FlushRequest flush_req = PopFirstFromFlushQueue(); FlushReason flush_reason = flush_req.flush_reason; + if (!error_handler_.GetBGError().ok() && error_handler_.IsBGWorkStopped() && + flush_reason != FlushReason::kErrorRecovery && + flush_reason != FlushReason::kErrorRecoveryRetryFlush) { + // Stop non-recovery flush when bg work is stopped + // Note that we drop the flush request here. + // Recovery thread should schedule further flushes after bg error + // is cleared. + status = error_handler_.GetBGError(); + assert(!status.ok()); + ROCKS_LOG_BUFFER(log_buffer, + "[JOB %d] Abort flush due to background error %s", + job_context->job_id, status.ToString().c_str()); + *reason = flush_reason; + for (auto item : flush_req.cfd_to_max_mem_id_to_persist) { + item.first->UnrefAndTryDelete(); + } + return status; + } if (!immutable_db_options_.atomic_flush && ShouldRescheduleFlushRequestToRetainUDT(flush_req)) { assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1); @@ -3165,9 +3235,9 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { bg_cv_.SignalAll(); // In case a waiter can proceed despite the error mutex_.Unlock(); ROCKS_LOG_ERROR(immutable_db_options_.info_log, - "Waiting after background flush error: %s" + "[JOB %d] Waiting after background flush error: %s" "Accumulated background error counts: %" PRIu64, - s.ToString().c_str(), error_cnt); + job_context.job_id, s.ToString().c_str(), error_cnt); log_buffer.FlushBufferToLog(); LogFlush(immutable_db_options_.info_log); immutable_db_options_.clock->SleepForMicroseconds(1000000); diff --git a/db/error_handler.cc b/db/error_handler.cc index efadfbc80..d79455509 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -670,6 +670,7 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( // mutex is released. 
void ErrorHandler::RecoverFromRetryableBGIOError() { TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart"); + TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart2"); InstrumentedMutexLock l(db_mutex_); if (end_recovery_) { EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, diff --git a/db/flush_job.cc b/db/flush_job.cc index 046abcd10..69303081f 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -79,6 +79,8 @@ const char* GetFlushReasonString(FlushReason flush_reason) { return "Error Recovery Retry Flush"; case FlushReason::kWalFull: return "WAL Full"; + case FlushReason::kCatchUpAfterErrorRecovery: + return "Catch Up After Error Recovery"; default: return "Invalid"; } @@ -311,7 +313,8 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta, if (!db_options_.atomic_flush && flush_reason_ != FlushReason::kErrorRecovery && flush_reason_ != FlushReason::kErrorRecoveryRetryFlush && - error_handler && !error_handler->GetBGError().ok()) { + error_handler && !error_handler->GetBGError().ok() && + error_handler->IsBGWorkStopped()) { cfd_->imm()->RollbackMemtableFlush( mems_, /*rollback_succeeding_memtables=*/!db_options_.atomic_flush); s = error_handler->GetBGError(); diff --git a/db/memtable_list.cc b/db/memtable_list.cc index da4721d13..dfa93461b 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -438,7 +438,6 @@ void MemTableList::RollbackMemtableFlush(const autovector& mems, TEST_SYNC_POINT("RollbackMemtableFlush"); AutoThreadOperationStageUpdater stage_updater( ThreadStatus::STAGE_MEMTABLE_ROLLBACK); - assert(!mems.empty()); #ifndef NDEBUG for (MemTable* m : mems) { assert(m->flush_in_progress_); @@ -483,7 +482,9 @@ void MemTableList::RollbackMemtableFlush(const autovector& mems, num_flush_not_started_++; } } - imm_flush_needed.store(true, std::memory_order_release); + if (!mems.empty()) { + imm_flush_needed.store(true, std::memory_order_release); + } } // Try record a successful flush in the manifest file. It might just return diff --git a/include/rocksdb/listener.h b/include/rocksdb/listener.h index 27f7d8a17..2cc30d871 100644 --- a/include/rocksdb/listener.h +++ b/include/rocksdb/listener.h @@ -163,6 +163,7 @@ enum class CompactionReason : int { const char* GetCompactionReasonString(CompactionReason compaction_reason); +// When adding flush reason, make sure to also update `GetFlushReasonString()`. enum class FlushReason : int { kOthers = 0x00, kGetLiveFiles = 0x01, @@ -180,6 +181,8 @@ enum class FlushReason : int { // will not be called to avoid many small immutable memtables. kErrorRecoveryRetryFlush = 0xc, kWalFull = 0xd, + // SwitchMemtable will not be called for this flush reason. + kCatchUpAfterErrorRecovery = 0xe, }; const char* GetFlushReasonString(FlushReason flush_reason); diff --git a/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md b/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md new file mode 100644 index 000000000..82893ae65 --- /dev/null +++ b/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md @@ -0,0 +1 @@ +* Fix a bug with atomic_flush=true that can cause DB to stuck after a flush fails (#11872). 
\ No newline at end of file From 552bc01669e7e0ea19c08651372e6391391c1feb Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 22 Sep 2023 17:28:36 -0700 Subject: [PATCH 151/386] Surface timestamp from db to the transaction iterator (#11847) Summary: Provide an override implementation of `Iterator::timestamp` API for `BaseDeltaIterator` so that timestamp read from DB can be surfaced by an iterator created from inside of a transaction. The behavior of the API follows this rule: 1) If the entry is read from within the transaction, an empty `Slice` is returned as the timestamp, regardless of whether `Transaction::SetCommitTimestamp` is called. 2) If the entry is read from the DB, the corresponding `DBIter::timestamp()` API's result is returned. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11847 Test Plan: make all check add some unit test Reviewed By: ltamasi Differential Revision: D49377359 Pulled By: jowlyzhang fbshipit-source-id: 1511ead262ce3515ee6c6e0f829f1b69a10fe994 --- .../write_committed_transaction_ts_test.cc | 82 ++++++++++++++++++- .../write_batch_with_index_internal.cc | 4 + .../write_batch_with_index_internal.h | 1 + 3 files changed, 86 insertions(+), 1 deletion(-) diff --git a/utilities/transactions/write_committed_transaction_ts_test.cc b/utilities/transactions/write_committed_transaction_ts_test.cc index dc25b9da8..595e7ad1a 100644 --- a/utilities/transactions/write_committed_transaction_ts_test.cc +++ b/utilities/transactions/write_committed_transaction_ts_test.cc @@ -98,6 +98,38 @@ TEST_P(WriteCommittedTxnWithTsTest, SanityChecks) { txn1.reset(); } +void CheckKeyValueTsWithIterator( + Iterator* iter, + std::vector> entries) { + size_t num_entries = entries.size(); + // test forward iteration + for (size_t i = 0; i < num_entries; i++) { + auto [key, value, timestamp] = entries[i]; + if (i == 0) { + iter->Seek(key); + } else { + iter->Next(); + } + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), key); + ASSERT_EQ(iter->value(), value); + ASSERT_EQ(iter->timestamp(), timestamp); + } + // test backward iteration + for (size_t i = 0; i < num_entries; i++) { + auto [key, value, timestamp] = entries[num_entries - 1 - i]; + if (i == 0) { + iter->SeekForPrev(key); + } else { + iter->Prev(); + } + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), key); + ASSERT_EQ(iter->value(), value); + ASSERT_EQ(iter->timestamp(), timestamp); + } +} + TEST_P(WriteCommittedTxnWithTsTest, ReOpenWithTimestamp) { options.merge_operator = MergeOperators::CreateUInt64AddOperator(); ASSERT_OK(ReOpenNoDelete()); @@ -128,17 +160,57 @@ TEST_P(WriteCommittedTxnWithTsTest, ReOpenWithTimestamp) { std::unique_ptr txn1( NewTxn(WriteOptions(), TransactionOptions())); assert(txn1); + + std::string write_ts; + uint64_t write_ts_int = 23; + PutFixed64(&write_ts, write_ts_int); + ReadOptions read_opts; + std::string read_ts; + PutFixed64(&read_ts, write_ts_int + 1); + Slice read_ts_slice = read_ts; + read_opts.timestamp = &read_ts_slice; + + ASSERT_OK(txn1->Put(handles_[1], "bar", "value0")); ASSERT_OK(txn1->Put(handles_[1], "foo", "value1")); + // (key, value, ts) pairs to check. + std::vector> + entries_to_check; + entries_to_check.emplace_back("bar", "value0", ""); + entries_to_check.emplace_back("foo", "value1", ""); + { std::string buf; PutFixed64(&buf, 23); ASSERT_OK(txn1->Put("id", buf)); ASSERT_OK(txn1->Merge("id", buf)); } + + // Check (key, value, ts) with overwrites in txn before `SetCommitTimestamp`. 
+ if (std::get<2>(GetParam())) { // enable_indexing = true + std::unique_ptr iter(txn1->GetIterator(read_opts, handles_[1])); + CheckKeyValueTsWithIterator(iter.get(), entries_to_check); + } + ASSERT_OK(txn1->SetName("txn1")); ASSERT_OK(txn1->Prepare()); - ASSERT_OK(txn1->SetCommitTimestamp(/*ts=*/23)); + ASSERT_OK(txn1->SetCommitTimestamp(write_ts_int)); + + // Check (key, value, ts) with overwrites in txn after `SetCommitTimestamp`. + if (std::get<2>(GetParam())) { // enable_indexing = true + std::unique_ptr iter(txn1->GetIterator(read_opts, handles_[1])); + CheckKeyValueTsWithIterator(iter.get(), entries_to_check); + } + ASSERT_OK(txn1->Commit()); + entries_to_check.clear(); + entries_to_check.emplace_back("bar", "value0", write_ts); + entries_to_check.emplace_back("foo", "value1", write_ts); + + // Check (key, value, ts) pairs with overwrites in txn after `Commit`. + { + std::unique_ptr iter(txn1->GetIterator(read_opts, handles_[1])); + CheckKeyValueTsWithIterator(iter.get(), entries_to_check); + } txn1.reset(); { @@ -159,6 +231,14 @@ TEST_P(WriteCommittedTxnWithTsTest, ReOpenWithTimestamp) { assert(result); ASSERT_EQ(46, ival); } + + // Check (key, value, ts) pairs without overwrites in txn. + { + std::unique_ptr txn2( + NewTxn(WriteOptions(), TransactionOptions())); + std::unique_ptr iter(txn2->GetIterator(read_opts, handles_[1])); + CheckKeyValueTsWithIterator(iter.get(), entries_to_check); + } } TEST_P(WriteCommittedTxnWithTsTest, RecoverFromWal) { diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 7ffc9ea6f..4e9c35743 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -174,6 +174,10 @@ Slice BaseDeltaIterator::value() const { } } +Slice BaseDeltaIterator::timestamp() const { + return current_at_base_ ? base_iterator_->timestamp() : Slice(); +} + Status BaseDeltaIterator::status() const { if (!status_.ok()) { return status_; diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index 3798532b4..c8c201804 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -50,6 +50,7 @@ class BaseDeltaIterator : public Iterator { void Prev() override; Slice key() const override; Slice value() const override; + Slice timestamp() const override; Status status() const override; void Invalidate(Status s); From 48589b961f25aff08d4d928b1bc4b5ae0500eed6 Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 22 Sep 2023 18:07:46 -0700 Subject: [PATCH 152/386] Fix updating the capacity of a tiered cache (#11873) Summary: Updating the tiered cache (cache allocated using ```NewTieredCache()```) by calling ```SetCapacity()``` on it was not working properly. The initial creation would set the primary cache capacity to the combined primary and compressed secondary cache capacity. But ```SetCapacity()``` would just set the primary cache capacity, with no way to change the secondary cache capacity. Additionally, the API was confusing, since the primary and compressed secondary capacities would be specified separately during creation, but ```SetCapacity``` took the combined capacity. With this fix, the user always specifies the total budget and compressed secondary cache ratio on creation. 
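As a rough illustration of that split (a sketch with a hypothetical helper, not the RocksDB implementation), the total budget and the compressed secondary ratio determine the two tier capacities like this:

```cpp
#include <cstddef>

// Sketch with a hypothetical helper (not the RocksDB implementation): one
// total budget plus a compressed secondary ratio yields the two tier sizes.
struct TieredSplitSketch {
  std::size_t primary_capacity;
  std::size_t compressed_secondary_capacity;
};

TieredSplitSketch SplitTieredCapacity(std::size_t total_capacity,
                                      double compressed_secondary_ratio) {
  auto secondary = static_cast<std::size_t>(
      static_cast<double>(total_capacity) * compressed_secondary_ratio);
  return {total_capacity - secondary, secondary};
}

// For example, a 100 MB budget with a 0.3 ratio gives a 70 MB primary block
// cache and a 30 MB compressed secondary cache, matching the values used in
// the updated tests below.
```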
Subsequently, `SetCapacity` will distribute the new capacity across the two caches by the same ratio. The `NewTieredCache` API has been changed to take the total cache capacity (inclusive of both the primary and the compressed secondary cache) and the ratio of total capacity to allocate to the compressed cache. These are specified in `TieredCacheOptions`. Any capacity specified in `LRUCacheOptions`, `HyperClockCacheOptions` and `CompressedSecondaryCacheOptions` is ignored. A new API, `UpdateTieredCache` is provided to dynamically update the total capacity, ratio of compressed cache, and admission policy. Tests: New unit tests Pull Request resolved: https://github.com/facebook/rocksdb/pull/11873 Reviewed By: akankshamahajan15 Differential Revision: D49562250 Pulled By: anand1976 fbshipit-source-id: 57033bc713b68d5da6292207765a6b3dbe539ddf --- cache/cache_reservation_manager.h | 7 +- cache/compressed_secondary_cache.cc | 4 +- cache/compressed_secondary_cache_test.cc | 157 +++++++++++++- cache/secondary_cache_adapter.cc | 193 +++++++++++++++++- cache/secondary_cache_adapter.h | 10 + cache/tiered_secondary_cache_test.cc | 7 +- include/rocksdb/cache.h | 29 ++- .../bug_fixes/dyn_tiered_cache_capacity.md | 1 + .../dyn_tiered_cache_update.md | 1 + 9 files changed, 383 insertions(+), 26 deletions(-) create mode 100644 unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md create mode 100644 unreleased_history/public_api_changes/dyn_tiered_cache_update.md diff --git a/cache/cache_reservation_manager.h b/cache/cache_reservation_manager.h index 08bf59b00..a7b06dea2 100644 --- a/cache/cache_reservation_manager.h +++ b/cache/cache_reservation_manager.h @@ -273,9 +273,10 @@ class ConcurrentCacheReservationManager std::size_t total_mem_used = cache_res_mgr_->GetTotalMemoryUsed(); Status s; if (!increase) { - assert(total_mem_used >= memory_used_delta); - s = cache_res_mgr_->UpdateCacheReservation(total_mem_used - - memory_used_delta); + s = cache_res_mgr_->UpdateCacheReservation( + (total_mem_used > memory_used_delta) + ? 
(total_mem_used - memory_used_delta) + : 0); } else { s = cache_res_mgr_->UpdateCacheReservation(total_mem_used + memory_used_delta); diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index 3a35945e2..32b30f0ed 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -26,9 +26,7 @@ CompressedSecondaryCache::CompressedSecondaryCache( cache_))), disable_cache_(opts.capacity == 0) {} -CompressedSecondaryCache::~CompressedSecondaryCache() { - assert(cache_res_mgr_->GetTotalReservedCacheSize() == 0); -} +CompressedSecondaryCache::~CompressedSecondaryCache() {} std::unique_ptr CompressedSecondaryCache::Lookup( const Slice& key, const Cache::CacheItemHelper* helper, diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index eb31738f7..d82be1073 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -989,11 +989,11 @@ class CompressedSecCacheTestWithTiered CompressedSecCacheTestWithTiered() { LRUCacheOptions lru_opts; HyperClockCacheOptions hcc_opts( - /*_capacity=*/70 << 20, + /*_capacity=*/0, /*_estimated_entry_charge=*/256 << 10, /*_num_shard_bits=*/0); TieredCacheOptions opts; - lru_opts.capacity = 70 << 20; + lru_opts.capacity = 0; lru_opts.num_shard_bits = 0; lru_opts.high_pri_pool_ratio = 0; opts.cache_type = std::get<0>(GetParam()); @@ -1004,8 +1004,10 @@ class CompressedSecCacheTestWithTiered } opts.adm_policy = std::get<1>(GetParam()); ; - opts.comp_cache_opts.capacity = 30 << 20; + opts.comp_cache_opts.capacity = 0; opts.comp_cache_opts.num_shard_bits = 0; + opts.total_capacity = 100 << 20; + opts.compressed_secondary_ratio = 0.3; cache_ = NewTieredCache(opts); cache_res_mgr_ = std::make_shared>( @@ -1023,7 +1025,7 @@ class CompressedSecCacheTestWithTiered protected: CacheReservationManager* cache_res_mgr() { return cache_res_mgr_.get(); } - Cache* GetTieredCache() { return cache_.get(); } + std::shared_ptr GetTieredCache() { return cache_; } Cache* GetCache() { return static_cast_with_check( @@ -1110,7 +1112,7 @@ TEST_P(CompressedSecCacheTestWithTiered, AdmissionPolicy) { return; } - Cache* tiered_cache = GetTieredCache(); + Cache* tiered_cache = GetTieredCache().get(); Cache* cache = GetCache(); std::vector keys; std::vector vals; @@ -1165,6 +1167,151 @@ TEST_P(CompressedSecCacheTestWithTiered, AdmissionPolicy) { ASSERT_EQ(handle1, nullptr); } +TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdate) { + CompressedSecondaryCache* sec_cache = + reinterpret_cast(GetSecondaryCache()); + std::shared_ptr tiered_cache = GetTieredCache(); + + // Use EXPECT_PRED3 instead of EXPECT_NEAR to void too many size_t to + // double explicit casts + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (30 << 20), + GetPercent(30 << 20, 1)); + size_t sec_capacity; + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (30 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 130 << 20)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (39 << 20), + GetPercent(39 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (39 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 70 << 20)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (21 << 20), + GetPercent(21 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (21 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20)); + 
EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (30 << 20), + GetPercent(30 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (30 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.4)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (40 << 20), + GetPercent(40 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (40 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.2)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (20 << 20), + GetPercent(20 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (20 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 1.0)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (100 << 20), + GetPercent(100 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 100 << 20); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.0)); + // Only check usage for LRU cache. HCC shows a 64KB usage for some reason + if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { + ASSERT_EQ(GetCache()->GetUsage(), 0); + } + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 0); + + ASSERT_NOK(UpdateTieredCache(tiered_cache, -1, 0.3)); + // Only check usage for LRU cache. HCC shows a 64KB usage for some reason + if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { + ASSERT_EQ(GetCache()->GetUsage(), 0); + } + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 0); +} + +TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { + CompressedSecondaryCache* sec_cache = + reinterpret_cast(GetSecondaryCache()); + std::shared_ptr tiered_cache = GetTieredCache(); + + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(10 << 20)); + // Use EXPECT_PRED3 instead of EXPECT_NEAR to void too many size_t to + // double explicit casts + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (37 << 20), + GetPercent(37 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (3 << 20), + GetPercent(3 << 20, 1)); + size_t sec_capacity; + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (30 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 70 << 20)); + // Only check usage for LRU cache. 
HCC is slightly off for some reason + if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (28 << 20), + GetPercent(28 << 20, 1)); + } + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (3 << 20), + GetPercent(3 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (21 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 130 << 20)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (46 << 20), + GetPercent(46 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (3 << 20), + GetPercent(3 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (39 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (37 << 20), + GetPercent(37 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (3 << 20), + GetPercent(3 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (30 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.39)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (45 << 20), + GetPercent(45 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (4 << 20), + GetPercent(4 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (39 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.2)); + // Only check usage for LRU cache. HCC is slightly off for some reason + if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (28 << 20), + GetPercent(28 << 20, 1)); + } + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (2 << 20), + GetPercent(2 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (20 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 1.0)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (100 << 20), + GetPercent(100 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (10 << 20), + GetPercent(10 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 100 << 20); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.0)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (10 << 20), + GetPercent(10 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 0); + + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0)); +} + INSTANTIATE_TEST_CASE_P( CompressedSecCacheTests, CompressedSecCacheTestWithTiered, ::testing::Values( diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index dba68e8d2..28027811a 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -18,6 +18,7 @@ struct Dummy { }; const Dummy kDummy{}; Cache::ObjectPtr const kDummyObj = const_cast(&kDummy); +const char* kTieredCacheName = "TieredCache"; } // namespace // When CacheWithSecondaryAdapter is constructed with the distribute_cache_res @@ -108,7 +109,7 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { // use after free target_->SetEvictionCallback({}); #ifndef NDEBUG - if (distribute_cache_res_) { + if (distribute_cache_res_ && !ratio_changed_) { size_t sec_capacity = 0; Status s = secondary_cache_->GetCapacity(sec_capacity); assert(s.ok()); @@ -416,37 +417,188 @@ std::string 
CacheWithSecondaryAdapter::GetPrintableOptions() const { } const char* CacheWithSecondaryAdapter::Name() const { - // To the user, at least for now, configure the underlying cache with - // a secondary cache. So we pretend to be that cache - return target_->Name(); + if (distribute_cache_res_) { + return kTieredCacheName; + } else { + // To the user, at least for now, configure the underlying cache with + // a secondary cache. So we pretend to be that cache + return target_->Name(); + } +} + +// Update the total cache capacity. If we're distributing cache reservations +// to both primary and secondary, then update the pri_cache_res_reservation +// as well. At the moment, we don't have a good way of handling the case +// where the new capacity < total cache reservations. +void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { + size_t sec_capacity = static_cast( + capacity * (distribute_cache_res_ ? sec_cache_res_ratio_ : 0.0)); + size_t old_sec_capacity = 0; + + if (distribute_cache_res_) { + MutexLock m(&mutex_); + + Status s = secondary_cache_->GetCapacity(old_sec_capacity); + if (!s.ok()) { + return; + } + if (old_sec_capacity > sec_capacity) { + // We're shrinking the cache. We do things in the following order to + // avoid a temporary spike in usage over the configured capacity - + // 1. Lower the secondary cache capacity + // 2. Credit an equal amount (by decreasing pri_cache_res_) to the + // primary cache + // 3. Decrease the primary cache capacity to the total budget + s = secondary_cache_->SetCapacity(sec_capacity); + if (s.ok()) { + s = pri_cache_res_->UpdateCacheReservation( + old_sec_capacity - sec_capacity, + /*increase=*/false); + assert(s.ok()); + target_->SetCapacity(capacity); + } + } else { + // We're expanding the cache. Do it in the following order to avoid + // unnecessary evictions - + // 1. Increase the primary cache capacity to total budget + // 2. Reserve additional memory in primary on behalf of secondary (by + // increasing pri_cache_res_ reservation) + // 3. Increase secondary cache capacity + target_->SetCapacity(capacity); + s = pri_cache_res_->UpdateCacheReservation( + sec_capacity - old_sec_capacity, + /*increase=*/true); + assert(s.ok()); + s = secondary_cache_->SetCapacity(sec_capacity); + assert(s.ok()); + } + } else { + // No cache reservation distribution. Just set the primary cache capacity. + target_->SetCapacity(capacity); + } +} + +// Update the secondary/primary allocation ratio (remember, the primary +// capacity is the total memory budget when distribute_cache_res_ is true). +// When the ratio changes, we may accumulate some error in the calculations +// for secondary cache inflate/deflate and pri_cache_res_ reservations. +// This is due to the rounding of the reservation amount. +// +// We rely on the current pri_cache_res_ total memory used to estimate the +// new secondary cache reservation after the ratio change. For this reason, +// once the ratio is lowered to 0.0 (effectively disabling the secondary +// cache and pri_cache_res_ total mem used going down to 0), we cannot +// increase the ratio and re-enable it, We might remove this limitation +// in the future. 
+Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( + double compressed_secondary_ratio) { + if (!distribute_cache_res_ || sec_cache_res_ratio_ == 0.0) { + return Status::NotSupported(); + } + + MutexLock m(&mutex_); + size_t pri_capacity = target_->GetCapacity(); + size_t sec_capacity = + static_cast(pri_capacity * compressed_secondary_ratio); + size_t old_sec_capacity; + Status s = secondary_cache_->GetCapacity(old_sec_capacity); + if (!s.ok()) { + return s; + } + + assert(old_sec_capacity >= pri_cache_res_->GetTotalMemoryUsed()); + size_t old_sec_reserved = + old_sec_capacity - pri_cache_res_->GetTotalMemoryUsed(); + // Calculate the new secondary cache reservation + size_t sec_reserved = static_cast( + old_sec_reserved * + (double)(compressed_secondary_ratio / sec_cache_res_ratio_)); + sec_cache_res_ratio_ = compressed_secondary_ratio; + if (sec_capacity > old_sec_capacity) { + // We're increasing the ratio, thus ending up with a larger secondary + // cache and a smaller usable primary cache capacity. Similar to + // SetCapacity(), we try to avoid a temporary increase in total usage + // beyond teh configured capacity - + // 1. A higher secondary cache ratio means it gets a higher share of + // cache reservations. So first account for that by deflating the + // secondary cache + // 2. Increase pri_cache_res_ reservation to reflect the new secondary + // cache utilization (increase in capacity - increase in share of cache + // reservation) + // 3. Increase secondary cache capacity + assert(sec_reserved > old_sec_reserved || sec_reserved == 0); + s = secondary_cache_->Deflate(sec_reserved - old_sec_reserved); + assert(s.ok()); + s = pri_cache_res_->UpdateCacheReservation( + (sec_capacity - old_sec_capacity) - (sec_reserved - old_sec_reserved), + /*increase=*/true); + assert(s.ok()); + s = secondary_cache_->SetCapacity(sec_capacity); + assert(s.ok()); + } else { + // We're shrinking the ratio. Try to avoid unnecessary evictions - + // 1. Lower the secondary cache capacity + // 2. Decrease pri_cache_res_ reservation to relect lower secondary + // cache utilization (decrease in capacity - decrease in share of cache + // reservations) + // 3. Inflate the secondary cache to give it back the reduction in its + // share of cache reservations + assert(old_sec_reserved > sec_reserved || sec_reserved == 0); + s = secondary_cache_->SetCapacity(sec_capacity); + if (s.ok()) { + s = pri_cache_res_->UpdateCacheReservation( + (old_sec_capacity - sec_capacity) - (old_sec_reserved - sec_reserved), + /*increase=*/false); + assert(s.ok()); + s = secondary_cache_->Inflate(old_sec_reserved - sec_reserved); + assert(s.ok()); + } + } + +#ifndef NDEBUG + // As mentioned in the function comments, we may accumulate some erros when + // the ratio is changed. 
We set a flag here which disables some assertions + // in the destructor + ratio_changed_ = true; +#endif + return s; +} + +Status CacheWithSecondaryAdapter::UpdateAdmissionPolicy( + TieredAdmissionPolicy adm_policy) { + adm_policy_ = adm_policy; + return Status::OK(); } -std::shared_ptr NewTieredCache(TieredCacheOptions& opts) { - if (!opts.cache_opts) { +std::shared_ptr NewTieredCache(const TieredCacheOptions& _opts) { + if (!_opts.cache_opts) { return nullptr; } - if (opts.adm_policy >= TieredAdmissionPolicy::kAdmPolicyMax) { + if (_opts.adm_policy >= TieredAdmissionPolicy::kAdmPolicyMax) { return nullptr; } + TieredCacheOptions opts = _opts; std::shared_ptr cache; if (opts.cache_type == PrimaryCacheType::kCacheTypeLRU) { LRUCacheOptions cache_opts = *(static_cast_with_check( opts.cache_opts)); - cache_opts.capacity += opts.comp_cache_opts.capacity; + cache_opts.capacity = opts.total_capacity; cache = cache_opts.MakeSharedCache(); } else if (opts.cache_type == PrimaryCacheType::kCacheTypeHCC) { HyperClockCacheOptions cache_opts = *(static_cast_with_check( opts.cache_opts)); - cache_opts.capacity += opts.comp_cache_opts.capacity; + cache_opts.capacity = opts.total_capacity; cache = cache_opts.MakeSharedCache(); } else { return nullptr; } std::shared_ptr sec_cache; + opts.comp_cache_opts.capacity = static_cast( + opts.total_capacity * opts.compressed_secondary_ratio); sec_cache = NewCompressedSecondaryCache(opts.comp_cache_opts); if (opts.nvm_sec_cache) { @@ -463,4 +615,27 @@ std::shared_ptr NewTieredCache(TieredCacheOptions& opts) { return std::make_shared( cache, sec_cache, opts.adm_policy, /*distribute_cache_res=*/true); } + +Status UpdateTieredCache(const std::shared_ptr& cache, + int64_t total_capacity, + double compressed_secondary_ratio, + TieredAdmissionPolicy adm_policy) { + if (!cache || strcmp(cache->Name(), kTieredCacheName)) { + return Status::InvalidArgument(); + } + CacheWithSecondaryAdapter* tiered_cache = + static_cast(cache.get()); + + Status s; + if (total_capacity > 0) { + tiered_cache->SetCapacity(total_capacity); + } + if (compressed_secondary_ratio >= 0.0 && compressed_secondary_ratio <= 1.0) { + s = tiered_cache->UpdateCacheReservationRatio(compressed_secondary_ratio); + } + if (adm_policy < TieredAdmissionPolicy::kAdmPolicyMax) { + s = tiered_cache->UpdateAdmissionPolicy(adm_policy); + } + return s; +} } // namespace ROCKSDB_NAMESPACE diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index c9b846df6..34d52a665 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -45,6 +45,12 @@ class CacheWithSecondaryAdapter : public CacheWrapper { const char* Name() const override; + void SetCapacity(size_t capacity) override; + + Status UpdateCacheReservationRatio(double ratio); + + Status UpdateAdmissionPolicy(TieredAdmissionPolicy adm_policy); + Cache* TEST_GetCache() { return target_.get(); } SecondaryCache* TEST_GetSecondaryCache() { return secondary_cache_.get(); } @@ -75,6 +81,10 @@ class CacheWithSecondaryAdapter : public CacheWrapper { // Fraction of a cache memory reservation to be assigned to the secondary // cache double sec_cache_res_ratio_; + port::Mutex mutex_; +#ifndef NDEBUG + bool ratio_changed_ = false; +#endif }; } // namespace ROCKSDB_NAMESPACE diff --git a/cache/tiered_secondary_cache_test.cc b/cache/tiered_secondary_cache_test.cc index 67b0e3955..69678813b 100644 --- a/cache/tiered_secondary_cache_test.cc +++ b/cache/tiered_secondary_cache_test.cc @@ -182,14 +182,17 @@ class 
DBTieredSecondaryCacheTest : public DBTestBase { size_t nvm_capacity) { LRUCacheOptions lru_opts; TieredCacheOptions opts; - lru_opts.capacity = pri_capacity; + lru_opts.capacity = 0; lru_opts.num_shard_bits = 0; lru_opts.high_pri_pool_ratio = 0; opts.cache_opts = &lru_opts; opts.cache_type = PrimaryCacheType::kCacheTypeLRU; opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyThreeQueue; - opts.comp_cache_opts.capacity = compressed_capacity; + opts.comp_cache_opts.capacity = 0; opts.comp_cache_opts.num_shard_bits = 0; + opts.total_capacity = pri_capacity + compressed_capacity; + opts.compressed_secondary_ratio = + (double)compressed_capacity / opts.total_capacity; nvm_sec_cache_.reset(new TestSecondaryCache(nvm_capacity)); opts.nvm_sec_cache = nvm_sec_cache_; cache_ = NewTieredCache(opts); diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 061ebbb36..d3762b4a2 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -12,6 +12,7 @@ #pragma once #include +#include #include #include @@ -498,16 +499,36 @@ enum TieredAdmissionPolicy { // allocations costed to the block cache, will be distributed // proportionally across both the primary and secondary. struct TieredCacheOptions { - ShardedCacheOptions* cache_opts; - PrimaryCacheType cache_type; - TieredAdmissionPolicy adm_policy; + ShardedCacheOptions* cache_opts = nullptr; + PrimaryCacheType cache_type = PrimaryCacheType::kCacheTypeLRU; + TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto; CompressedSecondaryCacheOptions comp_cache_opts; + // Any capacity specified in LRUCacheOptions, HyperClockCacheOptions and + // CompressedSecondaryCacheOptions is ignored + // The total_capacity specified here is taken as the memory budget and + // divided between the primary block cache and compressed secondary cache + size_t total_capacity = 0; + double compressed_secondary_ratio = 0.0; // An optional secondary cache that will serve as the persistent cache // tier. If present, compressed blocks will be written to this // secondary cache. std::shared_ptr nvm_sec_cache; }; +extern std::shared_ptr NewTieredCache( + const TieredCacheOptions& cache_opts); + // EXPERIMENTAL -extern std::shared_ptr NewTieredCache(TieredCacheOptions& cache_opts); +// Dynamically update some of the parameters of a TieredCache. The input +// cache shared_ptr should have been allocated using NewTieredVolatileCache. +// At the moment, there are a couple of limitations - +// 1. The total_capacity should be > the WriteBufferManager max size, if +// using the block cache charging feature +// 2. Once the compressed secondary cache is disabled by setting the +// compressed_secondary_ratio to 0.0, it cannot be dynamically re-enabled +// again +extern Status UpdateTieredCache( + const std::shared_ptr& cache, int64_t total_capacity = -1, + double compressed_secondary_ratio = std::numeric_limits::max(), + TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyMax); } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md b/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md new file mode 100644 index 000000000..9cebc73c1 --- /dev/null +++ b/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md @@ -0,0 +1 @@ +Updating the tiered cache (cache allocated using NewTieredCache()) by calling SetCapacity() on it was not working properly. The initial creation would set the primary cache capacity to the combined primary and compressed secondary cache capacity. 
But SetCapacity() would just set the primary cache capacity. With this fix, the user always specifies the total budget and compressed secondary cache ratio on creation. Subsequently, SetCapacity() will distribute the new capacity across the two caches by the same ratio. diff --git a/unreleased_history/public_api_changes/dyn_tiered_cache_update.md b/unreleased_history/public_api_changes/dyn_tiered_cache_update.md new file mode 100644 index 000000000..ebe0c3925 --- /dev/null +++ b/unreleased_history/public_api_changes/dyn_tiered_cache_update.md @@ -0,0 +1 @@ +The `NewTieredCache` API has been changed to take the total cache capacity (inclusive of both the primary and the compressed secondary cache) and the ratio of total capacity to allocate to the compressed cache. These are specified in `TieredCacheOptions`. Any capacity specified in `LRUCacheOptions`, `HyperClockCacheOptions` and `CompressedSecondaryCacheOptions` is ignored. A new API, `UpdateTieredCache` is provided to dynamically update the total capacity, ratio of compressed cache, and admission policy. From 3d67b5e8e5b44946f35c5bbfeea048df7a1d5e93 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Fri, 22 Sep 2023 18:12:08 -0700 Subject: [PATCH 153/386] Look ahead in the block cache to tune readahead size (#11860) Summary: Implement block cache lookup to determine readahead_size during scans. It is enabled only if auto_readahead_size, block_cache, and iterate_upper_bound are all set. Design - 1. Whenever there is a cache miss and FilePrefetchBuffer is called, a callback is made to determine readahead_size for that prefetch. 2. The callback iterates over the index and does a block cache lookup for each data block handle until the existing readahead_size is reached. Then it removes the cache-hit data blocks from the end to calculate the optimized readahead_size. 3. Since index_iter_ is moved, it stores block handles in a queue, and uses that queue to get block handles instead of calling index_iter_->Next(). 4. This is for sync scans. Async scan support is in progress. NOTE: The issue right now is that after Seek and Next, if Prev is called, there is no way to do the Prev operation; index_iter_ is already pointing to a different block.
So it returns "Not supported" in that case with error message - "auto tuning of readahead size is not supported with Prev op" Pull Request resolved: https://github.com/facebook/rocksdb/pull/11860 Test Plan: - Added new unit test - crash_tests - Running scans locally to check for any regression Reviewed By: anand1976 Differential Revision: D49548118 Pulled By: akankshamahajan15 fbshipit-source-id: f1aee409a71b4ad9e5bf3610f43edf30c6630c78 --- db_stress_tool/db_stress_test_base.cc | 3 +- db_stress_tool/no_batched_ops_stress.cc | 3 +- file/file_prefetch_buffer.cc | 4 +- file/file_prefetch_buffer.h | 20 +- file/prefetch_test.cc | 240 ++++++++++++- include/rocksdb/options.h | 2 + .../block_based/block_based_table_iterator.cc | 315 ++++++++++++++---- .../block_based/block_based_table_iterator.h | 82 ++++- table/block_based/block_based_table_reader.cc | 62 +++- table/block_based/block_based_table_reader.h | 27 +- table/block_based/block_prefetcher.cc | 23 +- table/block_based/block_prefetcher.h | 10 +- table/block_based/partitioned_filter_block.cc | 2 +- .../block_based/partitioned_index_iterator.cc | 3 +- table/block_based/partitioned_index_reader.cc | 2 +- .../auto_readahead_size.md | 1 + 16 files changed, 697 insertions(+), 102 deletions(-) create mode 100644 unreleased_history/performance_improvements/auto_readahead_size.md diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index c79c0e237..3f28b7a8e 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1375,7 +1375,8 @@ Status StressTest::TestIterate(ThreadState* thread, key, op_logs, &diverged); const bool no_reverse = - (FLAGS_memtablerep == "prefix_hash" && !expect_total_order); + (FLAGS_memtablerep == "prefix_hash" && !expect_total_order) || + FLAGS_auto_readahead_size; for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); ++i) { if (no_reverse || thread->rand.OneIn(2)) { iter->Next(); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 92d0c9fca..80e5942e8 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1914,7 +1914,8 @@ class NonBatchedOpsStressTest : public StressTest { if (static_cast(curr) < lb) { iter->Next(); op_logs += "N"; - } else if (static_cast(curr) >= ub) { + } else if (static_cast(curr) >= ub && + !FLAGS_auto_readahead_size) { iter->Prev(); op_logs += "P"; } else { diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index f7c7f4a83..f09e57033 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -660,8 +660,8 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return false; } } - UpdateReadAheadSizeForUpperBound(offset, n); - s = Prefetch(opts, reader, offset, n + readahead_size_); + size_t current_readahead_size = ReadAheadSizeTuning(offset, n); + s = Prefetch(opts, reader, offset, n + current_readahead_size); } if (!s.ok()) { if (status) { diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 875f258ea..2be6b9f8a 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -90,6 +90,7 @@ class FilePrefetchBuffer { uint64_t num_file_reads_for_auto_readahead = 0, uint64_t upper_bound_offset = 0, FileSystem* fs = nullptr, SystemClock* clock = nullptr, Statistics* stats = nullptr, + const std::function& cb = nullptr, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) : curr_(0), readahead_size_(readahead_size), @@ -108,7 
+109,8 @@ class FilePrefetchBuffer { clock_(clock), stats_(stats), usage_(usage), - upper_bound_offset_(upper_bound_offset) { + upper_bound_offset_(upper_bound_offset), + readaheadsize_cb_(cb) { assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) || (num_file_reads_ == 0)); // If ReadOptions.async_io is enabled, data is asynchronously filled in @@ -448,6 +450,21 @@ class FilePrefetchBuffer { return false; } + // Performs tuning to calculate readahead_size. + size_t ReadAheadSizeTuning(uint64_t offset, size_t n) { + UpdateReadAheadSizeForUpperBound(offset, n); + + if (readaheadsize_cb_ != nullptr && readahead_size_ > 0) { + size_t updated_readahead_size = 0; + readaheadsize_cb_(offset, readahead_size_, updated_readahead_size); + if (readahead_size_ != updated_readahead_size) { + RecordTick(stats_, READAHEAD_TRIMMED); + } + return updated_readahead_size; + } + return readahead_size_; + } + std::vector bufs_; // curr_ represents the index for bufs_ indicating which buffer is being // consumed currently. @@ -494,5 +511,6 @@ class FilePrefetchBuffer { // ReadOptions.auto_readahead_size are set to trim readahead_size upto // upper_bound_offset_ during prefetching. uint64_t upper_bound_offset_ = 0; + std::function readaheadsize_cb_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 5762be1a0..ef71dd2ef 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -654,9 +654,6 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) { SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start", [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->EnableProcessing(); - SyncPoint::GetInstance()->EnableProcessing(); Status s = TryReopen(options); @@ -1233,6 +1230,243 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) { Close(); } +TEST_P(PrefetchTest, PrefetchWithBlockLookupAutoTuneTest) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + + std::shared_ptr fs = + std::make_shared(FileSystem::Default(), false); + + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + Options options; + SetGenericOptions(env.get(), /*use_direct_io=*/false, options); + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options; + SetBlockBasedTableOptions(table_options); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + Status s = TryReopen(options); + ASSERT_OK(s); + + Random rnd(309); + WriteBatch batch; + + for (int i = 0; i < 26; i++) { + std::string key = "my_key_"; + + for (int j = 0; j < 10; j++) { + key += char('a' + i); + ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); + } + } + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::string start_key = "my_key_a"; + + std::string end_key = "my_key_"; + for (int j = 0; j < 10; j++) { + end_key += char('a' + 25); + } + + Slice least(start_key.data(), start_key.size()); + Slice greatest(end_key.data(), end_key.size()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); + + // Try with different num_file_reads_for_auto_readahead from 0 to 3. + for (size_t i = 0; i < 3; i++) { + std::shared_ptr cache = NewLRUCache(1024 * 1024, 2); + table_options.block_cache = cache; + table_options.no_block_cache = false; + table_options.num_file_reads_for_auto_readahead = i; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + s = TryReopen(options); + ASSERT_OK(s); + + // Warm up the cache. 
+ { + auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); + + iter->Seek("my_key_bbb"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_ccccccccc"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_ddd"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_ddddddd"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_e"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_eeeee"); + ASSERT_TRUE(iter->Valid()); + + iter->Seek("my_key_eeeeeeeee"); + ASSERT_TRUE(iter->Valid()); + } + + ReadOptions ropts; + ropts.auto_readahead_size = true; + ReadOptions cmp_ro; + cmp_ro.auto_readahead_size = false; + + if (std::get<0>(GetParam())) { + ropts.readahead_size = cmp_ro.readahead_size = 32768; + } + + // With and without tuning readahead_size. + { + ASSERT_OK(options.statistics->Reset()); + // Seek. + { + Slice ub = Slice("my_key_uuu"); + Slice* ub_ptr = &ub; + cmp_ro.iterate_upper_bound = ub_ptr; + ropts.iterate_upper_bound = ub_ptr; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + auto cmp_iter = std::unique_ptr(db_->NewIterator(cmp_ro)); + + Slice seek_key = Slice("my_key_aaa"); + iter->Seek(seek_key); + cmp_iter->Seek(seek_key); + + while (iter->Valid() && cmp_iter->Valid()) { + if (iter->key() != cmp_iter->key()) { + // Error + ASSERT_TRUE(false); + } + iter->Next(); + cmp_iter->Next(); + } + + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_GT(readahead_trimmed, 0); + + ASSERT_OK(cmp_iter->status()); + ASSERT_OK(iter->status()); + } + + // Reseek with new upper_bound_iterator. + { + Slice ub = Slice("my_key_y"); + ropts.iterate_upper_bound = &ub; + cmp_ro.iterate_upper_bound = &ub; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + auto cmp_iter = std::unique_ptr(db_->NewIterator(cmp_ro)); + + Slice reseek_key = Slice("my_key_v"); + iter->Seek(reseek_key); + cmp_iter->Seek(reseek_key); + + while (iter->Valid() && cmp_iter->Valid()) { + if (iter->key() != cmp_iter->key()) { + // Error + ASSERT_TRUE(false); + } + iter->Next(); + cmp_iter->Next(); + } + + uint64_t readahead_trimmed = + options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); + ASSERT_GT(readahead_trimmed, 0); + + ASSERT_OK(cmp_iter->status()); + ASSERT_OK(iter->status()); + } + } + Close(); + } +} + +TEST_F(PrefetchTest, PrefetchWithBlockLookupAutoTuneWithPrev) { + if (mem_env_ || encrypted_env_) { + ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); + return; + } + + // First param is if the mockFS support_prefetch or not + std::shared_ptr fs = + std::make_shared(FileSystem::Default(), false); + + std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); + Options options; + SetGenericOptions(env.get(), /*use_direct_io=*/false, options); + options.statistics = CreateDBStatistics(); + BlockBasedTableOptions table_options; + SetBlockBasedTableOptions(table_options); + std::shared_ptr cache = NewLRUCache(1024 * 1024, 2); + table_options.block_cache = cache; + table_options.no_block_cache = false; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + Status s = TryReopen(options); + ASSERT_OK(s); + + Random rnd(309); + WriteBatch batch; + + for (int i = 0; i < 26; i++) { + std::string key = "my_key_"; + + for (int j = 0; j < 10; j++) { + key += char('a' + i); + ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); + } + } + ASSERT_OK(db_->Write(WriteOptions(), &batch)); + + std::string start_key = "my_key_a"; + + std::string end_key = "my_key_"; + for (int j = 0; j < 10; j++) { + end_key 
+= char('a' + 25); + } + + Slice least(start_key.data(), start_key.size()); + Slice greatest(end_key.data(), end_key.size()); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); + + ReadOptions ropts; + ropts.auto_readahead_size = true; + + { + // Seek. + Slice ub = Slice("my_key_uuu"); + Slice* ub_ptr = &ub; + ropts.iterate_upper_bound = ub_ptr; + ropts.auto_readahead_size = true; + + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + + Slice seek_key = Slice("my_key_bbb"); + iter->Seek(seek_key); + ASSERT_TRUE(iter->Valid()); + + // Prev op should fail with auto tuning of readahead_size. + iter->Prev(); + ASSERT_TRUE(iter->status().IsNotSupported()); + ASSERT_FALSE(iter->Valid()); + + // Reseek would follow as usual. + iter->Seek(seek_key); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + } + Close(); +} + // This test verifies the functionality of ReadOptions.adaptive_readahead. TEST_P(PrefetchTest, DBIterLevelReadAhead) { const int kNumKeys = 1000; diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 9e49e59fb..ac70def24 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1719,6 +1719,8 @@ struct ReadOptions { // during scans internally. // For this feature to enabled, iterate_upper_bound must also be specified. // + // NOTE: Not supported with Prev operation and it will be return NotSupported + // error. Enable it for forward scans only. // Default: false bool auto_readahead_size = false; diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 9cc8ca8c9..57d14a285 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -18,10 +18,21 @@ void BlockBasedTableIterator::Seek(const Slice& target) { void BlockBasedTableIterator::SeekImpl(const Slice* target, bool async_prefetch) { - bool is_first_pass = true; + ResetBlockCacheLookupVar(); + bool is_first_pass = !async_read_in_progress_; + bool autotune_readaheadsize = is_first_pass && + read_options_.auto_readahead_size && + read_options_.iterate_upper_bound; + + if (autotune_readaheadsize && + table_->get_rep()->table_options.block_cache.get() && + !read_options_.async_io) { + readahead_cache_lookup_ = true; + } + + // Second pass. if (async_read_in_progress_) { AsyncInitDataBlock(false); - is_first_pass = false; } is_out_of_bound_ = false; @@ -44,7 +55,11 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } bool need_seek_index = true; - if (block_iter_points_to_real_block_ && block_iter_.Valid()) { + + // In case of readahead_cache_lookup_, index_iter_ could change to find the + // readahead size in BlockCacheLookupForReadAheadSize so it needs to reseek. + if (IsIndexAtCurr() && block_iter_points_to_real_block_ && + block_iter_.Valid()) { // Reseek. prev_block_offset_ = index_iter_->value().handle.offset(); @@ -79,8 +94,9 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } } - if (read_options_.auto_readahead_size && read_options_.iterate_upper_bound && - is_first_pass) { + is_index_at_curr_block_ = true; + + if (autotune_readaheadsize) { FindReadAheadSizeUpperBound(); if (target) { index_iter_->Seek(*target); @@ -95,6 +111,9 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } } + // After reseek, index_iter_ point to the right key i.e. target in + // case of readahead_cache_lookup_. So index_iter_ can be used directly. 
+ IndexValue v = index_iter_->value(); const bool same_block = block_iter_points_to_real_block_ && v.handle.offset() == prev_block_offset_; @@ -172,6 +191,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { } SavePrevIndexValue(); + ResetBlockCacheLookupVar(); // Call Seek() rather than SeekForPrev() in the index block, because the // target data block will likely to contain the position for `target`, the @@ -211,6 +231,8 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { } } + is_index_at_curr_block_ = true; + InitDataBlock(); block_iter_.SeekForPrev(target); @@ -225,12 +247,19 @@ void BlockBasedTableIterator::SeekToLast() { is_out_of_bound_ = false; is_at_first_key_from_index_ = false; seek_stat_state_ = kNone; + SavePrevIndexValue(); + ResetBlockCacheLookupVar(); + index_iter_->SeekToLast(); + if (!index_iter_->Valid()) { ResetDataIter(); return; } + + is_index_at_curr_block_ = true; + InitDataBlock(); block_iter_.SeekToLast(); FindKeyBackward(); @@ -259,6 +288,14 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) { } void BlockBasedTableIterator::Prev() { + // Return Error. + if (readahead_cache_lookup_) { + block_iter_.Invalidate(Status::NotSupported( + "auto tuning of readahead_size is not supported with Prev operation.")); + return; + } + + ResetBlockCacheLookupVar(); if (is_at_first_key_from_index_) { is_at_first_key_from_index_ = false; @@ -278,7 +315,18 @@ void BlockBasedTableIterator::Prev() { } void BlockBasedTableIterator::InitDataBlock() { - BlockHandle data_block_handle = index_iter_->value().handle; + BlockHandle data_block_handle; + bool is_in_cache = false; + bool use_block_cache_for_lookup = true; + + if (DoesContainBlockHandles()) { + data_block_handle = block_handles_.front().index_val_.handle; + is_in_cache = block_handles_.front().is_cache_hit_; + use_block_cache_for_lookup = false; + } else { + data_block_handle = index_iter_->value().handle; + } + if (!block_iter_points_to_real_block_ || data_block_handle.offset() != prev_block_offset_ || // if previous attempt of reading the block missed cache, try again @@ -286,26 +334,50 @@ void BlockBasedTableIterator::InitDataBlock() { if (block_iter_points_to_real_block_) { ResetDataIter(); } - auto* rep = table_->get_rep(); bool is_for_compaction = lookup_context_.caller == TableReaderCaller::kCompaction; - // Prefetch additional data for range scans (iterators). - // Implicit auto readahead: - // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0. - // Explicit user requested readahead: - // Enabled from the very first IO when ReadOptions.readahead_size is set. - block_prefetcher_.PrefetchIfNeeded( - rep, data_block_handle, read_options_.readahead_size, is_for_compaction, - /*no_sequential_checking=*/false, read_options_); - Status s; - table_->NewDataBlockIterator( - read_options_, data_block_handle, &block_iter_, BlockType::kData, - /*get_context=*/nullptr, &lookup_context_, - block_prefetcher_.prefetch_buffer(), - /*for_compaction=*/is_for_compaction, /*async_read=*/false, s, - /*use_block_cache_for_lookup=*/true); + + // Initialize Data Block From CacheableEntry. 
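// If the handle at the front of block_handles_ was already found (and pinned)
// in the block cache by BlockCacheLookupForReadAheadSize, build block_iter_
// straight from that CachableEntry below. Otherwise fall through to the usual
// prefetch + NewDataBlockIterator path; when the handle came from
// block_handles_, use_block_cache_for_lookup is false there because the cache
// lookup was already done during the readahead sizing pass.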
+ if (is_in_cache) { + Status s; + block_iter_.Invalidate(Status::OK()); + table_->NewDataBlockIterator( + read_options_, (block_handles_.front().cachable_entry_).As(), + &block_iter_, s); + } else { + auto* rep = table_->get_rep(); + + std::function readaheadsize_cb = + nullptr; + if (readahead_cache_lookup_) { + readaheadsize_cb = std::bind( + &BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3); + } + + // Prefetch additional data for range scans (iterators). + // Implicit auto readahead: + // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0. + // Explicit user requested readahead: + // Enabled from the very first IO when ReadOptions.readahead_size is + // set. + block_prefetcher_.PrefetchIfNeeded( + rep, data_block_handle, read_options_.readahead_size, + is_for_compaction, + /*no_sequential_checking=*/false, read_options_, readaheadsize_cb); + + Status s; + table_->NewDataBlockIterator( + read_options_, data_block_handle, &block_iter_, BlockType::kData, + /*get_context=*/nullptr, &lookup_context_, + block_prefetcher_.prefetch_buffer(), + /*for_compaction=*/is_for_compaction, /*async_read=*/false, s, + use_block_cache_for_lookup); + } block_iter_points_to_real_block_ = true; + CheckDataBlockWithinUpperBound(); if (!is_for_compaction && (seek_stat_state_ & kDataBlockReadSinceLastSeek) == 0) { @@ -331,6 +403,16 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { ResetDataIter(); } auto* rep = table_->get_rep(); + + std::function readaheadsize_cb = + nullptr; + if (readahead_cache_lookup_) { + readaheadsize_cb = std::bind( + &BlockBasedTableIterator::BlockCacheLookupForReadAheadSize, this, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3); + } + // Prefetch additional data for range scans (iterators). // Implicit auto readahead: // Enabled after 2 sequential IOs when ReadOptions.readahead_size == 0. @@ -343,7 +425,7 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) { block_prefetcher_.PrefetchIfNeeded( rep, data_block_handle, read_options_.readahead_size, is_for_compaction, /*no_sequential_checking=*/read_options_.async_io, - read_options_); + read_options_, readaheadsize_cb); Status s; table_->NewDataBlockIterator( @@ -398,20 +480,29 @@ bool BlockBasedTableIterator::MaterializeCurrentBlock() { block_iter_.SeekToFirst(); + // MaterializeCurrentBlock is called when block is actually read by + // calling InitDataBlock. is_at_first_key_from_index_ will be false for block + // handles placed in blockhandle. So index_ will be pointing to current block. + // After InitDataBlock, index_iter_ can point to different block if + // BlockCacheLookupForReadAheadSize is called. + IndexValue index_val; + if (DoesContainBlockHandles()) { + index_val = block_handles_.front().index_val_; + } else { + index_val = index_iter_->value(); + } + if (!block_iter_.Valid() || - icomp_.Compare(block_iter_.key(), - index_iter_->value().first_internal_key) != 0) { + icomp_.Compare(block_iter_.key(), index_val.first_internal_key) != 0) { block_iter_.Invalidate(Status::Corruption( "first key in index doesn't match first key in block")); return false; } - return true; } void BlockBasedTableIterator::FindKeyForward() { // This method's code is kept short to make it likely to be inlined. 
- assert(!is_out_of_bound_); assert(block_iter_points_to_real_block_); @@ -434,40 +525,66 @@ void BlockBasedTableIterator::FindBlockForward() { return; } // Whether next data block is out of upper bound, if there is one. + // index_iter_ can point to different block in case of + // readahead_cache_lookup_. readahead_cache_lookup_ will be handle the + // upper_bound check. const bool next_block_is_out_of_bound = - read_options_.iterate_upper_bound != nullptr && + IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr && block_iter_points_to_real_block_ && block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock; + assert(!next_block_is_out_of_bound || user_comparator_.CompareWithoutTimestamp( *read_options_.iterate_upper_bound, /*a_has_ts=*/false, index_iter_->user_key(), /*b_has_ts=*/true) <= 0); + ResetDataIter(); - index_iter_->Next(); - if (next_block_is_out_of_bound) { - // The next block is out of bound. No need to read it. - TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr); - // We need to make sure this is not the last data block before setting - // is_out_of_bound_, since the index key for the last data block can be - // larger than smallest key of the next file on the same level. - if (index_iter_->Valid()) { - is_out_of_bound_ = true; - } - return; - } - if (!index_iter_->Valid()) { - return; + if (DoesContainBlockHandles()) { + // Advance and point to that next Block handle to make that block handle + // current. + block_handles_.pop_front(); } - IndexValue v = index_iter_->value(); + if (!DoesContainBlockHandles()) { + // For readahead_cache_lookup_ enabled scenario - + // 1. In case of Seek, block_handle will be empty and it should be follow + // as usual doing index_iter_->Next(). + // 2. If block_handles is empty and index is not at current because of + // lookup (during Next), it should skip doing index_iter_->Next(), as + // it's already pointing to next block; + if (IsIndexAtCurr()) { + index_iter_->Next(); + } else { + // Skip Next as index_iter_ already points to correct index when it + // iterates in BlockCacheLookupForReadAheadSize. + is_index_at_curr_block_ = true; + } - if (!v.first_internal_key.empty() && allow_unprepared_value_) { - // Index contains the first key of the block. Defer reading the block. - is_at_first_key_from_index_ = true; - return; - } + if (next_block_is_out_of_bound) { + // The next block is out of bound. No need to read it. + TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", + nullptr); + // We need to make sure this is not the last data block before setting + // is_out_of_bound_, since the index key for the last data block can be + // larger than smallest key of the next file on the same level. + if (index_iter_->Valid()) { + is_out_of_bound_ = true; + } + return; + } + if (!index_iter_->Valid()) { + return; + } + IndexValue v = index_iter_->value(); + + if (!v.first_internal_key.empty() && allow_unprepared_value_) { + // Index contains the first key of the block. Defer reading the block. 
+ is_at_first_key_from_index_ = true; + return; + } + } InitDataBlock(); block_iter_.SeekToFirst(); } while (!block_iter_.Valid()); @@ -495,7 +612,7 @@ void BlockBasedTableIterator::FindKeyBackward() { } void BlockBasedTableIterator::CheckOutOfBound() { - if (read_options_.iterate_upper_bound != nullptr && + if (IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr && block_upper_bound_check_ != BlockUpperBound::kUpperBoundBeyondCurBlock && Valid()) { is_out_of_bound_ = @@ -506,7 +623,7 @@ void BlockBasedTableIterator::CheckOutOfBound() { } void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() { - if (read_options_.iterate_upper_bound != nullptr && + if (IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr && block_iter_points_to_real_block_) { block_upper_bound_check_ = (user_comparator_.CompareWithoutTimestamp( *read_options_.iterate_upper_bound, @@ -532,15 +649,7 @@ void BlockBasedTableIterator::FindReadAheadSizeUpperBound() { // If curr block's index key >= iterate_upper_bound, it // means all the keys in next block or above are out of // bound. - bool next_block_out_of_bound = - (user_comparator_.CompareWithoutTimestamp( - index_iter_->user_key(), - /*a_has_ts=*/true, *read_options_.iterate_upper_bound, - /*b_has_ts=*/false) >= 0 - ? true - : false); - - if (next_block_out_of_bound) { + if (IsNextBlockOutOfBound()) { break; } @@ -558,4 +667,92 @@ void BlockBasedTableIterator::FindReadAheadSizeUpperBound() { block_prefetcher_.SetUpperBoundOffset(start_offset + total_bytes_till_upper_bound); } + +void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( + uint64_t offset, size_t readahead_size, size_t& updated_readahead_size) { + updated_readahead_size = readahead_size; + + // readahead_cache_lookup_ can be set false after Seek, if after Seek or Next + // there is SeekForPrev or any other backward operation. + if (!readahead_cache_lookup_) { + return; + } + + assert(!DoesContainBlockHandles()); + assert(index_iter_->value().handle.offset() == offset); + + // Error. current offset should be equal to what's requested for prefetching. + if (index_iter_->value().handle.offset() != offset) { + return; + } + + size_t current_readahead_size = 0; + size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); + + // Add the current block to block_handles_. + { + BlockHandleInfo block_handle_info; + block_handle_info.index_val_ = index_iter_->value(); + block_handles_.emplace_back(std::move(block_handle_info)); + } + + // Current block is included in length. Readahead should start from next + // block. + index_iter_->Next(); + is_index_at_curr_block_ = false; + + while (index_iter_->Valid()) { + BlockHandle block_handle = index_iter_->value().handle; + + // Adding this data block exceeds passed down readahead_size. So this data + // block won't be added. + if (current_readahead_size + block_handle.size() + footer > + readahead_size) { + break; + } + + current_readahead_size += block_handle.size(); + current_readahead_size += footer; + + // For current data block, do the lookup in the cache. Lookup should pin the + // data block and add the placeholder for cache. 
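// A hit keeps the block pinned via cachable_entry_ so InitDataBlock() can
// reuse it without another lookup. is_cache_hit_ also feeds the trailing-hit
// trimming further down, which shrinks the readahead so it ends at the last
// block that actually has to be read from the file.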
+ BlockHandleInfo block_handle_info; + block_handle_info.index_val_ = index_iter_->value(); + + Status s = table_->LookupAndPinBlocksInCache( + read_options_, block_handle, + &(block_handle_info.cachable_entry_).As()); + if (!s.ok()) { + break; + } + block_handle_info.is_cache_hit_ = + (block_handle_info.cachable_entry_.GetValue() || + block_handle_info.cachable_entry_.GetCacheHandle()); + + // Add the handle to the queue. + block_handles_.emplace_back(std::move(block_handle_info)); + + // Can't figure out for current block if current block + // is out of bound. But for next block we can find that. + // If curr block's index key >= iterate_upper_bound, it + // means all the keys in next block or above are out of + // bound. + if (IsNextBlockOutOfBound()) { + break; + } + index_iter_->Next(); + }; + + // Iterate cache hit block handles from the end till a Miss is there, to + // update the readahead_size. + for (auto it = block_handles_.rbegin(); + it != block_handles_.rend() && (*it).is_cache_hit_ == true; ++it) { + current_readahead_size -= (*it).index_val_.handle.size(); + current_readahead_size -= footer; + } + + updated_readahead_size = current_readahead_size; + ResetPreviousBlockOffset(); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index ce407467e..c668ced66 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -7,6 +7,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once +#include + #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_based_table_reader_impl.h" #include "table/block_based/block_prefetcher.h" @@ -44,7 +46,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { async_read_in_progress_(false), is_last_level_(table->IsLastLevel()) {} - ~BlockBasedTableIterator() {} + ~BlockBasedTableIterator() override { ClearBlockHandles(); } void Seek(const Slice& target) override; void SeekForPrev(const Slice& target) override; @@ -58,6 +60,11 @@ class BlockBasedTableIterator : public InternalIteratorBase { (is_at_first_key_from_index_ || (block_iter_points_to_real_block_ && block_iter_.Valid())); } + + // For block cache readahead lookup scenario - + // If is_at_first_key_from_index_ is true, InitDataBlock hasn't been + // called. It means block_handles is empty and index_ point to current block. + // So index_iter_ can be accessed directly. Slice key() const override { assert(Valid()); if (is_at_first_key_from_index_) { @@ -74,6 +81,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { return block_iter_.user_key(); } } + bool PrepareValue() override { assert(Valid()); @@ -104,8 +112,12 @@ class BlockBasedTableIterator : public InternalIteratorBase { return block_iter_.value(); } Status status() const override { - // Prefix index set status to NotFound when the prefix does not exist - if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) { + // In case of block cache readahead lookup, it won't add the block to + // block_handles if it's index is invalid. So index_iter_->status check can + // be skipped. + // Prefix index set status to NotFound when the prefix does not exist. 
+ if (IsIndexAtCurr() && !index_iter_->status().ok() && + !index_iter_->status().IsNotFound()) { return index_iter_->status(); } else if (block_iter_points_to_real_block_) { return block_iter_.status(); @@ -159,7 +171,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { } void SavePrevIndexValue() { - if (block_iter_points_to_real_block_) { + if (block_iter_points_to_real_block_ && IsIndexAtCurr()) { // Reseek. If they end up with the same data block, we shouldn't re-fetch // the same data block. prev_block_offset_ = index_iter_->value().handle.offset(); @@ -235,6 +247,18 @@ class BlockBasedTableIterator : public InternalIteratorBase { kReportOnUseful = 1 << 2, }; + // BlockHandleInfo is used to store the info needed when block cache lookup + // ahead is enabled to tune readahead_size. + struct BlockHandleInfo { + BlockHandleInfo() {} + + IndexValue index_val_; + bool is_cache_hit_ = false; + CachableEntry cachable_entry_; + }; + + bool IsIndexAtCurr() const { return is_index_at_curr_block_; } + const BlockBasedTable* table_; const ReadOptions& read_options_; const InternalKeyComparator& icomp_; @@ -268,6 +292,22 @@ class BlockBasedTableIterator : public InternalIteratorBase { mutable SeekStatState seek_stat_state_ = SeekStatState::kNone; bool is_last_level_; + // If set to true, it'll lookup in the cache ahead to estimate the readahead + // size based on cache hit and miss. + bool readahead_cache_lookup_ = false; + + // It stores all the block handles that are lookuped in cache ahead when + // BlockCacheLookupForReadAheadSize is called. Since index_iter_ may point to + // different blocks when readahead_size is calculated in + // BlockCacheLookupForReadAheadSize, to avoid index_iter_ reseek, + // block_handles_ is used. + std::deque block_handles_; + + // During cache lookup to find readahead size, index_iter_ is iterated and it + // can point to a different block. is_index_at_curr_block_ keeps track of + // that. + bool is_index_at_curr_block_ = true; + // If `target` is null, seek to first. void SeekImpl(const Slice* target, bool async_prefetch); @@ -307,6 +347,40 @@ class BlockBasedTableIterator : public InternalIteratorBase { return true; } + // *** BEGIN APIs relevant to auto tuning of readahead_size *** void FindReadAheadSizeUpperBound(); + + // This API is called to lookup the data blocks ahead in the cache to estimate + // the current readahead_size. + void BlockCacheLookupForReadAheadSize(uint64_t offset, size_t readahead_size, + size_t& updated_readahead_size); + + void ResetBlockCacheLookupVar() { + readahead_cache_lookup_ = false; + ClearBlockHandles(); + } + + bool IsNextBlockOutOfBound() { + // If curr block's index key >= iterate_upper_bound, it means all the keys + // in next block or above are out of bound. + return (user_comparator_.CompareWithoutTimestamp( + index_iter_->user_key(), + /*a_has_ts=*/true, *read_options_.iterate_upper_bound, + /*b_has_ts=*/false) >= 0 + ? true + : false); + } + + void ClearBlockHandles() { block_handles_.clear(); } + + // Reset prev_block_offset_. If index_iter_ has moved ahead, it won't get + // accurate prev_block_offset_. 
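// Resetting to the max sentinel makes the next InitDataBlock() treat whatever
// handle comes next as a new block rather than a reseek into the block at the
// stale prev_block_offset_.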
+ void ResetPreviousBlockOffset() { + prev_block_offset_ = std::numeric_limits::max(); + } + + bool DoesContainBlockHandles() { return !block_handles_.empty(); } + + // *** END APIs relevant to auto tuning of readahead_size *** }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index c53aa0fb9..7658150aa 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -101,7 +101,10 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) { bool for_compaction, CachableEntry* block_entry, \ GetContext* get_context, BlockCacheLookupContext* lookup_context, \ BlockContents* contents, bool async_read, \ - bool use_block_cache_for_lookup) const; + bool use_block_cache_for_lookup) const; \ + template Status BlockBasedTable::LookupAndPinBlocksInCache( \ + const ReadOptions& ro, const BlockHandle& handle, \ + CachableEntry* out_parsed_block) const; INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock); INSTANTIATE_BLOCKLIKE_TEMPLATES(UncompressionDict); @@ -885,6 +888,7 @@ Status BlockBasedTable::PrefetchTail( true /* track_min_offset */, false /* implicit_auto_readahead */, 0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */, 0 /* upper_bound_offset */, nullptr /* fs */, nullptr /* clock */, stats, + /* readahead_cb */ nullptr, FilePrefetchBufferUsage::kTableOpenPrefetchTail)); if (s.ok()) { @@ -1470,6 +1474,62 @@ IndexBlockIter* BlockBasedTable::InitBlockIterator( block_contents_pinned, rep->user_defined_timestamps_persisted); } +// Right now only called for Data blocks. +template +Status BlockBasedTable::LookupAndPinBlocksInCache( + const ReadOptions& ro, const BlockHandle& handle, + CachableEntry* out_parsed_block) const { + BlockCacheInterface block_cache{ + rep_->table_options.block_cache.get()}; + + assert(block_cache); + + Status s; + CachableEntry uncompression_dict; + if (rep_->uncompression_dict_reader) { + const bool no_io = (ro.read_tier == kBlockCacheTier); + s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( + /* prefetch_buffer= */ nullptr, ro, no_io, ro.verify_checksums, + /* get_context= */ nullptr, /* lookup_context= */ nullptr, + &uncompression_dict); + if (!s.ok()) { + return s; + } + } + + // Do the lookup. + CacheKey key_data = GetCacheKey(rep_->base_cache_key, handle); + const Slice key = key_data.AsSlice(); + + Statistics* statistics = rep_->ioptions.statistics.get(); + + BlockCreateContext create_ctx = rep_->create_context; + create_ctx.dict = uncompression_dict.GetValue() + ? uncompression_dict.GetValue() + : &UncompressionDict::GetEmptyDict(); + + auto cache_handle = + block_cache.LookupFull(key, &create_ctx, GetCachePriority(), + statistics, rep_->ioptions.lowest_used_cache_tier); + + if (!cache_handle) { + UpdateCacheMissMetrics(TBlocklike::kBlockType, /* get_context = */ nullptr); + return s; + } + + // Found in Cache. + TBlocklike* value = block_cache.Value(cache_handle); + if (value) { + UpdateCacheHitMetrics(TBlocklike::kBlockType, /* get_context = */ nullptr, + block_cache.get()->GetUsage(cache_handle)); + } + out_parsed_block->SetCachedValue(value, block_cache.get(), cache_handle); + + assert(!out_parsed_block->IsEmpty()); + + return s; +} + // If contents is nullptr, this function looks up the block caches for the // data block referenced by handle, and read the block from disk if necessary. 
// If contents is non-null, it skips the cache lookup and disk read, since diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 32c5f6852..ed6af9b33 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -280,6 +280,11 @@ class BlockBasedTable : public TableReader { Status GetKVPairsFromDataBlocks(const ReadOptions& read_options, std::vector* kv_pair_blocks); + template + Status LookupAndPinBlocksInCache( + const ReadOptions& ro, const BlockHandle& handle, + CachableEntry* out_parsed_block) const; + struct Rep; Rep* get_rep() { return rep_; } @@ -687,31 +692,33 @@ struct BlockBasedTable::Rep { uint64_t sst_number_for_tracing() const { return file ? TableFileNameToNumber(file->file_name()) : UINT64_MAX; } - void CreateFilePrefetchBuffer(size_t readahead_size, - size_t max_readahead_size, - std::unique_ptr* fpb, - bool implicit_auto_readahead, - uint64_t num_file_reads, - uint64_t num_file_reads_for_auto_readahead, - uint64_t upper_bound_offset) const { + void CreateFilePrefetchBuffer( + size_t readahead_size, size_t max_readahead_size, + std::unique_ptr* fpb, bool implicit_auto_readahead, + uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, + uint64_t upper_bound_offset, + const std::function& readaheadsize_cb) + const { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, implicit_auto_readahead, num_file_reads, num_file_reads_for_auto_readahead, upper_bound_offset, - ioptions.fs.get(), ioptions.clock, ioptions.stats)); + ioptions.fs.get(), ioptions.clock, ioptions.stats, readaheadsize_cb)); } void CreateFilePrefetchBufferIfNotExists( size_t readahead_size, size_t max_readahead_size, std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, - uint64_t upper_bound_offset) const { + uint64_t upper_bound_offset, + const std::function& readaheadsize_cb) + const { if (!(*fpb)) { CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, implicit_auto_readahead, num_file_reads, num_file_reads_for_auto_readahead, - upper_bound_offset); + upper_bound_offset, readaheadsize_cb); } } diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 7a36ad58c..218d3821f 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -12,15 +12,13 @@ #include "table/block_based/block_based_table_reader.h" namespace ROCKSDB_NAMESPACE { -void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, - const BlockHandle& handle, - const size_t readahead_size, - bool is_for_compaction, - const bool no_sequential_checking, - const ReadOptions& read_options) { +void BlockPrefetcher::PrefetchIfNeeded( + const BlockBasedTable::Rep* rep, const BlockHandle& handle, + const size_t readahead_size, bool is_for_compaction, + const bool no_sequential_checking, const ReadOptions& read_options, + const std::function& readaheadsize_cb) { const size_t len = BlockBasedTable::BlockSizeWithTrailer(handle); const size_t offset = handle.offset(); - if (is_for_compaction) { if (!rep->file->use_direct_io()) { // If FS supports prefetching (readahead_limit_ will be non zero in that @@ -49,7 +47,7 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, compaction_readahead_size_, compaction_readahead_size_, &prefetch_buffer_, 
/*implicit_auto_readahead=*/false, /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, - /*upper_bound_offset=*/0); + /*upper_bound_offset=*/0, /*readaheadsize_cb=*/nullptr); return; } @@ -58,7 +56,8 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, rep->CreateFilePrefetchBufferIfNotExists( readahead_size, readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, - /*num_file_reads_for_auto_readahead=*/0, upper_bound_offset_); + /*num_file_reads_for_auto_readahead=*/0, upper_bound_offset_, + readaheadsize_cb); return; } @@ -83,7 +82,7 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, &prefetch_buffer_, /*implicit_auto_readahead=*/true, /*num_file_reads=*/0, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_); + upper_bound_offset_, readaheadsize_cb); return; } @@ -114,7 +113,7 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_); + upper_bound_offset_, readaheadsize_cb); return; } @@ -138,7 +137,7 @@ void BlockPrefetcher::PrefetchIfNeeded(const BlockBasedTable::Rep* rep, initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_); + upper_bound_offset_, readaheadsize_cb); return; } diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index 859a85f66..7e075c08e 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -18,11 +18,11 @@ class BlockPrefetcher { readahead_size_(initial_auto_readahead_size), initial_auto_readahead_size_(initial_auto_readahead_size) {} - void PrefetchIfNeeded(const BlockBasedTable::Rep* rep, - const BlockHandle& handle, size_t readahead_size, - bool is_for_compaction, - const bool no_sequential_checking, - const ReadOptions& read_options); + void PrefetchIfNeeded( + const BlockBasedTable::Rep* rep, const BlockHandle& handle, + size_t readahead_size, bool is_for_compaction, + const bool no_sequential_checking, const ReadOptions& read_options, + const std::function& readaheadsize_cb); FilePrefetchBuffer* prefetch_buffer() { return prefetch_buffer_.get(); } void UpdateReadPattern(const uint64_t& offset, const size_t& len) { diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 8b4174645..9c0a49660 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -498,7 +498,7 @@ Status PartitionedFilterBlockReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /* Implicit autoreadahead */, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0); + /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr); IOOptions opts; s = rep->file->PrepareIOOptions(ro, opts); diff --git a/table/block_based/partitioned_index_iterator.cc b/table/block_based/partitioned_index_iterator.cc index db1250f22..cc6f70130 100644 --- a/table/block_based/partitioned_index_iterator.cc +++ b/table/block_based/partitioned_index_iterator.cc @@ -91,7 +91,8 @@ void PartitionedIndexIterator::InitPartitionedIndexBlock() { // Enabled from the very first IO when 
ReadOptions.readahead_size is set. block_prefetcher_.PrefetchIfNeeded( rep, partitioned_index_handle, read_options_.readahead_size, - is_for_compaction, /*no_sequential_checking=*/false, read_options_); + is_for_compaction, /*no_sequential_checking=*/false, read_options_, + /*readaheadsize_cb=*/nullptr); Status s; table_->NewDataBlockIterator( read_options_, partitioned_index_handle, &block_iter_, diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index 2b8b5bce1..9f3f339a1 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -170,7 +170,7 @@ Status PartitionIndexReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /*Implicit auto readahead*/, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0); + /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr); IOOptions opts; { Status s = rep->file->PrepareIOOptions(ro, opts); diff --git a/unreleased_history/performance_improvements/auto_readahead_size.md b/unreleased_history/performance_improvements/auto_readahead_size.md new file mode 100644 index 000000000..57622ff12 --- /dev/null +++ b/unreleased_history/performance_improvements/auto_readahead_size.md @@ -0,0 +1 @@ +Added additional improvements in tuning readahead_size during Scans when auto_readahead_size is enabled. However it's not supported with Iterator::Prev operation and will return NotSupported error. From 49da91ec097b4efcd8a8e4dc1b287e9f81eb4093 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sat, 23 Sep 2023 11:02:19 -0700 Subject: [PATCH 154/386] Update files for version 8.8 (#11878) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11878 Reviewed By: ajkr Differential Revision: D49568389 Pulled By: cbi42 fbshipit-source-id: b2022735799be9b5e81e03dfb418f8b104632ecf --- HISTORY.md | 45 +++++++++++++++++++ include/rocksdb/version.h | 2 +- tools/check_format_compatible.sh | 2 +- ...fered_io_compaction_readahead_size_zero.md | 1 - .../exclude_some_l0_size_amp.md | 1 - .../ldb_scan_command_output_change.md | 1 - .../001_check_iter_status_data_loss.md | 1 - ...check_more_iter_status_for_delete_range.md | 1 - .../100_atomic_flush_db_stuck_fix.md | 1 - .../bug_fixes/100_rollback_pending_flush.md | 1 - .../bug_fixes/auto_tuning_async_fix.md | 1 - .../bug_fixes/compressed_sec_cache_disable.md | 1 - .../bug_fixes/dyn_tiered_cache_capacity.md | 1 - .../bug_fixes/fix_multiget_sv_cleanup.md | 1 - .../fix_row_cache_falsely_return_kNotFound.md | 1 - .../fixed_generic_rate_limiter_hang.md | 1 - .../bug_fixes/no_compaction_scheduled_bug.md | 1 - unreleased_history/bug_fixes/opt_seek.md | 1 - .../bug_fixes/sst_dump_for_udt.md | 1 - .../bug_fixes/upper_bound_autoreadahead.md | 1 - .../verify_file_checksum_stat_bug.md | 1 - unreleased_history/new_features/auto_hcc.md | 1 - .../new_features/compaction_time_stats.md | 1 - ...get_entity_in_secondary_and_readonly_db.md | 1 - .../new_features/iterator-refresh-snapshot.md | 1 - .../merge_operand_count_threshold.md | 1 - .../new_features/mutable_bloom_before.md | 1 - .../new_features/secondary_cache_stacking.md | 1 - .../new_features/wide_column_full_merge.md | 1 - .../wide_column_support_in_ldb.md | 1 - .../auto_readahead_size.md | 1 - .../avoid_double_lookup.md | 1 - .../compact_filter_context_more_info.md | 1 - ...compaction_readahead_size_option_change.md | 1 - .../compression_options_level_lz4.md | 1 - .../dyn_tiered_cache_update.md | 1 - 
.../public_api_changes/new_tiered_cache.md | 1 - 37 files changed, 47 insertions(+), 36 deletions(-) delete mode 100644 unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md delete mode 100644 unreleased_history/behavior_changes/exclude_some_l0_size_amp.md delete mode 100644 unreleased_history/behavior_changes/ldb_scan_command_output_change.md delete mode 100644 unreleased_history/bug_fixes/001_check_iter_status_data_loss.md delete mode 100644 unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md delete mode 100644 unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md delete mode 100644 unreleased_history/bug_fixes/100_rollback_pending_flush.md delete mode 100644 unreleased_history/bug_fixes/auto_tuning_async_fix.md delete mode 100644 unreleased_history/bug_fixes/compressed_sec_cache_disable.md delete mode 100644 unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md delete mode 100644 unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md delete mode 100644 unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md delete mode 100644 unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md delete mode 100644 unreleased_history/bug_fixes/no_compaction_scheduled_bug.md delete mode 100644 unreleased_history/bug_fixes/opt_seek.md delete mode 100644 unreleased_history/bug_fixes/sst_dump_for_udt.md delete mode 100644 unreleased_history/bug_fixes/upper_bound_autoreadahead.md delete mode 100644 unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md delete mode 100644 unreleased_history/new_features/auto_hcc.md delete mode 100644 unreleased_history/new_features/compaction_time_stats.md delete mode 100644 unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md delete mode 100644 unreleased_history/new_features/iterator-refresh-snapshot.md delete mode 100644 unreleased_history/new_features/merge_operand_count_threshold.md delete mode 100644 unreleased_history/new_features/mutable_bloom_before.md delete mode 100644 unreleased_history/new_features/secondary_cache_stacking.md delete mode 100644 unreleased_history/new_features/wide_column_full_merge.md delete mode 100644 unreleased_history/new_features/wide_column_support_in_ldb.md delete mode 100644 unreleased_history/performance_improvements/auto_readahead_size.md delete mode 100644 unreleased_history/performance_improvements/avoid_double_lookup.md delete mode 100644 unreleased_history/public_api_changes/compact_filter_context_more_info.md delete mode 100644 unreleased_history/public_api_changes/compaction_readahead_size_option_change.md delete mode 100644 unreleased_history/public_api_changes/compression_options_level_lz4.md delete mode 100644 unreleased_history/public_api_changes/dyn_tiered_cache_update.md delete mode 100644 unreleased_history/public_api_changes/new_tiered_cache.md diff --git a/HISTORY.md b/HISTORY.md index fc955a1ae..a4f3b69da 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,51 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.7.0 (09/22/2023) +### New Features +* Added an experimental new "automatic" variant of HyperClockCache that does not require a prior estimate of the average size of cache entries. This variant is activated when HyperClockCacheOptions::estimated\_entry\_charge = 0 and has essentially the same concurrency benefits as the existing HyperClockCache. 
+* Add a new statistic `COMPACTION_CPU_TOTAL_TIME` that records cumulative compaction cpu time. This ticker is updated regularly while a compaction is running. +* Add `GetEntity()` API for ReadOnly DB and Secondary DB. +* Add a new iterator API `Iterator::Refresh(const Snapshot *)` that allows iterator to be refreshed while using the input snapshot to read. +* Added a new read option `merge_operand_count_threshold`. When the number of merge operands applied during a successful point lookup exceeds this threshold, the query will return a special OK status with a new subcode `kMergeOperandThresholdExceeded`. Applications might use this signal to take action to reduce the number of merge operands for the affected key(s), for example by running a compaction. +* For `NewRibbonFilterPolicy()`, made the `bloom_before_level` option mutable through the Configurable interface and the SetOptions API, allowing dynamic switching between all-Bloom and all-Ribbon configurations, and configurations in between. See comments on `NewRibbonFilterPolicy()` +* RocksDB now allows the block cache to be stacked on top of a compressed secondary cache and a non-volatile secondary cache, thus creating a three-tier cache. To set it up, use the `NewTieredCache()` API in rocksdb/cache.h.. +* Added a new wide-column aware full merge API called `FullMergeV3` to `MergeOperator`. `FullMergeV3` supports wide columns both as base value and merge result, which enables the application to perform more general transformations during merges. For backward compatibility, the default implementation implements the earlier logic of applying the merge operation to the default column of any wide-column entities. Specifically, if there is no base value or the base value is a plain key-value, the default implementation falls back to `FullMergeV2`. If the base value is a wide-column entity, the default implementation invokes `FullMergeV2` to perform the merge on the default column, and leaves any other columns unchanged. +* Add wide column support to ldb commands (scan, dump, idump, dump_wal) and sst_dump tool's scan command + +### Public API Changes +* Expose more information about input files used in table creation (if any) in `CompactionFilter::Context`. See `CompactionFilter::Context::input_start_level`,`CompactionFilter::Context::input_table_properties` for more. +* `Options::compaction_readahead_size` 's default value is changed from 0 to 2MB. +* When using LZ4 compression, the `acceleration` parameter is configurable by setting the negated value in `CompressionOptions::level`. For example, `CompressionOptions::level=-10` will set `acceleration=10` +* The `NewTieredCache` API has been changed to take the total cache capacity (inclusive of both the primary and the compressed secondary cache) and the ratio of total capacity to allocate to the compressed cache. These are specified in `TieredCacheOptions`. Any capacity specified in `LRUCacheOptions`, `HyperClockCacheOptions` and `CompressedSecondaryCacheOptions` is ignored. A new API, `UpdateTieredCache` is provided to dynamically update the total capacity, ratio of compressed cache, and admission policy. +* The `NewTieredVolatileCache()` API in rocksdb/cache.h has been renamed to `NewTieredCache()`. + +### Behavior Changes +* Compaction read performance will regress when `Options::compaction_readahead_size` is explicitly set to 0 +* Universal size amp compaction will conditionally exclude some of the newest L0 files when selecting input with a small negative impact to size amp. 
This is to prevent a large number of L0 files from being locked by a size amp compaction, potentially leading to write stop with a few more flushes. +* Change ldb scan command delimiter from ':' to '==>'. + +### Bug Fixes +* Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. +* Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file. +* Fix a bug with atomic_flush=true that can cause DB to stuck after a flush fails (#11872). +* Fix a bug where RocksDB (with atomic_flush=false) can delete output SST files of pending flushes when a previous concurrent flush fails (#11865). This can result in DB entering read-only state with error message like `IO error: No such file or directory: While open a file for random read: /tmp/rocksdbtest-501/db_flush_test_87732_4230653031040984171/000013.sst`. +* Fix an assertion fault during seek with async_io when readahead trimming is enabled. +* When the compressed secondary cache capacity is reduced to 0, it should be completely disabled. Before this fix, inserts and lookups would still go to the backing `LRUCache` before returning, thus incurring locking overhead. With this fix, inserts and lookups are no-ops and do not add any overhead. +* Updating the tiered cache (cache allocated using NewTieredCache()) by calling SetCapacity() on it was not working properly. The initial creation would set the primary cache capacity to the combined primary and compressed secondary cache capacity. But SetCapacity() would just set the primary cache capacity. With this fix, the user always specifies the total budget and compressed secondary cache ratio on creation. Subsequently, SetCapacity() will distribute the new capacity across the two caches by the same ratio. +* Fixed a bug in `MultiGet` for cleaning up SuperVersion acquired with locking db mutex. +* Fix a bug where row cache can falsely return kNotFound even though row cache entry is hit. +* Fixed a race condition in `GenericRateLimiter` that could cause it to stop granting requests +* Fix a bug (Issue #10257) where DB can hang after write stall since no compaction is scheduled (#11764). +* Add a fix for async_io where during seek, when reading a block for seeking a target key in a file without any readahead, the iterator aligned the read on a page boundary and reading more than necessary. This increased the storage read bandwidth usage. +* Fix an issue in sst dump tool to handle bounds specified for data with user-defined timestamps. +* When auto_readahead_size is enabled, update readahead upper bound during readahead trimming when reseek changes iterate_upper_bound dynamically. +* Fixed a bug where `rocksdb.file.read.verify.file.checksums.micros` is not populated + +### Performance Improvements +* Added additional improvements in tuning readahead_size during Scans when auto_readahead_size is enabled. However it's not supported with Iterator::Prev operation and will return NotSupported error. +* During async_io, the Seek happens in 2 phases. Phase 1 starts an asynchronous read on a block cache miss, and phase 2 waits for it to complete and finishes the seek. In both phases, it tries to lookup the block cache for the data block first before looking in the prefetch buffer. It's optimized by doing the block cache lookup only in the first phase that would save some CPU. 
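To make the last bullet concrete, here is a minimal sketch of the usage pattern it describes (not part of the release notes; `db`, the key names, and the bound are illustrative, and a block-based table with a block cache is assumed, as in the tests in this patch):

    #include <cassert>
    #include <memory>

    #include "rocksdb/db.h"

    using namespace ROCKSDB_NAMESPACE;

    void ScanWithTunedReadahead(DB* db) {
      ReadOptions ro;
      ro.auto_readahead_size = true;  // opt in to readahead_size tuning
      Slice ub("key_zzz");            // iterate_upper_bound is required
      ro.iterate_upper_bound = &ub;

      std::unique_ptr<Iterator> iter(db->NewIterator(ro));
      for (iter->Seek("key_aaa"); iter->Valid(); iter->Next()) {
        // Forward scan only: readahead is trimmed using the upper bound and
        // block cache hits on the data blocks ahead.
      }
      assert(iter->status().ok());

      // Backward movement is rejected in this mode.
      iter->Seek("key_aaa");
      iter->Prev();
      assert(iter->status().IsNotSupported());
    }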
+ ## 8.6.0 (08/18/2023) ### New Features * Added enhanced data integrity checking on SST files with new format_version=6. Performance impact is very small or negligible. Previously if SST data was misplaced or re-arranged by the storage layer, it could pass block checksum with higher than 1 in 4 billion probability. With format_version=6, block checksums depend on what file they are in and location within the file. This way, misplaced SST data is no more likely to pass checksum verification than randomly corrupted data. Also in format_version=6, SST footers are checksum-protected. diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index 8b1f0f651..cecbb7c7b 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -12,7 +12,7 @@ // NOTE: in 'main' development branch, this should be the *next* // minor or major version number planned for release. #define ROCKSDB_MAJOR 8 -#define ROCKSDB_MINOR 7 +#define ROCKSDB_MINOR 8 #define ROCKSDB_PATCH 0 // Do not use these. We made the mistake of declaring macros starting with diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index eff949ce0..586668096 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb" "8.7.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment diff --git a/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md b/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md deleted file mode 100644 index 430101766..000000000 --- a/unreleased_history/behavior_changes/buffered_io_compaction_readahead_size_zero.md +++ /dev/null @@ -1 +0,0 @@ -Compaction read performance will regress when `Options::compaction_readahead_size` is explicitly set to 0 diff --git a/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md b/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md deleted file mode 100644 index 3c73e6789..000000000 --- a/unreleased_history/behavior_changes/exclude_some_l0_size_amp.md +++ /dev/null @@ -1 +0,0 @@ -Universal size amp compaction will conditionally exclude some of the newest L0 files when selecting input with a small negative impact to size amp. This is to prevent a large number of L0 files from being locked by a size amp compaction, potentially leading to write stop with a few more flushes. diff --git a/unreleased_history/behavior_changes/ldb_scan_command_output_change.md b/unreleased_history/behavior_changes/ldb_scan_command_output_change.md deleted file mode 100644 index 806abd4c0..000000000 --- a/unreleased_history/behavior_changes/ldb_scan_command_output_change.md +++ /dev/null @@ -1 +0,0 @@ -Change ldb scan command delimiter from ':' to '==>'. 
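As a brief aside on the 8.6.0 entry retained above, the `format_version` it describes is selected through the table options; a hedged sketch:

    BlockBasedTableOptions table_options;
    // Per the 8.6.0 note: block checksums become dependent on file identity
    // and offset within the file, and the SST footer is checksum-protected.
    table_options.format_version = 6;

    Options options;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));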
diff --git a/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md b/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md deleted file mode 100644 index 1cedc7215..000000000 --- a/unreleased_history/bug_fixes/001_check_iter_status_data_loss.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md b/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md deleted file mode 100644 index 3e060b658..000000000 --- a/unreleased_history/bug_fixes/010_check_more_iter_status_for_delete_range.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md b/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md deleted file mode 100644 index 82893ae65..000000000 --- a/unreleased_history/bug_fixes/100_atomic_flush_db_stuck_fix.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug with atomic_flush=true that can cause DB to stuck after a flush fails (#11872). \ No newline at end of file diff --git a/unreleased_history/bug_fixes/100_rollback_pending_flush.md b/unreleased_history/bug_fixes/100_rollback_pending_flush.md deleted file mode 100644 index 8ca2b1296..000000000 --- a/unreleased_history/bug_fixes/100_rollback_pending_flush.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug where RocksDB (with atomic_flush=false) can delete output SST files of pending flushes when a previous concurrent flush fails (#11865). This can result in DB entering read-only state with error message like `IO error: No such file or directory: While open a file for random read: /tmp/rocksdbtest-501/db_flush_test_87732_4230653031040984171/000013.sst`. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/auto_tuning_async_fix.md b/unreleased_history/bug_fixes/auto_tuning_async_fix.md deleted file mode 100644 index 0a54555cd..000000000 --- a/unreleased_history/bug_fixes/auto_tuning_async_fix.md +++ /dev/null @@ -1 +0,0 @@ -Fix an assertion fault during seek with async_io when readahead trimming is enabled. diff --git a/unreleased_history/bug_fixes/compressed_sec_cache_disable.md b/unreleased_history/bug_fixes/compressed_sec_cache_disable.md deleted file mode 100644 index 9c80f4474..000000000 --- a/unreleased_history/bug_fixes/compressed_sec_cache_disable.md +++ /dev/null @@ -1 +0,0 @@ -When the compressed secondary cache capacity is reduced to 0, it should be completely disabled. Before this fix, inserts and lookups would still go to the backing `LRUCache` before returning, thus incurring locking overhead. With this fix, inserts and lookups are no-ops and do not add any overhead. diff --git a/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md b/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md deleted file mode 100644 index 9cebc73c1..000000000 --- a/unreleased_history/bug_fixes/dyn_tiered_cache_capacity.md +++ /dev/null @@ -1 +0,0 @@ -Updating the tiered cache (cache allocated using NewTieredCache()) by calling SetCapacity() on it was not working properly. The initial creation would set the primary cache capacity to the combined primary and compressed secondary cache capacity. 
But SetCapacity() would just set the primary cache capacity. With this fix, the user always specifies the total budget and compressed secondary cache ratio on creation. Subsequently, SetCapacity() will distribute the new capacity across the two caches by the same ratio. diff --git a/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md b/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md deleted file mode 100644 index f9e8db661..000000000 --- a/unreleased_history/bug_fixes/fix_multiget_sv_cleanup.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a bug in `MultiGet` for cleaning up SuperVersion acquired with locking db mutex. diff --git a/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md b/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md deleted file mode 100644 index d52621932..000000000 --- a/unreleased_history/bug_fixes/fix_row_cache_falsely_return_kNotFound.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug where row cache can falsely return kNotFound even though row cache entry is hit. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md b/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md deleted file mode 100644 index 8f789e186..000000000 --- a/unreleased_history/bug_fixes/fixed_generic_rate_limiter_hang.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a race condition in `GenericRateLimiter` that could cause it to stop granting requests diff --git a/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md b/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md deleted file mode 100644 index 8ac2f1ebb..000000000 --- a/unreleased_history/bug_fixes/no_compaction_scheduled_bug.md +++ /dev/null @@ -1 +0,0 @@ -* Fix a bug (Issue #10257) where DB can hang after write stall since no compaction is scheduled (#11764). \ No newline at end of file diff --git a/unreleased_history/bug_fixes/opt_seek.md b/unreleased_history/bug_fixes/opt_seek.md deleted file mode 100644 index 742c3d60f..000000000 --- a/unreleased_history/bug_fixes/opt_seek.md +++ /dev/null @@ -1 +0,0 @@ -Add a fix for async_io where during seek, when reading a block for seeking a target key in a file without any readahead, the iterator aligned the read on a page boundary and reading more than necessary. This increased the storage read bandwidth usage. diff --git a/unreleased_history/bug_fixes/sst_dump_for_udt.md b/unreleased_history/bug_fixes/sst_dump_for_udt.md deleted file mode 100644 index e8b483ebc..000000000 --- a/unreleased_history/bug_fixes/sst_dump_for_udt.md +++ /dev/null @@ -1 +0,0 @@ -Fix an issue in sst dump tool to handle bounds specified for data with user-defined timestamps. \ No newline at end of file diff --git a/unreleased_history/bug_fixes/upper_bound_autoreadahead.md b/unreleased_history/bug_fixes/upper_bound_autoreadahead.md deleted file mode 100644 index 5ad9ddbc4..000000000 --- a/unreleased_history/bug_fixes/upper_bound_autoreadahead.md +++ /dev/null @@ -1 +0,0 @@ -* When auto_readahead_size is enabled, update readahead upper bound during readahead trimming when reseek changes iterate_upper_bound dynamically. 
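As context for the `dyn_tiered_cache_capacity` entry above, the sketch below illustrates the fixed contract under stated assumptions: the caller gives a total budget and a compressed-secondary ratio at creation time, and a later `SetCapacity()` call is redistributed across the two tiers by that same ratio. `MakeTieredCache` is a hypothetical helper, the header location follows the changelog entries in this patch, and the sizes are arbitrary; this is an illustration, not the library's own code.

```
#include <memory>

#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

// Hypothetical helper illustrating the fixed capacity contract.
std::shared_ptr<Cache> MakeTieredCache() {
  LRUCacheOptions primary_opts;  // per-cache capacities are ignored here
  TieredCacheOptions opts;
  opts.cache_opts = &primary_opts;
  opts.cache_type = PrimaryCacheType::kCacheTypeLRU;
  opts.total_capacity = 64 << 20;          // 64 MB total budget
  opts.compressed_secondary_ratio = 0.25;  // ~16 MB for the compressed tier
  std::shared_ptr<Cache> cache = NewTieredCache(opts);
  // After the fix, a new total keeps the same 75/25 split, e.g. 32 MB becomes
  // roughly 24 MB primary + 8 MB compressed secondary.
  cache->SetCapacity(32 << 20);
  return cache;
}
```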
diff --git a/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md b/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md deleted file mode 100644 index 7c2f921fb..000000000 --- a/unreleased_history/bug_fixes/verify_file_checksum_stat_bug.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a bug where `rocksdb.file.read.verify.file.checksums.micros` is not populated diff --git a/unreleased_history/new_features/auto_hcc.md b/unreleased_history/new_features/auto_hcc.md deleted file mode 100644 index f0c83d181..000000000 --- a/unreleased_history/new_features/auto_hcc.md +++ /dev/null @@ -1 +0,0 @@ -Added an experimental new "automatic" variant of HyperClockCache that does not require a prior estimate of the average size of cache entries. This variant is activated when HyperClockCacheOptions::estimated\_entry\_charge = 0 and has essentially the same concurrency benefits as the existing HyperClockCache. diff --git a/unreleased_history/new_features/compaction_time_stats.md b/unreleased_history/new_features/compaction_time_stats.md deleted file mode 100644 index 6aa3e508c..000000000 --- a/unreleased_history/new_features/compaction_time_stats.md +++ /dev/null @@ -1 +0,0 @@ -* Add a new statistic `COMPACTION_CPU_TOTAL_TIME` that records cumulative compaction cpu time. This ticker is updated regularly while a compaction is running. \ No newline at end of file diff --git a/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md b/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md deleted file mode 100644 index b974fb08b..000000000 --- a/unreleased_history/new_features/get_entity_in_secondary_and_readonly_db.md +++ /dev/null @@ -1 +0,0 @@ -Add `GetEntity()` API for ReadOnly DB and Secondary DB. diff --git a/unreleased_history/new_features/iterator-refresh-snapshot.md b/unreleased_history/new_features/iterator-refresh-snapshot.md deleted file mode 100644 index f8a0e7b43..000000000 --- a/unreleased_history/new_features/iterator-refresh-snapshot.md +++ /dev/null @@ -1 +0,0 @@ -Add a new iterator API `Iterator::Refresh(const Snapshot *)` that allows iterator to be refreshed while using the input snapshot to read. \ No newline at end of file diff --git a/unreleased_history/new_features/merge_operand_count_threshold.md b/unreleased_history/new_features/merge_operand_count_threshold.md deleted file mode 100644 index cd73d71f2..000000000 --- a/unreleased_history/new_features/merge_operand_count_threshold.md +++ /dev/null @@ -1 +0,0 @@ -Added a new read option `merge_operand_count_threshold`. When the number of merge operands applied during a successful point lookup exceeds this threshold, the query will return a special OK status with a new subcode `kMergeOperandThresholdExceeded`. Applications might use this signal to take action to reduce the number of merge operands for the affected key(s), for example by running a compaction. diff --git a/unreleased_history/new_features/mutable_bloom_before.md b/unreleased_history/new_features/mutable_bloom_before.md deleted file mode 100644 index c811b6aea..000000000 --- a/unreleased_history/new_features/mutable_bloom_before.md +++ /dev/null @@ -1 +0,0 @@ -For `NewRibbonFilterPolicy()`, made the `bloom_before_level` option mutable through the Configurable interface and the SetOptions API, allowing dynamic switching between all-Bloom and all-Ribbon configurations, and configurations in between. 
See comments on `NewRibbonFilterPolicy()` diff --git a/unreleased_history/new_features/secondary_cache_stacking.md b/unreleased_history/new_features/secondary_cache_stacking.md deleted file mode 100644 index 05e1bb242..000000000 --- a/unreleased_history/new_features/secondary_cache_stacking.md +++ /dev/null @@ -1 +0,0 @@ -RocksDB now allows the block cache to be stacked on top of a compressed secondary cache and a non-volatile secondary cache, thus creating a three-tier cache. To set it up, use the `NewTieredCache()` API in rocksdb/cache.h.. diff --git a/unreleased_history/new_features/wide_column_full_merge.md b/unreleased_history/new_features/wide_column_full_merge.md deleted file mode 100644 index 480750820..000000000 --- a/unreleased_history/new_features/wide_column_full_merge.md +++ /dev/null @@ -1 +0,0 @@ -Added a new wide-column aware full merge API called `FullMergeV3` to `MergeOperator`. `FullMergeV3` supports wide columns both as base value and merge result, which enables the application to perform more general transformations during merges. For backward compatibility, the default implementation implements the earlier logic of applying the merge operation to the default column of any wide-column entities. Specifically, if there is no base value or the base value is a plain key-value, the default implementation falls back to `FullMergeV2`. If the base value is a wide-column entity, the default implementation invokes `FullMergeV2` to perform the merge on the default column, and leaves any other columns unchanged. diff --git a/unreleased_history/new_features/wide_column_support_in_ldb.md b/unreleased_history/new_features/wide_column_support_in_ldb.md deleted file mode 100644 index 24e7621f6..000000000 --- a/unreleased_history/new_features/wide_column_support_in_ldb.md +++ /dev/null @@ -1 +0,0 @@ -Add wide column support to ldb commands (scan, dump, idump, dump_wal) and sst_dump tool's scan command diff --git a/unreleased_history/performance_improvements/auto_readahead_size.md b/unreleased_history/performance_improvements/auto_readahead_size.md deleted file mode 100644 index 57622ff12..000000000 --- a/unreleased_history/performance_improvements/auto_readahead_size.md +++ /dev/null @@ -1 +0,0 @@ -Added additional improvements in tuning readahead_size during Scans when auto_readahead_size is enabled. However it's not supported with Iterator::Prev operation and will return NotSupported error. diff --git a/unreleased_history/performance_improvements/avoid_double_lookup.md b/unreleased_history/performance_improvements/avoid_double_lookup.md deleted file mode 100644 index d99a8707c..000000000 --- a/unreleased_history/performance_improvements/avoid_double_lookup.md +++ /dev/null @@ -1 +0,0 @@ -During async_io, the Seek happens in 2 phases. Phase 1 starts an asynchronous read on a block cache miss, and phase 2 waits for it to complete and finishes the seek. In both phases, it tries to lookup the block cache for the data block first before looking in the prefetch buffer. It's optimized by doing the block cache lookup only in the first phase that would save some CPU. diff --git a/unreleased_history/public_api_changes/compact_filter_context_more_info.md b/unreleased_history/public_api_changes/compact_filter_context_more_info.md deleted file mode 100644 index 3d821fa68..000000000 --- a/unreleased_history/public_api_changes/compact_filter_context_more_info.md +++ /dev/null @@ -1 +0,0 @@ -Expose more information about input files used in table creation (if any) in `CompactionFilter::Context`. 
See `CompactionFilter::Context::input_start_level`,`CompactionFilter::Context::input_table_properties` for more. diff --git a/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md b/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md deleted file mode 100644 index f86fd82ea..000000000 --- a/unreleased_history/public_api_changes/compaction_readahead_size_option_change.md +++ /dev/null @@ -1 +0,0 @@ -`Options::compaction_readahead_size` 's default value is changed from 0 to 2MB. diff --git a/unreleased_history/public_api_changes/compression_options_level_lz4.md b/unreleased_history/public_api_changes/compression_options_level_lz4.md deleted file mode 100644 index b0f0b56f4..000000000 --- a/unreleased_history/public_api_changes/compression_options_level_lz4.md +++ /dev/null @@ -1 +0,0 @@ -* When using LZ4 compression, the `acceleration` parameter is configurable by setting the negated value in `CompressionOptions::level`. For example, `CompressionOptions::level=-10` will set `acceleration=10` diff --git a/unreleased_history/public_api_changes/dyn_tiered_cache_update.md b/unreleased_history/public_api_changes/dyn_tiered_cache_update.md deleted file mode 100644 index ebe0c3925..000000000 --- a/unreleased_history/public_api_changes/dyn_tiered_cache_update.md +++ /dev/null @@ -1 +0,0 @@ -The `NewTieredCache` API has been changed to take the total cache capacity (inclusive of both the primary and the compressed secondary cache) and the ratio of total capacity to allocate to the compressed cache. These are specified in `TieredCacheOptions`. Any capacity specified in `LRUCacheOptions`, `HyperClockCacheOptions` and `CompressedSecondaryCacheOptions` is ignored. A new API, `UpdateTieredCache` is provided to dynamically update the total capacity, ratio of compressed cache, and admission policy. diff --git a/unreleased_history/public_api_changes/new_tiered_cache.md b/unreleased_history/public_api_changes/new_tiered_cache.md deleted file mode 100644 index 935e6a7ad..000000000 --- a/unreleased_history/public_api_changes/new_tiered_cache.md +++ /dev/null @@ -1 +0,0 @@ -The `NewTieredVolatileCache()` API in rocksdb/cache.h has been renamed to `NewTieredCache()`. From bd655b9af314f8d654221f553b0fede84325a4f6 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Mon, 25 Sep 2023 09:06:22 -0700 Subject: [PATCH 155/386] Disable AutoReadaheadSize in stress tests (#11883) Summary: Crash tests are failing with recent change of auto_readahead_size. Disable it in stress tests and enable it with fix to clear the crash tests failures. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11883 Reviewed By: pdillinger Differential Revision: D49597854 Pulled By: akankshamahajan15 fbshipit-source-id: 0af8ca7414ee9b92f244ee0fb811579c3c052b41 --- tools/db_crashtest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 49e9094f1..dbfe1c68b 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -217,7 +217,7 @@ "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]), # 0 (disable) is the default and more commonly used value. 
"bottommost_file_compaction_delay": lambda: random.choice([0, 0, 0, 600, 3600, 86400]), - "auto_readahead_size" : lambda: random.choice([0, 1]), + "auto_readahead_size" : 0, } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" From 1c871a4d8682ea260ba3b18ed43cd525a2141733 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 25 Sep 2023 09:34:39 -0700 Subject: [PATCH 156/386] Only flush after recovery for retryable IOError (#11880) Summary: https://github.com/facebook/rocksdb/issues/11872 causes a unit test to start failing with the error message below. The cause is that the additional call to `FlushAllColumnFamilies()` in `DBImpl::ResumeImpl()` can run while DB is closing. More detailed explanation: there are two places where we call `ResumeImpl()`: 1. in `ErrorHandler::RecoverFromBGError`, for manual resume or recovery from errors like OutOfSpace through sst file manager, and 2. in `Errorhandler::RecoverFromRetryableBGIOError`, for error recovery from errors like flush failure due to retryable IOError. This is tracked by `ErrorHandler::recovery_thread_`. Here is how DB close waits for error recovery: https://github.com/facebook/rocksdb/blob/49da91ec097b4efcd8a8e4dc1b287e9f81eb4093/db/db_impl/db_impl.cc#L540-L543 `CancelErrorRecovery()` waits until `recovery_thread_` finishes and `IsRecoveryInProgress()` checks the `recovery_in_prog_` flag. The additional call to `FlushAllColumnFamilies()` in `ResumeImpl()` happens after it clears bg error and the `recovery_in_prog_` flag: https://github.com/facebook/rocksdb/blob/49da91ec097b4efcd8a8e4dc1b287e9f81eb4093/db/db_impl/db_impl.cc#L436-L463. So if `ResumeImpl()` is called in `RecoverFromBGError()`, we can have a thread running `FlushAllColumnFamilies()` while DB is closing and thought that recovery is done. The fix is to only do the additional call to `FlushAllColumnFamilies()` when doing error recovery through `Errorhandler::RecoverFromRetryableBGIOError` by setting flags in `DBRecoverContext`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11880 Test Plan: `gtest-parallel --repeat=100 --workers=4 ./error_handler_fs_test --gtest_filter="*AutoRecoverFlushError*"` reproduces the error pretty reliably. ```[==========] Running 1 test from 1 test case. [----------] Global test environment set-up. [----------] 1 test from DBErrorHandlingFSTest [ RUN ] DBErrorHandlingFSTest.AutoRecoverFlushError error_handler_fs_test: db/column_family.cc:1618: rocksdb::ColumnFamilySet::~ColumnFamilySet(): Assertion `last_ref' failed. Received signal 6 (Aborted) ... 
https://github.com/facebook/rocksdb/issues/10 0x00007fac4409efd6 in __GI___assert_fail (assertion=0x7fac452c0afa "last_ref", file=0x7fac452c9fb5 "db/column_family.cc", line=1618, function=0x7fac452cb950 "rocksdb::ColumnFamilySet::~ColumnFamilySet()") at assert.c:101 101 in assert.c https://github.com/facebook/rocksdb/issues/11 0x00007fac44b5324f in rocksdb::ColumnFamilySet::~ColumnFamilySet (this=0x7b5400000000) at db/column_family.cc:1618 1618 assert(last_ref); https://github.com/facebook/rocksdb/issues/12 0x00007fac44e0f047 in std::default_delete::operator() (this=0x7b5800000940, __ptr=0x7b5400000000) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85 85 delete __ptr; https://github.com/facebook/rocksdb/issues/13 std::__uniq_ptr_impl >::reset (this=0x7b5800000940, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:182 182 _M_deleter()(__old_p); https://github.com/facebook/rocksdb/issues/14 std::unique_ptr >::reset (this=0x7b5800000940, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:456 456 _M_t.reset(std::move(__p)); https://github.com/facebook/rocksdb/issues/15 rocksdb::VersionSet::~VersionSet (this=this@entry=0x7b5800000900) at db/version_set.cc:5081 5081 column_family_set_.reset(); https://github.com/facebook/rocksdb/issues/16 0x00007fac44e0f97a in rocksdb::VersionSet::~VersionSet (this=0x7b5800000900) at db/version_set.cc:5078 5078 VersionSet::~VersionSet() { https://github.com/facebook/rocksdb/issues/17 0x00007fac44bf0b2f in std::default_delete::operator() (this=0x7b8c00000068, __ptr=0x7b5800000900) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85 85 delete __ptr; https://github.com/facebook/rocksdb/issues/18 std::__uniq_ptr_impl >::reset (this=0x7b8c00000068, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:182 182 _M_deleter()(__old_p); https://github.com/facebook/rocksdb/issues/19 std::unique_ptr >::reset (this=0x7b8c00000068, __p=0x0) at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:456 456 _M_t.reset(std::move(__p)); https://github.com/facebook/rocksdb/issues/20 rocksdb::DBImpl::CloseHelper (this=this@entry=0x7b8c00000000) at db/db_impl/db_impl.cc:676 676 versions_.reset(); https://github.com/facebook/rocksdb/issues/21 0x00007fac44bf1346 in rocksdb::DBImpl::CloseImpl (this=0x7b8c00000000) at db/db_impl/db_impl.cc:720 720 Status DBImpl::CloseImpl() { return CloseHelper(); } https://github.com/facebook/rocksdb/issues/22 rocksdb::DBImpl::~DBImpl (this=this@entry=0x7b8c00000000) at db/db_impl/db_impl.cc:738 738 closing_status_ = CloseImpl(); https://github.com/facebook/rocksdb/issues/23 0x00007fac44bf2bba in rocksdb::DBImpl::~DBImpl (this=0x7b8c00000000) at db/db_impl/db_impl.cc:722 722 DBImpl::~DBImpl() { https://github.com/facebook/rocksdb/issues/24 0x00007fac455444d4 in rocksdb::DBTestBase::Close (this=this@entry=0x7b6c00000000) at db/db_test_util.cc:678 678 delete db_; https://github.com/facebook/rocksdb/issues/25 0x00007fac455455fb in rocksdb::DBTestBase::TryReopen (this=this@entry=0x7b6c00000000, options=...) at db/db_test_util.cc:707 707 Close(); https://github.com/facebook/rocksdb/issues/26 0x00007fac45543459 in rocksdb::DBTestBase::Reopen (this=0x7ffed74b79a0, options=...) 
at db/db_test_util.cc:670 670 ASSERT_OK(TryReopen(options)); https://github.com/facebook/rocksdb/issues/27 0x00000000004f2522 in rocksdb::DBErrorHandlingFSTest_AutoRecoverFlushError_Test::TestBody (this=this@entry=0x7b6c00000000) at db/error_handler_fs_test.cc:1224 1224 Reopen(options); ``` Reviewed By: jowlyzhang Differential Revision: D49579701 Pulled By: cbi42 fbshipit-source-id: 3fc8325e6dde7e7faa8bcad95060cb4e26eda638 --- db/db_impl/db_impl.cc | 14 ++++++++------ db/error_handler.cc | 1 + db/error_handler.h | 9 ++++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ace2899e9..1af0a1fec 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -453,7 +453,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { if (shutdown_initiated_) { s = Status::ShutdownInProgress(); } - if (s.ok()) { + if (s.ok() && context.flush_after_recovery) { // Since we drop all non-recovery flush requests during recovery, // and new memtable may fill up during recovery, // schedule one more round of flush. @@ -472,14 +472,16 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // FlushAllColumnFamilies releases and re-acquires mutex. if (shutdown_initiated_) { s = Status::ShutdownInProgress(); - } else { - for (auto cfd : *versions_->GetColumnFamilySet()) { - SchedulePendingCompaction(cfd); - } - MaybeScheduleFlushOrCompaction(); } } + if (s.ok()) { + for (auto cfd : *versions_->GetColumnFamilySet()) { + SchedulePendingCompaction(cfd); + } + MaybeScheduleFlushOrCompaction(); + } + // Wake up any waiters - in this case, it could be the shutdown thread bg_cv_.SignalAll(); diff --git a/db/error_handler.cc b/db/error_handler.cc index d79455509..ddcfa5401 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -679,6 +679,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { return; } DBRecoverContext context = recover_context_; + context.flush_after_recovery = true; int resume_count = db_options_.max_bgerror_resume_count; uint64_t wait_interval = db_options_.bgerror_resume_retry_interval; uint64_t retry_count = 0; diff --git a/db/error_handler.h b/db/error_handler.h index 34e08a525..6b1e80286 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -19,10 +19,13 @@ class DBImpl; // FlushReason, which tells the flush job why this flush is called. struct DBRecoverContext { FlushReason flush_reason; + bool flush_after_recovery; - DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {} - - DBRecoverContext(FlushReason reason) : flush_reason(reason) {} + DBRecoverContext() + : flush_reason(FlushReason::kErrorRecovery), + flush_after_recovery(false) {} + DBRecoverContext(FlushReason reason) + : flush_reason(reason), flush_after_recovery(false) {} }; class ErrorHandler { From 6c564e2e174675a782de3fbaa3653e088b9b90d0 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 25 Sep 2023 19:00:39 -0700 Subject: [PATCH 157/386] Add some convenience util APIs to facilitate using U64Ts (#11888) Summary: Added some util function APIs to facilitate using the U64Ts. The U64Ts format for encoding a timestamp is not entirely RocksDB internal. When users are using the user-defined timestamp feature from the transaction layer, its public APIs including `SetCommitTimestamp`, `GetCommitTimestamp`, `SetReadTimestampForValidation` are taking and returning timestamps as uint64_t. 
But if users want to use the APIs from the DB layer, including populating `ReadOptions.timestamp`, interpreting `Iterator::timestamp()`, these APIs are using and returning U64Ts timestamps as an encoded Slice. So these util functions are added to facilitate the usage. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11888 Reviewed By: ltamasi Differential Revision: D49620709 Pulled By: jowlyzhang fbshipit-source-id: ace8d782ee7c3372cf410abf761320d373e495e1 --- include/rocksdb/comparator.h | 26 ++++++++++++++++++++++++++ util/comparator.cc | 26 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/include/rocksdb/comparator.h b/include/rocksdb/comparator.h index d0ac9f1f4..4b39a2585 100644 --- a/include/rocksdb/comparator.h +++ b/include/rocksdb/comparator.h @@ -175,4 +175,30 @@ const Comparator* BytewiseComparatorWithU64Ts(); // comes first. const Comparator* ReverseBytewiseComparatorWithU64Ts(); +// Decode a `U64Ts` timestamp returned by RocksDB to uint64_t. +// When a column family enables user-defined timestamp feature +// with `BytewiseComparatorWithU64Ts` or `ReverseBytewiseComparatorWithU64Ts` +// comparator, the `Iterator::timestamp()` API returns timestamp in `Slice` +// format. This util function helps to translate that `Slice` into an uint64_t +// type. +Status DecodeU64Ts(const Slice& ts, uint64_t* int_ts); + +// Encode an uint64_t timestamp into a U64Ts `Slice`, to be used as +// `ReadOptions.timestamp` for a column family that enables user-defined +// timestamp feature with `BytewiseComparatorWithU64Ts` or +// `ReverseBytewiseComparatorWithU64Ts` comparator. +// Be mindful that the returned `Slice` is backed by `ts_buf`. When `ts_buf` +// is deconstructed, the returned `Slice` can no longer be used. +Slice EncodeU64Ts(uint64_t ts, std::string* ts_buf); + +// Returns a `Slice` representing the maximum U64Ts timestamp. +// The returned `Slice` is backed by some static storage, so it's valid until +// program destruction. +Slice MaxU64Ts(); + +// Returns a `Slice` representing the minimum U64Ts timestamp. +// The returned `Slice` is backed by some static storage, so it's valid until +// program destruction. 
+Slice MinU64Ts(); + } // namespace ROCKSDB_NAMESPACE diff --git a/util/comparator.cc b/util/comparator.cc index e573f5e85..f1f249fd3 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -23,6 +23,7 @@ #include "rocksdb/slice.h" #include "rocksdb/utilities/customizable_util.h" #include "rocksdb/utilities/object_registry.h" +#include "util/coding.h" namespace ROCKSDB_NAMESPACE { @@ -328,6 +329,31 @@ const Comparator* ReverseBytewiseComparatorWithU64Ts() { return &comp_with_u64_ts; } +Status DecodeU64Ts(const Slice& ts, uint64_t* int_ts) { + if (ts.size() != sizeof(uint64_t)) { + return Status::InvalidArgument("U64Ts timestamp size mismatch."); + } + *int_ts = DecodeFixed64(ts.data()); + return Status::OK(); +} + +Slice EncodeU64Ts(uint64_t ts, std::string* ts_buf) { + char buf[sizeof(ts)]; + EncodeFixed64(buf, ts); + ts_buf->assign(buf, sizeof(buf)); + return Slice(*ts_buf); +} + +Slice MaxU64Ts() { + static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff"; + return Slice(kTsMax, sizeof(uint64_t)); +} + +Slice MinU64Ts() { + static constexpr char kTsMin[] = "\x00\x00\x00\x00\x00\x00\x00\x00"; + return Slice(kTsMin, sizeof(uint64_t)); +} + static int RegisterBuiltinComparators(ObjectLibrary& library, const std::string& /*arg*/) { library.AddFactory( From 7ea6e724faddac7f864c73afd2ce3710e02ef1ed Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 25 Sep 2023 20:15:40 -0700 Subject: [PATCH 158/386] Mark recovery_in_prog_ to false whenever recovery thread joins (#11890) Summary: Make the `RecoverFromRetryableBGIOError` function always mark `recovery_in_prog_` to false when it returns. Otherwise, in below code snippet, when db closes and the `error_handler_.CancelErrorRecovery()` call successfully joined the recovery thread, the immediately following while loop will incorrectly think the error recovery is still in progress and loops in `bg_cv_.Wait()`. https://github.com/facebook/rocksdb/blob/1c871a4d8682ea260ba3b18ed43cd525a2141733/db/db_impl/db_impl.cc#L542-L545 This is the issue https://github.com/facebook/rocksdb/issues/11440 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11890 Reviewed By: anand1976 Differential Revision: D49624216 Pulled By: jowlyzhang fbshipit-source-id: ee10cf6527d95b8dd4705a326eb6208d741fe002 --- db/error_handler.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/db/error_handler.cc b/db/error_handler.cc index ddcfa5401..04a988318 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -676,6 +676,8 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, Status::ShutdownInProgress(), db_mutex_); + + recovery_in_prog_ = false; return; } DBRecoverContext context = recover_context_; @@ -689,6 +691,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, Status::ShutdownInProgress(), db_mutex_); + recovery_in_prog_ = false; return; } TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume0"); From 719f5511f65d75d5b0332016bb66fa647ab6a76d Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 26 Sep 2023 10:08:43 -0700 Subject: [PATCH 159/386] No file system prefetching when Options::compaction_readahead_size is 0 (#11887) Summary: **Context/Summary:** https://github.com/facebook/rocksdb/pull/11631 introduced `readahead()` system call for compaction read under non direct IO. 
When `Options::compaction_readahead_size` is 0, the `readahead()` will be issued with a small size (i.e., the block size, by default 4KB). Benchmarks show that such a readahead() call regresses the compaction read compared with the "no readahead()" case (see Test Plan for more). Therefore we decided not to issue such a `readahead()` when `Options::compaction_readahead_size` is 0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11887 Test Plan: Settings: `compaction_readahead_size = 0, use_direct_reads=false` Setup: ``` TEST_TMPDIR=../ ./db_bench -benchmarks=filluniquerandom -disable_auto_compactions=true -write_buffer_size=1048576 -compression_type=none -value_size=10240 && tar -cf ../dbbench.tar -C ../dbbench/ . ``` Run: ``` for i in $(seq 3); do rm -rf ../dbbench/ && mkdir -p ../dbbench/ && tar -xf ../dbbench.tar -C ../dbbench/ . && sudo bash -c 'sync && echo 3 > /proc/sys/vm/drop_caches' && TEST_TMPDIR=../ /usr/bin/time ./db_bench_{pre_PR11631|PR11631|PR11631_with_improvementPR11887} -benchmarks=compact -use_existing_db=true -db=../dbbench/ -disable_auto_compactions=true -compression_type=none ; done |& grep elapsed ``` pre-PR11631("no readahead()" case): PR11631: PR11631+this improvement: Reviewed By: ajkr Differential Revision: D49607266 Pulled By: hx235 fbshipit-source-id: 2efa0dc91bac3c11cc2be057c53d894645f683ef --- table/block_based/block_prefetcher.cc | 2 +- .../no_fs_prefetch_on_zero_compaction_readahead.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 218d3821f..19810cc6c 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -20,7 +20,7 @@ void BlockPrefetcher::PrefetchIfNeeded( const size_t len = BlockBasedTable::BlockSizeWithTrailer(handle); const size_t offset = handle.offset(); if (is_for_compaction) { - if (!rep->file->use_direct_io()) { + if (!rep->file->use_direct_io() && compaction_readahead_size_ > 0) { // If FS supports prefetching (readahead_limit_ will be non zero in that // case) and current block exists in prefetch buffer then return. if (offset + len <= readahead_limit_) { diff --git a/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md b/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md new file mode 100644 index 000000000..e09f693ef --- /dev/null +++ b/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md @@ -0,0 +1 @@ +For non direct IO, eliminate the file system prefetching attempt for compaction read when `Options::compaction_readahead_size` is 0 From fce04587b80db3d51d422ed8b1c52bd6291c9efc Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 26 Sep 2023 18:44:41 -0700 Subject: [PATCH 160/386] Only fallback to RocksDB internal prefetching on unsupported FS prefetching (#11897) Summary: **Context/Summary:** https://github.com/facebook/rocksdb/pull/11631 introduced an undesired fallback behavior to RocksDB internal prefetching even when FS prefetching returns a non-OK status other than "Unsupported". We only want to fall back when FS prefetching is not supported.
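Taken together with #11887 above, the intended behavior can be summarized with the following minimal sketch. The helper names are hypothetical and this is not the actual `BlockPrefetcher` code; it only restates the two conditions for compaction reads under buffered IO.

```
#include <cstddef>

#include "rocksdb/status.h"

using ROCKSDB_NAMESPACE::Status;

// Attempt file system prefetching only when a readahead size is configured
// and direct IO is not in use (see #11887).
bool ShouldTryFsPrefetch(bool use_direct_io, size_t compaction_readahead_size) {
  return !use_direct_io && compaction_readahead_size > 0;
}

// After an FS prefetch attempt, fall back to RocksDB's internal
// FilePrefetchBuffer only when the file system does not support prefetching;
// any other non-OK status is not a reason to silently fall back (this PR).
bool ShouldFallBackToInternalBuffer(const Status& fs_prefetch_status) {
  return fs_prefetch_status.IsNotSupported();
}
```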
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11897 Test Plan: CI Reviewed By: ajkr Differential Revision: D49667055 Pulled By: hx235 fbshipit-source-id: fa36e4e5d6dc9507080217035f9d6ff8e4abda28 --- table/block_based/block_prefetcher.cc | 7 +++---- unreleased_history/bug_fixes/fallback_only_unsupported.md | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 unreleased_history/bug_fixes/fallback_only_unsupported.md diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 19810cc6c..db2d546f6 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -35,11 +35,12 @@ void BlockPrefetcher::PrefetchIfNeeded( if (s.ok()) { readahead_limit_ = offset + len + compaction_readahead_size_; return; + } else if (!s.IsNotSupported()) { + return; } } // If FS prefetch is not supported, fall back to use internal prefetch - // buffer. Discarding other return status of Prefetch calls intentionally, - // as we can fallback to reading from disk if Prefetch fails. + // buffer. // // num_file_reads is used by FilePrefetchBuffer only when // implicit_auto_readahead is set. @@ -122,8 +123,6 @@ void BlockPrefetcher::PrefetchIfNeeded( } // If prefetch is not supported, fall back to use internal prefetch buffer. - // Discarding other return status of Prefetch calls intentionally, as - // we can fallback to reading from disk if Prefetch fails. IOOptions opts; Status s = rep->file->PrepareIOOptions(read_options, opts); if (!s.ok()) { diff --git a/unreleased_history/bug_fixes/fallback_only_unsupported.md b/unreleased_history/bug_fixes/fallback_only_unsupported.md new file mode 100644 index 000000000..feb02ce3b --- /dev/null +++ b/unreleased_history/bug_fixes/fallback_only_unsupported.md @@ -0,0 +1 @@ +Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching after file system's prefetching returns non-OK status other than `Status::NotSupported()` From 35a02502939892e77454e2c71730363c34646121 Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 27 Sep 2023 12:08:08 -0700 Subject: [PATCH 161/386] Don't call InsertSaved on compressed only secondary cache (#11889) Summary: In https://github.com/facebook/rocksdb/issues/11812, the ```CacheWithSecondaryAdapter::Insert``` calls ```InsertSaved``` on the secondary cache to warm it up with the compressed blocks. This should only be done if its a stacked cache with compressed and nvm cache. If its in-memory compressed only, then don't call ```InsertSaved```. Tests: Add a new unit test Pull Request resolved: https://github.com/facebook/rocksdb/pull/11889 Reviewed By: akankshamahajan15 Differential Revision: D49615758 Pulled By: anand1976 fbshipit-source-id: 156ff968ad014ac319f8840da7a48193e4cebfa9 --- cache/secondary_cache_adapter.cc | 43 ++++++++++--- cache/tiered_secondary_cache_test.cc | 90 ++++++++++++++++++++++++---- 2 files changed, 114 insertions(+), 19 deletions(-) diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index 28027811a..d6b347246 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -241,9 +241,10 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, } // Warm up the secondary cache with the compressed block. The secondary // cache may choose to ignore it based on the admission policy. 
- if (value != nullptr && !compressed_value.empty()) { + if (value != nullptr && !compressed_value.empty() && + adm_policy_ == TieredAdmissionPolicy::kAdmPolicyThreeQueue) { Status status = secondary_cache_->InsertSaved(key, compressed_value, type); - assert(status.ok()); + assert(status.ok() || status.IsNotSupported()); } return s; @@ -575,11 +576,40 @@ std::shared_ptr NewTieredCache(const TieredCacheOptions& _opts) { return nullptr; } - if (_opts.adm_policy >= TieredAdmissionPolicy::kAdmPolicyMax) { - return nullptr; + TieredCacheOptions opts = _opts; + { + bool valid_adm_policy = true; + + switch (_opts.adm_policy) { + case TieredAdmissionPolicy::kAdmPolicyAuto: + // Select an appropriate default policy + if (opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyAuto) { + if (opts.nvm_sec_cache) { + opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyThreeQueue; + } else { + opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyPlaceholder; + } + } + break; + case TieredAdmissionPolicy::kAdmPolicyPlaceholder: + case TieredAdmissionPolicy::kAdmPolicyAllowCacheHits: + if (opts.nvm_sec_cache) { + valid_adm_policy = false; + } + break; + case TieredAdmissionPolicy::kAdmPolicyThreeQueue: + if (!opts.nvm_sec_cache) { + valid_adm_policy = false; + } + break; + default: + valid_adm_policy = false; + } + if (!valid_adm_policy) { + return nullptr; + } } - TieredCacheOptions opts = _opts; std::shared_ptr cache; if (opts.cache_type == PrimaryCacheType::kCacheTypeLRU) { LRUCacheOptions cache_opts = @@ -602,8 +632,7 @@ std::shared_ptr NewTieredCache(const TieredCacheOptions& _opts) { sec_cache = NewCompressedSecondaryCache(opts.comp_cache_opts); if (opts.nvm_sec_cache) { - if (opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyThreeQueue || - opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyAuto) { + if (opts.adm_policy == TieredAdmissionPolicy::kAdmPolicyThreeQueue) { sec_cache = std::make_shared( sec_cache, opts.nvm_sec_cache, TieredAdmissionPolicy::kAdmPolicyThreeQueue); diff --git a/cache/tiered_secondary_cache_test.cc b/cache/tiered_secondary_cache_test.cc index 69678813b..9d8cdf7fb 100644 --- a/cache/tiered_secondary_cache_test.cc +++ b/cache/tiered_secondary_cache_test.cc @@ -3,6 +3,8 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
// +#include "cache/compressed_secondary_cache.h" +#include "cache/secondary_cache_adapter.h" #include "db/db_test_util.h" #include "rocksdb/cache.h" #include "rocksdb/secondary_cache.h" @@ -179,7 +181,9 @@ class DBTieredSecondaryCacheTest : public DBTestBase { std::shared_ptr NewCache(size_t pri_capacity, size_t compressed_capacity, - size_t nvm_capacity) { + size_t nvm_capacity, + TieredAdmissionPolicy adm_policy = + TieredAdmissionPolicy::kAdmPolicyAuto) { LRUCacheOptions lru_opts; TieredCacheOptions opts; lru_opts.capacity = 0; @@ -187,30 +191,30 @@ class DBTieredSecondaryCacheTest : public DBTestBase { lru_opts.high_pri_pool_ratio = 0; opts.cache_opts = &lru_opts; opts.cache_type = PrimaryCacheType::kCacheTypeLRU; - opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyThreeQueue; opts.comp_cache_opts.capacity = 0; opts.comp_cache_opts.num_shard_bits = 0; opts.total_capacity = pri_capacity + compressed_capacity; opts.compressed_secondary_ratio = (double)compressed_capacity / opts.total_capacity; - nvm_sec_cache_.reset(new TestSecondaryCache(nvm_capacity)); - opts.nvm_sec_cache = nvm_sec_cache_; + if (nvm_capacity > 0) { + nvm_sec_cache_.reset(new TestSecondaryCache(nvm_capacity)); + opts.nvm_sec_cache = nvm_sec_cache_; + } + opts.adm_policy = adm_policy; cache_ = NewTieredCache(opts); assert(cache_ != nullptr); -#if 0 - CacheWithSecondaryAdapter* adapter_cache_ = - static_cast(cache_.get()); - TieredSecondaryCache* tiered_cache_ = - static_cast( - adapter_cache_->TEST_GetSecondaryCache()); -#endif - return cache_; } TestSecondaryCache* nvm_sec_cache() { return nvm_sec_cache_.get(); } + CompressedSecondaryCache* compressed_secondary_cache() { + return static_cast( + static_cast(cache_.get()) + ->TEST_GetSecondaryCache()); + } + private: std::shared_ptr cache_; std::shared_ptr nvm_sec_cache_; @@ -636,6 +640,68 @@ TEST_F(DBTieredSecondaryCacheTest, IterateTest) { Destroy(options); } +class DBTieredAdmPolicyTest + : public DBTieredSecondaryCacheTest, + public testing::WithParamInterface {}; + +TEST_P(DBTieredAdmPolicyTest, CompressedOnlyTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + // We want a block cache of size 10KB, and a compressed secondary cache of + // size 10KB. However, we specify a block cache size of 256KB here in order + // to take into account the cache reservation in the block cache on + // behalf of the compressed cache. The unit of cache reservation is 256KB. + // The effective block cache capacity will be calculated as 256 + 10 = 266KB, + // and 256KB will be reserved for the compressed cache, leaving 10KB for + // the primary block cache. We only have to worry about this here because + // the cache size is so small. 
+ table_options.block_cache = NewCache(256 * 1024, 10 * 1024, 0, GetParam()); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + size_t comp_cache_usage = compressed_secondary_cache()->TEST_GetUsage(); + // Disable paranoid_file_checks so that flush will not read back the newly + // written file + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + // The first 2 Gets, for keys 0 and 5, will load the corresponding data + // blocks as they will be cache misses. Since this is a 2-tier cache ( + // primary and compressed), no warm-up should happen with the compressed + // blocks. + std::string v = Get(Key(0)); + ASSERT_EQ(1007, v.size()); + + v = Get(Key(5)); + ASSERT_EQ(1007, v.size()); + + ASSERT_EQ(compressed_secondary_cache()->TEST_GetUsage(), comp_cache_usage); + + Destroy(options); +} + +INSTANTIATE_TEST_CASE_P( + DBTieredAdmPolicyTest, DBTieredAdmPolicyTest, + ::testing::Values(TieredAdmissionPolicy::kAdmPolicyAuto, + TieredAdmissionPolicy::kAdmPolicyPlaceholder, + TieredAdmissionPolicy::kAdmPolicyAllowCacheHits)); + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 6b4315ee8badc2fd4dea2cade6cdd1acfb92db78 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 27 Sep 2023 14:53:25 -0700 Subject: [PATCH 162/386] Extend the test coverage of FullMergeV3 (#11896) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11896 The patch extends the test coverage of the wide column aware merge logic by adding two new tests that perform general transformations during merge by implementing the `FullMergeV3` interface. The first one uses a merge operator that produces a wide-column entity as result in all cases (i.e. even if the base value is a plain key-value, or if there is no base value). The second one uses a merge operator that results in a plain key-value in all cases. 
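For orientation before the test diff below, here is a minimal, self-contained sketch of the `FullMergeV3` interface these tests exercise. `AppendingMergeOperator` is a hypothetical example, not one of the operators added by this patch; it appends all operands to a plain base value (treating a missing or wide-column base as empty) and always produces a plain result.

```
#include <string>
#include <variant>

#include "rocksdb/merge_operator.h"

using namespace ROCKSDB_NAMESPACE;

// Hypothetical example operator: always produces a plain value by
// concatenating the plain base value (if any) with all merge operands.
class AppendingMergeOperator : public MergeOperator {
 public:
  bool FullMergeV3(const MergeOperationInputV3& merge_in,
                   MergeOperationOutputV3* merge_out) const override {
    std::string result;
    // existing_value is a variant: no base value, a plain Slice, or columns.
    if (const Slice* base = std::get_if<Slice>(&merge_in.existing_value)) {
      result = base->ToString();
    }
    for (const Slice& operand : merge_in.operand_list) {
      result.append(operand.data(), operand.size());
    }
    merge_out->new_value = std::move(result);
    return true;
  }

  const char* Name() const override { return "AppendingMergeOperator"; }
};
```

The operators added in the diff below follow the same shape but use `std::visit` to branch on all three alternatives of `existing_value`, and one of them sets `merge_out->new_value` to `MergeOperationOutputV3::NewColumns()` to produce a wide-column result.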
Reviewed By: jaykorean Differential Revision: D49665946 fbshipit-source-id: 419b9e557c064525b659685eb8c09ae446656439 --- db/wide/db_wide_basic_test.cc | 392 ++++++++++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 686dddd89..2067b6c19 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -10,6 +10,7 @@ #include "db/db_test_util.h" #include "port/stack_trace.h" #include "test_util/testutil.h" +#include "util/overload.h" #include "utilities/merge_operators.h" namespace ROCKSDB_NAMESPACE { @@ -690,6 +691,397 @@ TEST_F(DBWideBasicTest, MergeEntity) { verify_merge_ops_post_compaction(); } +class DBWideMergeV3Test : public DBWideBasicTest { + protected: + void RunTest(const WideColumns& first_expected, + const WideColumns& second_expected, + const WideColumns& third_expected) { + // Note: we'll take some snapshots to prevent merging during flush + snapshots_.reserve(6); + + // Test reading from memtables + WriteKeyValues(); + VerifyKeyValues(first_expected, second_expected, third_expected); + VerifyMergeOperandCount(first_key, 2); + VerifyMergeOperandCount(second_key, 3); + VerifyMergeOperandCount(third_key, 3); + + // Test reading from SST files + ASSERT_OK(Flush()); + VerifyKeyValues(first_expected, second_expected, third_expected); + VerifyMergeOperandCount(first_key, 2); + VerifyMergeOperandCount(second_key, 3); + VerifyMergeOperandCount(third_key, 3); + + // Test reading from SSTs after compaction. Note that we write the same KVs + // and flush again so we have two overlapping files. We also release the + // snapshots so that the compaction can merge all keys. + WriteKeyValues(); + ASSERT_OK(Flush()); + + snapshots_.clear(); + + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /* begin */ nullptr, + /* end */ nullptr)); + VerifyKeyValues(first_expected, second_expected, third_expected); + VerifyMergeOperandCount(first_key, 1); + VerifyMergeOperandCount(second_key, 1); + VerifyMergeOperandCount(third_key, 1); + } + + void WriteKeyValues() { + // Base values + ASSERT_OK(db_->Delete(WriteOptions(), db_->DefaultColumnFamily(), + first_key)); // no base value + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), second_key, + second_base_value)); // plain base value + ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), + third_key, + third_columns)); // wide-column base value + + snapshots_.emplace_back(db_); + + // First round of merge operands + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key, + first_merge_op1)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key, + second_merge_op1)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), third_key, + third_merge_op1)); + + snapshots_.emplace_back(db_); + + // Second round of merge operands + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), first_key, + first_merge_op2)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), second_key, + second_merge_op2)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), third_key, + third_merge_op2)); + + snapshots_.emplace_back(db_); + } + + void VerifyKeyValues(const WideColumns& first_expected, + const WideColumns& second_expected, + const WideColumns& third_expected) { + assert(!first_expected.empty() && + first_expected[0].name() == kDefaultWideColumnName); + assert(!second_expected.empty() && + second_expected[0].name() == 
kDefaultWideColumnName); + assert(!third_expected.empty() && + third_expected[0].name() == kDefaultWideColumnName); + + // Get + { + PinnableSlice result; + ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), first_key, + &result)); + ASSERT_EQ(result, first_expected[0].value()); + } + + { + PinnableSlice result; + ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), second_key, + &result)); + ASSERT_EQ(result, second_expected[0].value()); + } + + { + PinnableSlice result; + ASSERT_OK(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), third_key, + &result)); + ASSERT_EQ(result, third_expected[0].value()); + } + + // MultiGet + { + std::array keys{{first_key, second_key, third_key}}; + std::array values; + std::array statuses; + + db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, + keys.data(), values.data(), statuses.data()); + ASSERT_OK(statuses[0]); + ASSERT_EQ(values[0], first_expected[0].value()); + ASSERT_OK(statuses[1]); + ASSERT_EQ(values[1], second_expected[0].value()); + ASSERT_OK(statuses[2]); + ASSERT_EQ(values[2], third_expected[0].value()); + } + + // GetEntity + { + PinnableWideColumns result; + + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), + first_key, &result)); + ASSERT_EQ(result.columns(), first_expected); + } + + { + PinnableWideColumns result; + + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), + second_key, &result)); + ASSERT_EQ(result.columns(), second_expected); + } + + { + PinnableWideColumns result; + + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), + third_key, &result)); + ASSERT_EQ(result.columns(), third_expected); + } + + // MultiGetEntity + { + std::array keys{{first_key, second_key, third_key}}; + std::array results; + std::array statuses; + + db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys, + keys.data(), results.data(), statuses.data()); + ASSERT_OK(statuses[0]); + ASSERT_EQ(results[0].columns(), first_expected); + ASSERT_OK(statuses[1]); + ASSERT_EQ(results[1].columns(), second_expected); + ASSERT_OK(statuses[2]); + ASSERT_EQ(results[2].columns(), third_expected); + } + + // Iterator + { + std::unique_ptr iter(db_->NewIterator(ReadOptions())); + + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), first_key); + ASSERT_EQ(iter->value(), first_expected[0].value()); + ASSERT_EQ(iter->columns(), first_expected); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), second_key); + ASSERT_EQ(iter->value(), second_expected[0].value()); + ASSERT_EQ(iter->columns(), second_expected); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), third_key); + ASSERT_EQ(iter->value(), third_expected[0].value()); + ASSERT_EQ(iter->columns(), third_expected); + + iter->Next(); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); + + iter->SeekToLast(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), third_key); + ASSERT_EQ(iter->value(), third_expected[0].value()); + ASSERT_EQ(iter->columns(), third_expected); + + iter->Prev(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), second_key); + ASSERT_EQ(iter->value(), second_expected[0].value()); + ASSERT_EQ(iter->columns(), second_expected); + + iter->Prev(); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + ASSERT_EQ(iter->key(), first_key); + ASSERT_EQ(iter->value(), 
first_expected[0].value()); + ASSERT_EQ(iter->columns(), first_expected); + + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); + } + } + + void VerifyMergeOperandCount(const Slice& key, int expected_merge_ops) { + GetMergeOperandsOptions get_merge_opts; + get_merge_opts.expected_max_number_of_operands = expected_merge_ops; + + std::vector merge_operands(expected_merge_ops); + int number_of_operands = 0; + + ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), + key, merge_operands.data(), &get_merge_opts, + &number_of_operands)); + ASSERT_EQ(number_of_operands, expected_merge_ops); + } + + std::vector snapshots_; + + static constexpr size_t num_keys = 3; + + static constexpr char first_key[] = "first"; + static constexpr char first_merge_op1[] = "hello"; + static constexpr char first_merge_op1_upper[] = "HELLO"; + static constexpr char first_merge_op2[] = "world"; + static constexpr char first_merge_op2_upper[] = "WORLD"; + + static constexpr char second_key[] = "second"; + static constexpr char second_base_value[] = "foo"; + static constexpr char second_base_value_upper[] = "FOO"; + static constexpr char second_merge_op1[] = "bar"; + static constexpr char second_merge_op1_upper[] = "BAR"; + static constexpr char second_merge_op2[] = "baz"; + static constexpr char second_merge_op2_upper[] = "BAZ"; + + static constexpr char third_key[] = "third"; + static const WideColumns third_columns; + static constexpr char third_merge_op1[] = "three"; + static constexpr char third_merge_op1_upper[] = "THREE"; + static constexpr char third_merge_op2[] = "four"; + static constexpr char third_merge_op2_upper[] = "FOUR"; +}; + +const WideColumns DBWideMergeV3Test::third_columns{{"one", "ONE"}, + {"two", "TWO"}}; + +TEST_F(DBWideMergeV3Test, MergeV3WideColumnOutput) { + // A test merge operator that always returns a wide-column result. It adds any + // base values and merge operands to a single wide-column entity, and converts + // all column values to uppercase. In addition, it puts "none", "plain", or + // "wide" into the value of the default column depending on the type of the + // base value (if any). 
+ static constexpr char kNone[] = "none"; + static constexpr char kPlain[] = "plain"; + static constexpr char kWide[] = "wide"; + + class WideColumnOutputMergeOperator : public MergeOperator { + public: + bool FullMergeV3(const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const override { + assert(merge_out); + + merge_out->new_value = MergeOperationOutputV3::NewColumns(); + auto& new_columns = + std::get(merge_out->new_value); + + auto upper = [](std::string str) { + for (char& c : str) { + c = static_cast(std::toupper(static_cast(c))); + } + + return str; + }; + + std::visit(overload{[&](const std::monostate&) { + new_columns.emplace_back( + kDefaultWideColumnName.ToString(), kNone); + }, + [&](const Slice& value) { + new_columns.emplace_back( + kDefaultWideColumnName.ToString(), kPlain); + + const std::string val = value.ToString(); + new_columns.emplace_back(val, upper(val)); + }, + [&](const WideColumns& columns) { + new_columns.emplace_back( + kDefaultWideColumnName.ToString(), kWide); + + for (const auto& column : columns) { + new_columns.emplace_back( + column.name().ToString(), + upper(column.value().ToString())); + } + }}, + merge_in.existing_value); + + for (const auto& operand : merge_in.operand_list) { + const std::string op = operand.ToString(); + new_columns.emplace_back(op, upper(op)); + } + + return true; + } + + const char* Name() const override { + return "WideColumnOutputMergeOperator"; + } + }; + + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.merge_operator = std::make_shared(); + Reopen(options); + + // Expected results + // Lexicographical order: [default] < hello < world + const WideColumns first_expected{{kDefaultWideColumnName, kNone}, + {first_merge_op1, first_merge_op1_upper}, + {first_merge_op2, first_merge_op2_upper}}; + // Lexicographical order: [default] < bar < baz < foo + const WideColumns second_expected{ + {kDefaultWideColumnName, kPlain}, + {second_merge_op1, second_merge_op1_upper}, + {second_merge_op2, second_merge_op2_upper}, + {second_base_value, second_base_value_upper}}; + // Lexicographical order: [default] < four < one < three < two + const WideColumns third_expected{ + {kDefaultWideColumnName, kWide}, + {third_merge_op2, third_merge_op2_upper}, + {third_columns[0].name(), third_columns[0].value()}, + {third_merge_op1, third_merge_op1_upper}, + {third_columns[1].name(), third_columns[1].value()}}; + + RunTest(first_expected, second_expected, third_expected); +} + +TEST_F(DBWideMergeV3Test, MergeV3PlainOutput) { + // A test merge operator that always returns a plain value as result, namely + // the total number of operands serialized as a string. Base values are also + // counted as operands; specifically, a plain base value is counted as one + // operand, while a wide-column base value is counted as as many operands as + // the number of columns. 
+ class PlainOutputMergeOperator : public MergeOperator { + public: + bool FullMergeV3(const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const override { + assert(merge_out); + + size_t count = 0; + std::visit( + overload{[&](const std::monostate&) {}, + [&](const Slice&) { count = 1; }, + [&](const WideColumns& columns) { count = columns.size(); }}, + merge_in.existing_value); + + count += merge_in.operand_list.size(); + + merge_out->new_value = std::string(); + std::get(merge_out->new_value) = std::to_string(count); + + return true; + } + + const char* Name() const override { return "PlainOutputMergeOperator"; } + }; + + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.merge_operator = std::make_shared(); + Reopen(options); + + const WideColumns first_expected{{kDefaultWideColumnName, "2"}}; + const WideColumns second_expected{{kDefaultWideColumnName, "3"}}; + const WideColumns third_expected{{kDefaultWideColumnName, "4"}}; + + RunTest(first_expected, second_expected, third_expected); +} + TEST_F(DBWideBasicTest, CompactionFilter) { Options options = GetDefaultOptions(); options.create_if_missing = true; From 8b566964b8b30965ed5e70115921705379b750fe Mon Sep 17 00:00:00 2001 From: "Fan Zhang(DevX)" Date: Thu, 28 Sep 2023 10:42:04 -0700 Subject: [PATCH 163/386] remove unnecessary autodeps suppression tag from rocksdb/src (#11904) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11904 The tag is not needed, autodeps works fine with this file. it was added in D33962843 but the reason doing is not valid anymore. We are on the way of migrating most, if not all, users to autodeps, and deprecating the noautodeps tag. Changed the tag in template and run `python3 buckifier/buckify_rocksdb.py` for regeneration Reviewed By: jaykorean Differential Revision: D49711337 fbshipit-source-id: c21892adfbc92e2ad868413746a0938062b6a543 --- TARGETS | 2 -- buckifier/targets_cfg.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/TARGETS b/TARGETS index 86d62e2c5..62496a225 100644 --- a/TARGETS +++ b/TARGETS @@ -3,8 +3,6 @@ # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees. -# -# @noautodeps @nocodemods load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper") diff --git a/buckifier/targets_cfg.py b/buckifier/targets_cfg.py index 491c34d6e..66dd173c2 100644 --- a/buckifier/targets_cfg.py +++ b/buckifier/targets_cfg.py @@ -6,8 +6,6 @@ # --> DO NOT EDIT MANUALLY <-- # This file is a Facebook-specific integration for buck builds, so can # only be validated by Facebook employees. -# -# @noautodeps @nocodemods load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper") """ From 01e2d33565cf930610253bf4f2082c8ae237d1f0 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 29 Sep 2023 08:54:50 -0700 Subject: [PATCH 164/386] Add the wide-column aware merge API to the stress tests (#11906) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11906 The patch adds stress test coverage for the wide-column aware `FullMergeV3` API by implementing a new `DBStressWideMergeOperator`. 
This operator is similar to `PutOperator` / `PutOperatorV2` in the sense that its result is based on the last merge operand; however, the merge result can be either a plain value or a wide-column entity, depending on the value base encoded into the operand and the value of the `use_put_entity_one_in` stress test parameter. Following the same rule for merge results that we do for writes ensures that the queries issued by the validation logic receive the expected results. The new operator is used instead of `PutOperatorV2` whenever `use_put_entity_one_in` is positive. Note that the patch also makes it possible to set `use_put_entity_one_in` and `use_merge` (but not `use_full_merge_v1`) at the same time, giving `use_put_entity_one_in` precedence, so the stress test will use `PutEntity` for writes passing the `use_put_entity_one_in` check described above and `Merge` for any other writes. Reviewed By: jaykorean Differential Revision: D49760024 fbshipit-source-id: 3893602c3e7935381b484f4f5026f1983e3a04a9 --- TARGETS | 1 + db_stress_tool/CMakeLists.txt | 1 + db_stress_tool/batched_ops_stress.cc | 8 +-- db_stress_tool/cf_consistency_stress.cc | 11 ++-- db_stress_tool/db_stress_test_base.cc | 19 ++++--- db_stress_tool/db_stress_tool.cc | 8 +-- .../db_stress_wide_merge_operator.cc | 51 +++++++++++++++++++ .../db_stress_wide_merge_operator.h | 27 ++++++++++ db_stress_tool/no_batched_ops_stress.cc | 17 +++---- src.mk | 1 + tools/db_crashtest.py | 3 +- 11 files changed, 113 insertions(+), 34 deletions(-) create mode 100644 db_stress_tool/db_stress_wide_merge_operator.cc create mode 100644 db_stress_tool/db_stress_wide_merge_operator.h diff --git a/TARGETS b/TARGETS index 62496a225..f56cceb40 100644 --- a/TARGETS +++ b/TARGETS @@ -393,6 +393,7 @@ rocks_cpp_library_wrapper(name="rocksdb_stress_lib", srcs=[ "db_stress_tool/db_stress_stat.cc", "db_stress_tool/db_stress_test_base.cc", "db_stress_tool/db_stress_tool.cc", + "db_stress_tool/db_stress_wide_merge_operator.cc", "db_stress_tool/expected_state.cc", "db_stress_tool/expected_value.cc", "db_stress_tool/multi_ops_txns_stress.cc", diff --git a/db_stress_tool/CMakeLists.txt b/db_stress_tool/CMakeLists.txt index 51d6ea0d6..60c02e173 100644 --- a/db_stress_tool/CMakeLists.txt +++ b/db_stress_tool/CMakeLists.txt @@ -9,6 +9,7 @@ add_executable(db_stress${ARTIFACT_SUFFIX} db_stress_shared_state.cc db_stress_stat.cc db_stress_test_base.cc + db_stress_wide_merge_operator.cc db_stress_tool.cc expected_state.cc expected_value.cc diff --git a/db_stress_tool/batched_ops_stress.cc b/db_stress_tool/batched_ops_stress.cc index 0872f2842..7fb89b60b 100644 --- a/db_stress_tool/batched_ops_stress.cc +++ b/db_stress_tool/batched_ops_stress.cc @@ -52,11 +52,11 @@ class BatchedOpsStressTest : public StressTest { const std::string k = num + key_body; const std::string v = value_body + num; - if (FLAGS_use_merge) { - batch.Merge(cfh, k, v); - } else if (FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0) { + if (FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0) { batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v)); + } else if (FLAGS_use_merge) { + batch.Merge(cfh, k, v); } else { batch.Put(cfh, k, v); } diff --git a/db_stress_tool/cf_consistency_stress.cc b/db_stress_tool/cf_consistency_stress.cc index f3d9b71d9..a7b0895f3 100644 --- a/db_stress_tool/cf_consistency_stress.cc +++ b/db_stress_tool/cf_consistency_stress.cc @@ -36,18 +36,15 @@ class CfConsistencyStressTest : public StressTest { 
WriteBatch batch; - const bool use_put_entity = !FLAGS_use_merge && - FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0; - for (auto cf : rand_column_families) { ColumnFamilyHandle* const cfh = column_families_[cf]; assert(cfh); - if (FLAGS_use_merge) { - batch.Merge(cfh, k, v); - } else if (use_put_entity) { + if (FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0) { batch.PutEntity(cfh, k, GenerateWideColumns(value_base, v)); + } else if (FLAGS_use_merge) { + batch.Merge(cfh, k, v); } else { batch.Put(cfh, k, v); } diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 3f28b7a8e..80c1858e7 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -17,6 +17,7 @@ #include "db_stress_tool/db_stress_compaction_filter.h" #include "db_stress_tool/db_stress_driver.h" #include "db_stress_tool/db_stress_table_properties_collector.h" +#include "db_stress_tool/db_stress_wide_merge_operator.h" #include "rocksdb/convenience.h" #include "rocksdb/filter_policy.h" #include "rocksdb/secondary_cache.h" @@ -511,7 +512,11 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, ts = GetNowNanos(); } - if (FLAGS_use_merge) { + if (FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0) { + s = db_->PutEntity(write_opts, cfh, key, + GenerateWideColumns(value_base, v)); + } else if (FLAGS_use_merge) { if (!FLAGS_use_txn) { if (FLAGS_user_timestamp_size > 0) { s = db_->Merge(write_opts, cfh, key, ts, v); @@ -523,9 +528,6 @@ void StressTest::PreloadDbAndReopenAsReadOnly(int64_t number_of_keys, write_opts, /*thread=*/nullptr, [&](Transaction& txn) { return txn.Merge(cfh, key, v); }); } - } else if (FLAGS_use_put_entity_one_in > 0) { - s = db_->PutEntity(write_opts, cfh, key, - GenerateWideColumns(value_base, v)); } else { if (!FLAGS_use_txn) { if (FLAGS_user_timestamp_size > 0) { @@ -2755,8 +2757,7 @@ void StressTest::Open(SharedState* shared, bool reopen) { if (s.ok()) { db_ = blob_db; } - } else - { + } else { if (db_preload_finished_.load() && FLAGS_read_only) { s = DB::OpenForReadOnly(DBOptions(options_), FLAGS_db, cf_descriptors, &column_families_, &db_); @@ -3337,7 +3338,11 @@ void InitializeOptionsFromFlags( if (FLAGS_use_full_merge_v1) { options.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); } else { - options.merge_operator = MergeOperators::CreatePutOperator(); + if (FLAGS_use_put_entity_one_in > 0) { + options.merge_operator = std::make_shared(); + } else { + options.merge_operator = MergeOperators::CreatePutOperator(); + } } if (FLAGS_enable_compaction_filter) { diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 787efe47d..9c57dafd7 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -308,11 +308,11 @@ int db_stress_tool(int argc, char** argv) { } if (FLAGS_use_put_entity_one_in > 0 && - (FLAGS_use_merge || FLAGS_use_full_merge_v1 || FLAGS_use_txn || - FLAGS_test_multi_ops_txns || FLAGS_user_timestamp_size > 0)) { + (FLAGS_use_full_merge_v1 || FLAGS_use_txn || FLAGS_test_multi_ops_txns || + FLAGS_user_timestamp_size > 0)) { fprintf(stderr, - "PutEntity is currently incompatible with Merge," - " transactions, and user-defined timestamps\n"); + "Wide columns are incompatible with V1 Merge, transactions, and " + "user-defined timestamps\n"); exit(1); } diff --git a/db_stress_tool/db_stress_wide_merge_operator.cc 
b/db_stress_tool/db_stress_wide_merge_operator.cc new file mode 100644 index 000000000..1fcfc3042 --- /dev/null +++ b/db_stress_tool/db_stress_wide_merge_operator.cc @@ -0,0 +1,51 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifdef GFLAGS + +#include "db_stress_tool/db_stress_wide_merge_operator.h" + +#include "db_stress_tool/db_stress_common.h" + +namespace ROCKSDB_NAMESPACE { + +bool DBStressWideMergeOperator::FullMergeV3( + const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const { + assert(!merge_in.operand_list.empty()); + assert(merge_out); + + const Slice& latest = merge_in.operand_list.back(); + + if (latest.size() < sizeof(uint32_t)) { + return false; + } + + const uint32_t value_base = GetValueBase(latest); + + if (FLAGS_use_put_entity_one_in == 0 || + (value_base % FLAGS_use_put_entity_one_in) != 0) { + merge_out->new_value = latest; + return true; + } + + const auto columns = GenerateWideColumns(value_base, latest); + + merge_out->new_value = MergeOperationOutputV3::NewColumns(); + auto& new_columns = + std::get(merge_out->new_value); + new_columns.reserve(columns.size()); + + for (const auto& column : columns) { + new_columns.emplace_back(column.name().ToString(), + column.value().ToString()); + } + + return true; +} + +} // namespace ROCKSDB_NAMESPACE + +#endif // GFLAGS diff --git a/db_stress_tool/db_stress_wide_merge_operator.h b/db_stress_tool/db_stress_wide_merge_operator.h new file mode 100644 index 000000000..cba4f6b6b --- /dev/null +++ b/db_stress_tool/db_stress_wide_merge_operator.h @@ -0,0 +1,27 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/merge_operator.h" + +namespace ROCKSDB_NAMESPACE { + +// A test merge operator that implements the wide-column aware FullMergeV3 +// interface. Similarly to the simple "put" type merge operators, the merge +// result is based on the last merge operand; however, the merge result can +// potentially be a wide-column entity, depending on the value base encoded into +// the merge operand and the value of the "use_put_entity_one_in" stress test +// option. Following the same rule as for writes ensures that the queries +// issued by the validation logic receive the expected results. 
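+//
+// For example, with use_put_entity_one_in == 4, a last operand whose encoded
+// value base is 8 (divisible by 4) merges into a wide-column entity, while a
+// value base of 9 merges into the plain operand value.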
+class DBStressWideMergeOperator : public MergeOperator { + public: + bool FullMergeV3(const MergeOperationInputV3& merge_in, + MergeOperationOutputV3* merge_out) const override; + + const char* Name() const override { return "DBStressWideMergeOperator"; } +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 80e5942e8..eeb44560d 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1279,7 +1279,11 @@ class NonBatchedOpsStressTest : public StressTest { Status s; - if (FLAGS_use_merge) { + if (FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0) { + s = db_->PutEntity(write_opts, cfh, k, + GenerateWideColumns(value_base, v)); + } else if (FLAGS_use_merge) { if (!FLAGS_use_txn) { if (FLAGS_user_timestamp_size == 0) { s = db_->Merge(write_opts, cfh, k, v); @@ -1291,10 +1295,6 @@ class NonBatchedOpsStressTest : public StressTest { return txn.Merge(cfh, k, v); }); } - } else if (FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0) { - s = db_->PutEntity(write_opts, cfh, k, - GenerateWideColumns(value_base, v)); } else { if (!FLAGS_use_txn) { if (FLAGS_user_timestamp_size == 0) { @@ -1542,11 +1542,8 @@ class NonBatchedOpsStressTest : public StressTest { const Slice k(key_str); const Slice v(value, value_len); - const bool use_put_entity = - !FLAGS_use_merge && FLAGS_use_put_entity_one_in > 0 && - (value_base % FLAGS_use_put_entity_one_in) == 0; - - if (use_put_entity) { + if (FLAGS_use_put_entity_one_in > 0 && + (value_base % FLAGS_use_put_entity_one_in) == 0) { WideColumns columns = GenerateWideColumns(value_base, v); s = sst_file_writer.PutEntity(k, columns); } else { diff --git a/src.mk b/src.mk index edc8bdef5..f6927256a 100644 --- a/src.mk +++ b/src.mk @@ -380,6 +380,7 @@ STRESS_LIB_SOURCES = \ db_stress_tool/db_stress_stat.cc \ db_stress_tool/db_stress_test_base.cc \ db_stress_tool/db_stress_tool.cc \ + db_stress_tool/db_stress_wide_merge_operator.cc \ db_stress_tool/expected_state.cc \ db_stress_tool/expected_value.cc \ db_stress_tool/no_batched_ops_stress.cc \ diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index dbfe1c68b..8b5d29d08 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -673,9 +673,8 @@ def finalize_and_sanitize(src_params): if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0: dest_params["sync_fault_injection"] = 0 dest_params["manual_wal_flush_one_in"] = 0 - # PutEntity is currently incompatible with Merge + # Wide column stress tests require FullMergeV3 if dest_params["use_put_entity_one_in"] != 0: - dest_params["use_merge"] = 0 dest_params["use_full_merge_v1"] = 0 if dest_params["file_checksum_impl"] == "none": dest_params["verify_file_checksums_one_in"] = 0 From 2cfe53ec05639ff04d8922399b5e5f0fb8f76edb Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 29 Sep 2023 09:58:40 -0700 Subject: [PATCH 165/386] Add helpful message for ldb when unknown option found (#11907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Users may run into an issue when running ldb on db that's in a different version and they have different set of options: `Failed: Invalid argument: Could not find option: ` They can work around this by setting `--ignore_unknown_options`, but the error message is not clear for users to find why the option is missing. 
It's also hard for the users to find the `ignore_unknown_options` option especially if they are not familiar with the codebase or `ldb` tool. This PR changes the error message to help users to find out what's wrong and possible workaround for the issue Pull Request resolved: https://github.com/facebook/rocksdb/pull/11907 Test Plan: Testing by reproducing the issue locally ``` ❯./ldb --db=/data/users/jewoongh/db_crash_whitebox_T164195541/ get a Failed: Invalid argument: Could not find option: : unknown_option_test This tool was built with version 8.8.0. If your db is in a different version, please try again with option --ignore_unknown_options. ``` Reviewed By: jowlyzhang Differential Revision: D49762291 Pulled By: jaykorean fbshipit-source-id: 895570150fde886d5ec524908c4b2664c9230ac9 --- tools/ldb_cmd.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 490773c14..ff2483dce 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -932,7 +932,15 @@ void LDBCommand::PrepareOptions() { &column_families_); if (!s.ok() && !s.IsNotFound()) { // Option file exists but load option file error. - std::string msg = s.ToString(); + std::string current_version = std::to_string(ROCKSDB_MAJOR) + "." + + std::to_string(ROCKSDB_MINOR) + "." + + std::to_string(ROCKSDB_PATCH); + std::string msg = + s.ToString() + "\nThis tool was built with version " + + current_version + + ". If your db is in a different version, please try again " + "with option --" + + LDBCommand::ARG_IGNORE_UNKNOWN_OPTIONS + "."; exec_state_ = LDBCommandExecuteResult::Failed(msg); db_ = nullptr; return; From 02443dd93f32a67cb2b0c3545ebd2c5a1111b4fa Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 29 Sep 2023 11:21:59 -0700 Subject: [PATCH 166/386] Refactor, clean up, fixes, and more testing for SeqnoToTimeMapping (#11905) Summary: This change is before a planned DBImpl change to ensure all sufficiently recent sequence numbers since Open are covered by SeqnoToTimeMapping (bug fix with existing test work-arounds). **Intended follow-up** However, I found enough issues with SeqnoToTimeMapping to warrant this PR first, including very small fixes in DB implementation related to API contract of SeqnoToTimeMapping. Functional fixes / changes: * This fixes some mishandling of boundary cases. For example, if the user decides to stop writing to DB, the last written sequence number would perpetually have its write time updated to "now" and would always be ineligible for migration to cold tier. Part of the problem is that the SeqnoToTimeMapping would return a seqno known to have been written before (immediately or otherwise) the requested time, but compaction_job.cc would include that seqno in the preserve/exclude set. That is fixed (in part) by adding one in compaction_job.cc * That problem was worse because a whole range of seqnos could be updated perpetually with new times in SeqnoToTimeMapping::Append (if no writes to DB). That logic was apparently optimized for GetOldestApproximateTime (now GetProximalTimeBeforeSeqno), which is not used in production, to the detriment of GetOldestSequenceNum (now GetProximalSeqnoBeforeTime), which is used in production. (Perhaps plans changed during development?) This is fixed in Append to optimize for accuracy of GetProximalSeqnoBeforeTime. (Unit tests added and updated.) * Related: SeqnoToTimeMapping did not have a clear contract about the relationships between seqnos and times, just the idea of a rough correspondence. 
Now the class description makes it clear that the write time of each recorded seqno comes before or at the associated time, to support getting best results for GetProximalSeqnoBeforeTime. And this makes it easier to make clear the contract of each API function. * Update `DBImpl::RecordSeqnoToTimeMapping()` to follow this ordering in gathering samples. Some part of these changes has required an expanded test work-around for the problem (see intended follow-up above) that the DB does not immediately ensure recent seqnos are covered by its mapping. These work-arounds will be removed with that planned work. An apparent compaction bug is revealed in PrecludeLastLevelTest::RangeDelsCauseFileEndpointsToOverlap, so that test is disabled. Filed GitHub issue #11909 Cosmetic / code safety things (not exhaustive): * Fix some confusing names. * `seqno_time_mapping` was used inconsistently in places. Now just `seqno_to_time_mapping` to correspond to class name. * Rename confusing `GetOldestSequenceNum` -> `GetProximalSeqnoBeforeTime` and `GetOldestApproximateTime` -> `GetProximalTimeBeforeSeqno`. Part of the motivation is that our times and seqnos here have the same underlying type, so we want to be clear about which is expected where to avoid mixing. * Rename `kUnknownSeqnoTime` to `kUnknownTimeBeforeAll` because the value is a bad choice for unknown if we ever add ProximalAfterBlah functions. * Arithmetic on SeqnoTimePair doesn't make sense except for delta encoding, so use better names / APIs with that in mind. * (OMG) Don't allow direct comparison between SeqnoTimePair and SequenceNumber. (There is no checking that it isn't compared against time by accident.) * A field name essentially matching the containing class name is a confusing pattern (`seqno_time_mapping_`). * Wrap calls to confusing (but useful) upper_bound and lower_bound functions to have clearer names and more code reuse. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11905 Test Plan: GetOldestSequenceNum (now GetProximalSeqnoBeforeTime) and TruncateOldEntries were lacking unit tests, despite both being used in production (experimental feature). Added those and expanded others. 
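For reference, a minimal sketch of the query contract exercised by the new unit tests:

```
SeqnoToTimeMapping mapping(/*max_time_duration=*/100, /*max_capacity=*/10);
mapping.Append(10, 500);  // seqno 10 was written at or before time 500
mapping.Append(20, 600);  // seqno 20 was written at or before time 600
mapping.GetProximalSeqnoBeforeTime(600);  // == 20
mapping.GetProximalSeqnoBeforeTime(599);  // == 10
mapping.GetProximalTimeBeforeSeqno(11);   // == 500
mapping.GetProximalTimeBeforeSeqno(10);   // == kUnknownTimeBeforeAll
```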
Reviewed By: jowlyzhang Differential Revision: D49755592 Pulled By: pdillinger fbshipit-source-id: f72a3baac74d24b963c77e538bba89a7fc8dce51 --- db/builder.cc | 6 +- db/compaction/compaction_job.cc | 26 ++- db/compaction/compaction_job.h | 2 +- db/compaction/compaction_outputs.cc | 14 +- db/compaction/compaction_outputs.h | 2 +- db/compaction/tiered_compaction_test.cc | 44 ++++- db/db_impl/db_impl.cc | 10 +- db/db_impl/db_impl.h | 4 +- db/db_impl/db_impl_compaction_flush.cc | 4 +- db/db_impl/db_impl_debug.cc | 2 +- db/db_impl/db_impl_open.cc | 4 +- db/flush_job.cc | 13 +- db/flush_job.h | 6 +- db/repair.cc | 7 +- db/seqno_time_test.cc | 235 ++++++++++++++++++++---- db/seqno_to_time_mapping.cc | 151 ++++++++------- db/seqno_to_time_mapping.h | 124 ++++++++----- 17 files changed, 451 insertions(+), 203 deletions(-) diff --git a/db/builder.cc b/db/builder.cc index a3a6bc47e..d3040ee9e 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -294,12 +294,12 @@ Status BuildTable( if (!s.ok() || empty) { builder->Abandon(); } else { - std::string seqno_time_mapping_str; + std::string seqno_to_time_mapping_str; seqno_to_time_mapping.Encode( - seqno_time_mapping_str, meta->fd.smallest_seqno, + seqno_to_time_mapping_str, meta->fd.smallest_seqno, meta->fd.largest_seqno, meta->file_creation_time); builder->SetSeqnoTimeTableProperties( - seqno_time_mapping_str, + seqno_to_time_mapping_str, ioptions.compaction_style == CompactionStyle::kCompactionStyleFIFO ? meta->file_creation_time : meta->oldest_ancester_time); diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index a5de19a39..bf8ce25a6 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -288,23 +288,23 @@ void CompactionJob::Prepare() { if (preserve_time_duration > 0) { const ReadOptions read_options(Env::IOActivity::kCompaction); - // setup seqno_time_mapping_ - seqno_time_mapping_.SetMaxTimeDuration(preserve_time_duration); + // setup seqno_to_time_mapping_ + seqno_to_time_mapping_.SetMaxTimeDuration(preserve_time_duration); for (const auto& each_level : *c->inputs()) { for (const auto& fmd : each_level.files) { std::shared_ptr tp; Status s = cfd->current()->GetTableProperties(read_options, &tp, fmd, nullptr); if (s.ok()) { - seqno_time_mapping_.Add(tp->seqno_to_time_mapping) + seqno_to_time_mapping_.Add(tp->seqno_to_time_mapping) .PermitUncheckedError(); - seqno_time_mapping_.Add(fmd->fd.smallest_seqno, - fmd->oldest_ancester_time); + seqno_to_time_mapping_.Add(fmd->fd.smallest_seqno, + fmd->oldest_ancester_time); } } } - auto status = seqno_time_mapping_.Sort(); + auto status = seqno_to_time_mapping_.Sort(); if (!status.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Invalid sequence number to time mapping: Status: %s", @@ -320,13 +320,17 @@ void CompactionJob::Prepare() { preserve_time_min_seqno_ = 0; preclude_last_level_min_seqno_ = 0; } else { - seqno_time_mapping_.TruncateOldEntries(_current_time); + seqno_to_time_mapping_.TruncateOldEntries(_current_time); uint64_t preserve_time = static_cast(_current_time) > preserve_time_duration ? _current_time - preserve_time_duration : 0; + // GetProximalSeqnoBeforeTime tells us the last seqno known to have been + // written at or before the given time. + 1 to get the minimum we should + // preserve without excluding anything that might have been written on or + // after the given time. 
preserve_time_min_seqno_ = - seqno_time_mapping_.GetOldestSequenceNum(preserve_time); + seqno_to_time_mapping_.GetProximalSeqnoBeforeTime(preserve_time) + 1; if (c->immutable_options()->preclude_last_level_data_seconds > 0) { uint64_t preclude_last_level_time = static_cast(_current_time) > @@ -335,7 +339,9 @@ void CompactionJob::Prepare() { c->immutable_options()->preclude_last_level_data_seconds : 0; preclude_last_level_min_seqno_ = - seqno_time_mapping_.GetOldestSequenceNum(preclude_last_level_time); + seqno_to_time_mapping_.GetProximalSeqnoBeforeTime( + preclude_last_level_time) + + 1; } } } @@ -1570,7 +1576,7 @@ Status CompactionJob::FinishCompactionOutputFile( const uint64_t current_entries = outputs.NumEntries(); - s = outputs.Finish(s, seqno_time_mapping_); + s = outputs.Finish(s, seqno_to_time_mapping_); if (s.ok()) { // With accurate smallest and largest key, we can get a slightly more diff --git a/db/compaction/compaction_job.h b/db/compaction/compaction_job.h index 72d256735..e812cfc72 100644 --- a/db/compaction/compaction_job.h +++ b/db/compaction/compaction_job.h @@ -350,7 +350,7 @@ class CompactionJob { // Stores the sequence number to time mapping gathered from all input files // it also collects the smallest_seqno -> oldest_ancester_time from the SST. - SeqnoToTimeMapping seqno_time_mapping_; + SeqnoToTimeMapping seqno_to_time_mapping_; // Minimal sequence number for preserving the time information. The time info // older than this sequence number won't be preserved after the compaction and diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc index 3e21484c4..eb76cd849 100644 --- a/db/compaction/compaction_outputs.cc +++ b/db/compaction/compaction_outputs.cc @@ -18,16 +18,18 @@ void CompactionOutputs::NewBuilder(const TableBuilderOptions& tboptions) { builder_.reset(NewTableBuilder(tboptions, file_writer_.get())); } -Status CompactionOutputs::Finish(const Status& intput_status, - const SeqnoToTimeMapping& seqno_time_mapping) { +Status CompactionOutputs::Finish( + const Status& intput_status, + const SeqnoToTimeMapping& seqno_to_time_mapping) { FileMetaData* meta = GetMetaData(); assert(meta != nullptr); Status s = intput_status; if (s.ok()) { - std::string seqno_time_mapping_str; - seqno_time_mapping.Encode(seqno_time_mapping_str, meta->fd.smallest_seqno, - meta->fd.largest_seqno, meta->file_creation_time); - builder_->SetSeqnoTimeTableProperties(seqno_time_mapping_str, + std::string seqno_to_time_mapping_str; + seqno_to_time_mapping.Encode( + seqno_to_time_mapping_str, meta->fd.smallest_seqno, + meta->fd.largest_seqno, meta->file_creation_time); + builder_->SetSeqnoTimeTableProperties(seqno_to_time_mapping_str, meta->oldest_ancester_time); s = builder_->Finish(); diff --git a/db/compaction/compaction_outputs.h b/db/compaction/compaction_outputs.h index 6c3e3b6b3..18246cf2f 100644 --- a/db/compaction/compaction_outputs.h +++ b/db/compaction/compaction_outputs.h @@ -107,7 +107,7 @@ class CompactionOutputs { // Finish the current output file Status Finish(const Status& intput_status, - const SeqnoToTimeMapping& seqno_time_mapping); + const SeqnoToTimeMapping& seqno_to_time_mapping); // Update output table properties from table builder void UpdateTableProperties() { diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index d8aa229df..51f931603 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1249,8 +1249,11 @@ TEST_F(PrecludeLastLevelTest, 
MigrationFromPreserveTimeManualCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -1311,8 +1314,11 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimeAutoCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -1387,8 +1393,11 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimePartial) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -1514,8 +1523,11 @@ TEST_F(PrecludeLastLevelTest, LastLevelOnlyCompactionPartial) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -1592,8 +1604,11 @@ TEST_P(PrecludeLastLevelTestWithParms, LastLevelOnlyCompactionNoPreclude) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -1906,8 +1921,11 @@ TEST_F(PrecludeLastLevelTest, PartialPenultimateLevelCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: 
kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); @@ -1996,7 +2014,13 @@ TEST_F(PrecludeLastLevelTest, PartialPenultimateLevelCompaction) { Close(); } -TEST_F(PrecludeLastLevelTest, RangeDelsCauseFileEndpointsToOverlap) { +// FIXME broken test: +// dbfull()->TEST_WaitForCompact() +// Corruption: force_consistency_checks(DEBUG): VersionBuilder: L5 has +// overlapping ranges: +// file #14 largest key: '6B6579303030303134' seq:32, type:1 vs. +// file #19 smallest key: '6B6579303030303130' seq:10, type:1 +TEST_F(PrecludeLastLevelTest, DISABLED_RangeDelsCauseFileEndpointsToOverlap) { const int kNumLevels = 7; const int kSecondsPerKey = 10; const int kNumFiles = 3; @@ -2017,8 +2041,11 @@ TEST_F(PrecludeLastLevelTest, RangeDelsCauseFileEndpointsToOverlap) { options.target_file_size_base = kFileBytes; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun([&] { mock_clock_->MockSleepForSeconds(static_cast(kSecondsPerKey)); }); @@ -2139,7 +2166,6 @@ TEST_F(PrecludeLastLevelTest, RangeDelsCauseFileEndpointsToOverlap) { Close(); } - } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 1af0a1fec..204d56e7d 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -830,9 +830,9 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() { } } if (min_time_duration == std::numeric_limits::max()) { - seqno_time_mapping_.Resize(0, 0); + seqno_to_time_mapping_.Resize(0, 0); } else { - seqno_time_mapping_.Resize(min_time_duration, max_time_duration); + seqno_to_time_mapping_.Resize(min_time_duration, max_time_duration); } } @@ -6371,16 +6371,18 @@ Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) { } void DBImpl::RecordSeqnoToTimeMapping() { + // TECHNICALITY: Sample last sequence number *before* time, as prescribed + // for SeqnoToTimeMapping + SequenceNumber seqno = GetLatestSequenceNumber(); // Get time first then sequence number, so the actual time of seqno is <= // unix_time recorded int64_t unix_time = 0; immutable_db_options_.clock->GetCurrentTime(&unix_time) .PermitUncheckedError(); // Ignore error - SequenceNumber seqno = GetLatestSequenceNumber(); bool appended = false; { InstrumentedMutexLock l(&mutex_); - appended = seqno_time_mapping_.Append(seqno, unix_time); + appended = seqno_to_time_mapping_.Append(seqno, unix_time); } if (!appended) { ROCKS_LOG_WARN(immutable_db_options_.info_log, diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 98565bc70..3f114fa36 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2748,9 +2748,9 @@ class DBImpl : public DB { // Pointer to WriteBufferManager stalling interface. std::unique_ptr wbm_stall_; - // seqno_time_mapping_ stores the sequence number to time mapping, it's not + // seqno_to_time_mapping_ stores the sequence number to time mapping, it's not // thread safe, both read and write need db mutex hold. 
- SeqnoToTimeMapping seqno_time_mapping_; + SeqnoToTimeMapping seqno_to_time_mapping_; // Stop write token that is acquired when first LockWAL() is called. // Destroyed when last UnlockWAL() is called. Controlled by DB mutex. diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index b610b8d33..49da5acd3 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -250,7 +250,7 @@ Status DBImpl::FlushMemTableToOutputFile( GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, true /* sync_output_directory */, true /* write_manifest */, thread_pri, - io_tracer_, seqno_time_mapping_, db_id_, db_session_id_, + io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow(), &blob_callback_); FileMetaData file_meta; @@ -522,7 +522,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_, &event_logger_, mutable_cf_options.report_bg_io_stats, false /* sync_output_directory */, false /* write_manifest */, - thread_pri, io_tracer_, seqno_time_mapping_, db_id_, db_session_id_, + thread_pri, io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow(), &blob_callback_)); } diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index be63637a2..670bc7887 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -306,7 +306,7 @@ const PeriodicTaskScheduler& DBImpl::TEST_GetPeriodicTaskScheduler() const { SeqnoToTimeMapping DBImpl::TEST_GetSeqnoToTimeMapping() const { InstrumentedMutexLock l(&mutex_); - return seqno_time_mapping_; + return seqno_to_time_mapping_; } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index de164cc20..172a55879 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1649,7 +1649,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, TableFileCreationReason::kRecovery, 0 /* oldest_key_time */, 0 /* file_creation_time */, db_id_, db_session_id_, 0 /* target_file_size */, meta.fd.GetNumber()); - SeqnoToTimeMapping empty_seqno_time_mapping; + SeqnoToTimeMapping empty_seqno_to_time_mapping; Version* version = cfd->current(); version->Ref(); const ReadOptions read_option(Env::IOActivity::kDBOpen); @@ -1661,7 +1661,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber, snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s, io_tracer_, BlobFileCreationReason::kRecovery, - empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH, + empty_seqno_to_time_mapping, &event_logger_, job_id, Env::IO_HIGH, nullptr /* table_properties */, write_hint, nullptr /*full_history_ts_low*/, &blob_callback_, version, &num_input_entries); diff --git a/db/flush_job.cc b/db/flush_job.cc index 69303081f..a3e168823 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -100,7 +100,7 @@ FlushJob::FlushJob( Statistics* stats, EventLogger* event_logger, bool measure_io_stats, const bool sync_output_directory, const bool write_manifest, Env::Priority thread_pri, const std::shared_ptr& io_tracer, - const SeqnoToTimeMapping& seqno_time_mapping, const std::string& db_id, + const SeqnoToTimeMapping& seqno_to_time_mapping, const std::string& db_id, const std::string& db_session_id, std::string full_history_ts_low, BlobFileCompletionCallback* 
blob_callback) : dbname_(dbname), @@ -136,7 +136,7 @@ FlushJob::FlushJob( clock_(db_options_.clock), full_history_ts_low_(std::move(full_history_ts_low)), blob_callback_(blob_callback), - db_impl_seqno_time_mapping_(seqno_time_mapping) { + db_impl_seqno_to_time_mapping_(seqno_to_time_mapping) { // Update the thread status to indicate flush. ReportStartedFlush(); TEST_SYNC_POINT("FlushJob::FlushJob()"); @@ -851,10 +851,11 @@ Status FlushJob::WriteLevel0Table() { Status s; SequenceNumber smallest_seqno = mems_.front()->GetEarliestSequenceNumber(); - if (!db_impl_seqno_time_mapping_.Empty()) { - // make a local copy, as the seqno_time_mapping from db_impl is not thread - // safe, which will be used while not holding the db_mutex. - seqno_to_time_mapping_ = db_impl_seqno_time_mapping_.Copy(smallest_seqno); + if (!db_impl_seqno_to_time_mapping_.Empty()) { + // make a local copy, as the seqno_to_time_mapping from db_impl is not + // thread safe, which will be used while not holding the db_mutex. + seqno_to_time_mapping_ = + db_impl_seqno_to_time_mapping_.Copy(smallest_seqno); } std::vector blob_file_additions; diff --git a/db/flush_job.h b/db/flush_job.h index db5dbd719..aef33ef42 100644 --- a/db/flush_job.h +++ b/db/flush_job.h @@ -210,9 +210,9 @@ class FlushJob { const std::string full_history_ts_low_; BlobFileCompletionCallback* blob_callback_; - // reference to the seqno_time_mapping_ in db_impl.h, not safe to read without - // db mutex - const SeqnoToTimeMapping& db_impl_seqno_time_mapping_; + // reference to the seqno_to_time_mapping_ in db_impl.h, not safe to read + // without db mutex + const SeqnoToTimeMapping& db_impl_seqno_to_time_mapping_; SeqnoToTimeMapping seqno_to_time_mapping_; // Keeps track of the newest user-defined timestamp for this flush job if diff --git a/db/repair.cc b/db/repair.cc index 1af738fca..e303eae64 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -471,7 +471,7 @@ class Repairer { 0 /* file_creation_time */, "DB Repairer" /* db_id */, db_session_id_, 0 /*target_file_size*/, meta.fd.GetNumber()); - SeqnoToTimeMapping empty_seqno_time_mapping; + SeqnoToTimeMapping empty_seqno_to_time_mapping; status = BuildTable( dbname_, /* versions */ nullptr, immutable_db_options_, tboptions, file_options_, read_options, table_cache_.get(), iter.get(), @@ -479,8 +479,9 @@ class Repairer { {}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker, false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s, nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery, - empty_seqno_time_mapping, nullptr /* event_logger */, 0 /* job_id */, - Env::IO_HIGH, nullptr /* table_properties */, write_hint); + empty_seqno_to_time_mapping, nullptr /* event_logger */, + 0 /* job_id */, Env::IO_HIGH, nullptr /* table_properties */, + write_hint); ROCKS_LOG_INFO(db_options_.info_log, "Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s", log, counter, meta.fd.GetNumber(), diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc index b18b25512..e4c575167 100644 --- a/db/seqno_time_test.cc +++ b/db/seqno_time_test.cc @@ -12,7 +12,6 @@ #include "rocksdb/utilities/debug.h" #include "test_util/mock_time_env.h" - namespace ROCKSDB_NAMESPACE { class SeqnoTimeTest : public DBTestBase { @@ -77,8 +76,11 @@ TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) { options.num_levels = kNumLevels; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the 
first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); @@ -189,8 +191,11 @@ TEST_F(SeqnoTimeTest, TemperatureBasicLevel) { options.disable_auto_compactions = true; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownSeqnoTime + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); @@ -320,7 +325,7 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { DestroyAndReopen(options); std::set checked_file_nums; - SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber(); + SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber() + 1; // Write a key every 10 seconds for (int i = 0; i < 200; i++) { ASSERT_OK(Put(Key(i), "value")); @@ -341,15 +346,15 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { // passes 2k time. ASSERT_GE(seqs.size(), 19); ASSERT_LE(seqs.size(), 21); - SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber(); + SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber() + 1; for (auto i = start_seq; i < start_seq + 10; i++) { - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 1) * 10); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i + 1) * 10); } start_seq += 10; for (auto i = start_seq; i < seq_end; i++) { // The result is within the range - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - 10) * 10); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i + 10) * 10); + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - 10) * 10); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i + 10) * 10); } checked_file_nums.insert(it->second->orig_file_number); start_seq = seq_end; @@ -360,7 +365,7 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(1)); }); } - seq_end = dbfull()->GetLatestSequenceNumber(); + seq_end = dbfull()->GetLatestSequenceNumber() + 1; ASSERT_OK(Flush()); tables_props.clear(); ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); @@ -384,8 +389,8 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { for (auto i = start_seq; i < seq_end; i++) { // The result is not very accurate, as there is more data write within small // range of time - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 1000); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000); + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 1000); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 3000); } checked_file_nums.insert(it->second->orig_file_number); start_seq = seq_end; @@ -396,7 +401,7 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(200)); }); } - seq_end = dbfull()->GetLatestSequenceNumber(); + seq_end = dbfull()->GetLatestSequenceNumber() + 1; ASSERT_OK(Flush()); tables_props.clear(); 
ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); @@ -419,14 +424,14 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { ASSERT_LE(seqs.size(), 101); for (auto i = start_seq; i < seq_end - 99; i++) { // likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), (i - start_seq) + 3000); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 3000); } start_seq += 101; for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) * 200 + 22200); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) * 200 + 22600); } checked_file_nums.insert(it->second->orig_file_number); @@ -438,7 +443,7 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); } - seq_end = dbfull()->GetLatestSequenceNumber(); + seq_end = dbfull()->GetLatestSequenceNumber() + 1; ASSERT_OK(Flush()); tables_props.clear(); ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props)); @@ -486,15 +491,15 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { ASSERT_LE(seqs.size(), 101); for (auto i = start_seq; i < seq_end - 99; i++) { // likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) * 100 + 50000); } start_seq += 101; for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetOldestApproximateTime(i), + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) * 100 + 52200); - ASSERT_LE(tp_mapping.GetOldestApproximateTime(i), + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) * 100 + 52400); } ASSERT_OK(db_->Close()); @@ -707,6 +712,14 @@ TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) { DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(SingleDelete("foo")); + // pass some time first, otherwise the first a few keys write time are going + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll + dbfull()->TEST_WaitForPeriodicTaskRun( + [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); + std::atomic_uint64_t num_seqno_zeroing{0}; SyncPoint::GetInstance()->DisableProcessing(); @@ -843,8 +856,9 @@ TEST_F(SeqnoTimeTest, MappingAppend) { ASSERT_FALSE(test.Append(8, 12)); ASSERT_EQ(size, test.Size()); - // Append with the same seqno, newer time will be accepted - ASSERT_TRUE(test.Append(10, 12)); + // Append with the same seqno, newer time is rejected because that makes + // GetProximalSeqnoBeforeTime queries worse (see later test) + ASSERT_FALSE(test.Append(10, 12)); ASSERT_EQ(size, test.Size()); // older time will be ignored ASSERT_FALSE(test.Append(10, 9)); @@ -853,25 +867,176 @@ TEST_F(SeqnoTimeTest, MappingAppend) { // new seqno with old time will be ignored ASSERT_FALSE(test.Append(12, 8)); ASSERT_EQ(size, test.Size()); + + // new seqno with same time is accepted by replacing last entry + // (improves GetProximalSeqnoBeforeTime queries without blowing up size) + ASSERT_TRUE(test.Append(12, 11)); + ASSERT_EQ(size, test.Size()); } -TEST_F(SeqnoTimeTest, GetOldestApproximateTime) { +TEST_F(SeqnoTimeTest, ProximalFunctions) { SeqnoToTimeMapping test(/*max_time_duration=*/100, /*max_capacity=*/10); 
- ASSERT_EQ(test.GetOldestApproximateTime(10), kUnknownSeqnoTime); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1), kUnknownTimeBeforeAll); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U), + kUnknownTimeBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1), kUnknownSeqnoBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U), + kUnknownSeqnoBeforeAll); + + // (Taken from example in SeqnoToTimeMapping class comment) + // Time 500 is after seqno 10 and before seqno 11 + EXPECT_TRUE(test.Append(10, 500)); + + // Seqno too early + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(9), kUnknownTimeBeforeAll); + // We only know that 500 is after 10 + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll); + // Found + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U), 500U); + + // Time too early + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll); + // Found + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(501), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U), 10U); + + // More samples + EXPECT_TRUE(test.Append(20, 600)); + EXPECT_TRUE(test.Append(30, 700)); + + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(20), 500U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(21), 600U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U), 700U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(501), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(601), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(701), 30U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U), 30U); + + // Redundant sample ignored + EXPECT_EQ(test.Size(), 3U); + EXPECT_FALSE(test.Append(30, 700)); + EXPECT_EQ(test.Size(), 3U); + + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U); + + // Later sample with same seqno is ignored, to provide best results + // for GetProximalSeqnoBeforeTime function while saving entries + // in SeqnoToTimeMapping. + EXPECT_FALSE(test.Append(30, 800)); + + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U); + // Could return 800, but saving space in SeqnoToTimeMapping instead. + // Can reconsider if/when GetProximalTimeBeforeSeqno is used in + // production. + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U); + // If the existing {30, 700} entry were replaced with {30, 800}, this + // would return seqno 20 instead of 30, which would preclude more than + // necessary for "preclude_last_level_data_seconds" feature. 
+ EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(800), 30U); + + // Still OK + EXPECT_TRUE(test.Append(40, 900)); + + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(41), 900U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 30U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(900), 40U); + + // Burst of writes during a short time creates an opportunity + // for better results from GetProximalSeqnoBeforeTime(), at the + // expense of GetProximalTimeBeforeSeqno(). + EXPECT_TRUE(test.Append(50, 900)); + + // These are subject to later revision depending on priorities + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(49), 700U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(51), 900U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 30U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(900), 50U); +} - test.Append(3, 10); +TEST_F(SeqnoTimeTest, TruncateOldEntries) { + constexpr uint64_t kMaxTimeDuration = 42; + SeqnoToTimeMapping test(kMaxTimeDuration, /*max_capacity=*/10); - ASSERT_EQ(test.GetOldestApproximateTime(2), kUnknownSeqnoTime); - ASSERT_EQ(test.GetOldestApproximateTime(3), 10); - ASSERT_EQ(test.GetOldestApproximateTime(10), 10); + EXPECT_EQ(test.Size(), 0U); - test.Append(10, 100); + // Safe on empty mapping + test.TruncateOldEntries(500); + + EXPECT_EQ(test.Size(), 0U); + + // (Taken from example in SeqnoToTimeMapping class comment) + // Time 500 is after seqno 10 and before seqno 11 + EXPECT_TRUE(test.Append(10, 500)); + EXPECT_TRUE(test.Append(20, 600)); + EXPECT_TRUE(test.Append(30, 700)); + EXPECT_TRUE(test.Append(40, 800)); + EXPECT_TRUE(test.Append(50, 900)); + + EXPECT_EQ(test.Size(), 5U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), 10U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U); + // etc. 
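+  // In each call below the cutoff time is now - kMaxTimeDuration; the newest
+  // entry with time at or before the cutoff is kept, and everything older is
+  // dropped.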
+ + // Must keep first entry + test.TruncateOldEntries(500 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 5U); + test.TruncateOldEntries(599 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 5U); + + // Purges first entry + test.TruncateOldEntries(600 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 4U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), kUnknownSeqnoBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), kUnknownSeqnoBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U); + + // No effect + test.TruncateOldEntries(600 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 4U); + test.TruncateOldEntries(699 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 4U); + + // Purges next two + test.TruncateOldEntries(899 + kMaxTimeDuration); + EXPECT_EQ(test.Size(), 2U); + + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(799), kUnknownSeqnoBeforeAll); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 40U); + + // Always keep last entry, to have a non-trivial seqno bound + test.TruncateOldEntries(10000000); + EXPECT_EQ(test.Size(), 1U); - test.Append(100, 1000); - ASSERT_EQ(test.GetOldestApproximateTime(10), 100); - ASSERT_EQ(test.GetOldestApproximateTime(40), 100); - ASSERT_EQ(test.GetOldestApproximateTime(111), 1000); + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(10000000), 50U); } TEST_F(SeqnoTimeTest, Sort) { @@ -930,10 +1095,10 @@ TEST_F(SeqnoTimeTest, EncodeDecodeBasic) { for (SequenceNumber seq = 0; seq <= 1000; seq++) { // test has the more accurate time mapping, encode only pick // kMaxSeqnoTimePairsPerSST number of entries, which is less accurate - uint64_t target_time = test.GetOldestApproximateTime(seq); - ASSERT_GE(decoded.GetOldestApproximateTime(seq), + uint64_t target_time = test.GetProximalTimeBeforeSeqno(seq); + ASSERT_GE(decoded.GetProximalTimeBeforeSeqno(seq), target_time < 200 ? 0 : target_time - 200); - ASSERT_LE(decoded.GetOldestApproximateTime(seq), target_time); + ASSERT_LE(decoded.GetProximalTimeBeforeSeqno(seq), target_time); } } diff --git a/db/seqno_to_time_mapping.cc b/db/seqno_to_time_mapping.cc index c69209929..ec7b2d9cb 100644 --- a/db/seqno_to_time_mapping.cc +++ b/db/seqno_to_time_mapping.cc @@ -11,14 +11,34 @@ namespace ROCKSDB_NAMESPACE { -uint64_t SeqnoToTimeMapping::GetOldestApproximateTime( - const SequenceNumber seqno) const { +SeqnoToTimeMapping::pair_const_iterator SeqnoToTimeMapping::FindGreaterTime( + uint64_t time) const { + return std::upper_bound(pairs_.cbegin(), pairs_.cend(), + SeqnoTimePair{0, time}, SeqnoTimePair::TimeLess); +} + +SeqnoToTimeMapping::pair_const_iterator SeqnoToTimeMapping::FindGreaterEqSeqno( + SequenceNumber seqno) const { + return std::lower_bound(pairs_.cbegin(), pairs_.cend(), + SeqnoTimePair{seqno, 0}, SeqnoTimePair::SeqnoLess); +} + +SeqnoToTimeMapping::pair_const_iterator SeqnoToTimeMapping::FindGreaterSeqno( + SequenceNumber seqno) const { + return std::upper_bound(pairs_.cbegin(), pairs_.cend(), + SeqnoTimePair{seqno, 0}, SeqnoTimePair::SeqnoLess); +} + +uint64_t SeqnoToTimeMapping::GetProximalTimeBeforeSeqno( + SequenceNumber seqno) const { assert(is_sorted_); - auto it = std::upper_bound(seqno_time_mapping_.begin(), - seqno_time_mapping_.end(), seqno); - if (it == seqno_time_mapping_.begin()) { - return 0; + // Find the last entry with a seqno strictly less than the given seqno. 
+ // First, find the first entry >= the given seqno (or end) + auto it = FindGreaterEqSeqno(seqno); + if (it == pairs_.cbegin()) { + return kUnknownTimeBeforeAll; } + // Then return data from previous. it--; return it->time; } @@ -28,44 +48,47 @@ void SeqnoToTimeMapping::Add(SequenceNumber seqno, uint64_t time) { return; } is_sorted_ = false; - seqno_time_mapping_.emplace_back(seqno, time); + pairs_.emplace_back(seqno, time); } void SeqnoToTimeMapping::TruncateOldEntries(const uint64_t now) { assert(is_sorted_); if (max_time_duration_ == 0) { + // No cutoff time + return; + } + + if (now < max_time_duration_) { + // Would under-flow return; } - const uint64_t cut_off_time = - now > max_time_duration_ ? now - max_time_duration_ : 0; - assert(cut_off_time <= now); // no overflow + const uint64_t cut_off_time = now - max_time_duration_; + assert(cut_off_time <= now); // no under/overflow - auto it = std::upper_bound( - seqno_time_mapping_.begin(), seqno_time_mapping_.end(), cut_off_time, - [](uint64_t target, const SeqnoTimePair& other) -> bool { - return target < other.time; - }); - if (it == seqno_time_mapping_.begin()) { + auto it = FindGreaterTime(cut_off_time); + if (it == pairs_.cbegin()) { return; } - it--; - seqno_time_mapping_.erase(seqno_time_mapping_.begin(), it); + // Move back one, to the entry that would be used to return a good seqno from + // GetProximalSeqnoBeforeTime(cut_off_time) + --it; + // Remove everything strictly before that entry + pairs_.erase(pairs_.cbegin(), std::move(it)); } -SequenceNumber SeqnoToTimeMapping::GetOldestSequenceNum(uint64_t time) { +SequenceNumber SeqnoToTimeMapping::GetProximalSeqnoBeforeTime(uint64_t time) { assert(is_sorted_); - auto it = std::upper_bound( - seqno_time_mapping_.begin(), seqno_time_mapping_.end(), time, - [](uint64_t target, const SeqnoTimePair& other) -> bool { - return target < other.time; - }); - if (it == seqno_time_mapping_.begin()) { - return 0; + // Find the last entry with a time <= the given time. + // First, find the first entry > the given time (or end). + auto it = FindGreaterTime(time); + if (it == pairs_.cbegin()) { + return kUnknownSeqnoBeforeAll; } - it--; + // Then return data from previous. 
+ --it; return it->seqno; } @@ -84,15 +107,13 @@ void SeqnoToTimeMapping::Encode(std::string& dest, const SequenceNumber start, return; } - auto start_it = std::upper_bound(seqno_time_mapping_.begin(), - seqno_time_mapping_.end(), start); - if (start_it != seqno_time_mapping_.begin()) { + auto start_it = FindGreaterSeqno(start); + if (start_it != pairs_.begin()) { start_it--; } - auto end_it = std::upper_bound(seqno_time_mapping_.begin(), - seqno_time_mapping_.end(), end); - if (end_it == seqno_time_mapping_.begin()) { + auto end_it = FindGreaterSeqno(end); + if (end_it == pairs_.begin()) { return; } if (start_it >= end_it) { @@ -108,7 +129,7 @@ void SeqnoToTimeMapping::Encode(std::string& dest, const SequenceNumber start, } } // to include the first element - if (start_it != seqno_time_mapping_.begin()) { + if (start_it != pairs_.begin()) { start_it--; } @@ -166,14 +187,14 @@ void SeqnoToTimeMapping::Encode(std::string& dest, const SequenceNumber start, SeqnoTimePair base; for (auto it = start_it; it < end_it; it++) { assert(base < *it); - SeqnoTimePair val = *it - base; + SeqnoTimePair val = it->ComputeDelta(base); base = *it; val.Encode(dest); } } -Status SeqnoToTimeMapping::Add(const std::string& seqno_time_mapping_str) { - Slice input(seqno_time_mapping_str); +Status SeqnoToTimeMapping::Add(const std::string& pairs_str) { + Slice input(pairs_str); if (input.empty()) { return Status::OK(); } @@ -189,8 +210,8 @@ Status SeqnoToTimeMapping::Add(const std::string& seqno_time_mapping_str) { if (!s.ok()) { return s; } - val.Add(base); - seqno_time_mapping_.emplace_back(val); + val.ApplyDelta(base); + pairs_.emplace_back(val); base = val; } return Status::OK(); @@ -222,19 +243,22 @@ bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) { return false; } if (seqno == Last().seqno) { - Last().time = time; - return true; + // Updating Last() would hurt GetProximalSeqnoBeforeTime() queries, so + // NOT doing it (for now) + return false; } if (time == Last().time) { - // new sequence has the same time as old one, no need to add new mapping - return false; + // Updating Last() here helps GetProximalSeqnoBeforeTime() queries, so + // doing it (for now) + Last().seqno = seqno; + return true; } } - seqno_time_mapping_.emplace_back(seqno, time); + pairs_.emplace_back(seqno, time); - if (seqno_time_mapping_.size() > max_capacity_) { - seqno_time_mapping_.pop_front(); + if (pairs_.size() > max_capacity_) { + pairs_.pop_front(); } return true; } @@ -245,10 +269,9 @@ bool SeqnoToTimeMapping::Resize(uint64_t min_time_duration, CalculateMaxCapacity(min_time_duration, max_time_duration); if (new_max_capacity == max_capacity_) { return false; - } else if (new_max_capacity < seqno_time_mapping_.size()) { - uint64_t delta = seqno_time_mapping_.size() - new_max_capacity; - seqno_time_mapping_.erase(seqno_time_mapping_.begin(), - seqno_time_mapping_.begin() + delta); + } else if (new_max_capacity < pairs_.size()) { + uint64_t delta = pairs_.size() - new_max_capacity; + pairs_.erase(pairs_.begin(), pairs_.begin() + delta); } max_capacity_ = new_max_capacity; return true; @@ -258,16 +281,16 @@ Status SeqnoToTimeMapping::Sort() { if (is_sorted_) { return Status::OK(); } - if (seqno_time_mapping_.empty()) { + if (pairs_.empty()) { is_sorted_ = true; return Status::OK(); } - std::deque copy = std::move(seqno_time_mapping_); + std::deque copy = std::move(pairs_); std::sort(copy.begin(), copy.end()); - seqno_time_mapping_.clear(); + pairs_.clear(); // remove seqno = 0, which may have special meaning, like 
zeroed out data while (copy.front().seqno == 0) { @@ -285,12 +308,12 @@ Status SeqnoToTimeMapping::Sort() { assert(it.seqno > prev.seqno); // If a larger sequence number has an older time which is not useful, skip if (it.time > prev.time) { - seqno_time_mapping_.push_back(prev); + pairs_.push_back(prev); prev = it; } } } - seqno_time_mapping_.emplace_back(prev); + pairs_.emplace_back(prev); is_sorted_ = true; return Status::OK(); @@ -298,7 +321,7 @@ Status SeqnoToTimeMapping::Sort() { std::string SeqnoToTimeMapping::ToHumanString() const { std::string ret; - for (const auto& seq_time : seqno_time_mapping_) { + for (const auto& seq_time : pairs_) { AppendNumberTo(&ret, seq_time.seqno); ret.append("->"); AppendNumberTo(&ret, seq_time.time); @@ -310,13 +333,11 @@ std::string SeqnoToTimeMapping::ToHumanString() const { SeqnoToTimeMapping SeqnoToTimeMapping::Copy( SequenceNumber smallest_seqno) const { SeqnoToTimeMapping ret; - auto it = std::upper_bound(seqno_time_mapping_.begin(), - seqno_time_mapping_.end(), smallest_seqno); - if (it != seqno_time_mapping_.begin()) { + auto it = FindGreaterSeqno(smallest_seqno); + if (it != pairs_.begin()) { it--; } - std::copy(it, seqno_time_mapping_.end(), - std::back_inserter(ret.seqno_time_mapping_)); + std::copy(it, pairs_.end(), std::back_inserter(ret.pairs_)); return ret; } @@ -330,12 +351,4 @@ uint64_t SeqnoToTimeMapping::CalculateMaxCapacity(uint64_t min_time_duration, max_time_duration * kMaxSeqnoTimePairsPerCF / min_time_duration); } -SeqnoToTimeMapping::SeqnoTimePair SeqnoToTimeMapping::SeqnoTimePair::operator-( - const SeqnoTimePair& other) const { - SeqnoTimePair res; - res.seqno = seqno - other.seqno; - res.time = time - other.time; - return res; -} - } // namespace ROCKSDB_NAMESPACE diff --git a/db/seqno_to_time_mapping.h b/db/seqno_to_time_mapping.h index 4ffc9c199..132d74957 100644 --- a/db/seqno_to_time_mapping.h +++ b/db/seqno_to_time_mapping.h @@ -18,20 +18,32 @@ namespace ROCKSDB_NAMESPACE { -constexpr uint64_t kUnknownSeqnoTime = 0; - -// SeqnoToTimeMapping stores the sequence number to time mapping, so given a -// sequence number it can estimate the oldest possible time for that sequence -// number. For example: -// 10 -> 100 -// 50 -> 300 -// then if a key has seqno 19, the OldestApproximateTime would be 100, for 51 it -// would be 300. -// As it's a sorted list, the new entry is inserted from the back. The old data -// will be popped from the front if they're no longer used. +constexpr uint64_t kUnknownTimeBeforeAll = 0; +constexpr SequenceNumber kUnknownSeqnoBeforeAll = 0; + +// SeqnoToTimeMapping stores a sampled mapping from sequence numbers to +// unix times (seconds since epoch). This information provides rough bounds +// between sequence numbers and their write times, but is primarily designed +// for getting a best lower bound on the sequence number of data written no +// later than a specified time. // -// Note: the data struct is not thread safe, both read and write need to be -// synchronized by caller. +// For ease of sampling, it is assumed that the recorded time in each pair +// comes at or after the sequence number and before the next sequence number, +// so this example: +// +// Seqno: 10, 11, ... 20, 21, ... 30, 31, ... +// Time: ... 500 ... 600 ... 700 ... +// +// would be represented as +// 10 -> 500 +// 20 -> 600 +// 30 -> 700 +// +// In typical operation, the list is sorted, both among seqnos and among times, +// with a bounded number of entries, but some public working states violate +// these constraints. 
+// +// NOT thread safe - requires external synchronization. class SeqnoToTimeMapping { public: // Maximum number of entries can be encoded into SST. The data is delta encode @@ -63,28 +75,33 @@ class SeqnoToTimeMapping { // Decode the value from input Slice and remove it from the input Status Decode(Slice& input); - // subtraction of 2 SeqnoTimePair - SeqnoTimePair operator-(const SeqnoTimePair& other) const; - - // Add 2 values together - void Add(const SeqnoTimePair& obj) { - seqno += obj.seqno; - time += obj.time; + // For delta encoding + SeqnoTimePair ComputeDelta(const SeqnoTimePair& base) const { + return {seqno - base.seqno, time - base.time}; } - // Compare SeqnoTimePair with a sequence number, used for binary search a - // sequence number in a list of SeqnoTimePair - bool operator<(const SequenceNumber& other) const { return seqno < other; } + // For delta decoding + void ApplyDelta(const SeqnoTimePair& delta_or_base) { + seqno += delta_or_base.seqno; + time += delta_or_base.time; + } - // Compare 2 SeqnoTimePair + // Ordering used for Sort() bool operator<(const SeqnoTimePair& other) const { return std::tie(seqno, time) < std::tie(other.seqno, other.time); } - // Check if 2 SeqnoTimePair is the same bool operator==(const SeqnoTimePair& other) const { return std::tie(seqno, time) == std::tie(other.seqno, other.time); } + + static bool SeqnoLess(const SeqnoTimePair& a, const SeqnoTimePair& b) { + return a.seqno < b.seqno; + } + + static bool TimeLess(const SeqnoTimePair& a, const SeqnoTimePair& b) { + return a.time < b.time; + } }; // constractor of SeqnoToTimeMapping @@ -103,16 +120,31 @@ class SeqnoToTimeMapping { // existing ones. It maintains the internal sorted status. bool Append(SequenceNumber seqno, uint64_t time); - // Given a sequence number, estimate it's oldest time - uint64_t GetOldestApproximateTime(SequenceNumber seqno) const; - - // Truncate the old entries based on the current time and max_time_duration_ + // Given a sequence number, return the best (largest / newest) known time + // that is no later than the write time of that given sequence number. + // If no such specific time is known, returns kUnknownTimeBeforeAll. + // Using the example in the class comment above, + // GetProximalTimeBeforeSeqno(10) -> kUnknownTimeBeforeAll + // GetProximalTimeBeforeSeqno(11) -> 500 + // GetProximalTimeBeforeSeqno(20) -> 500 + // GetProximalTimeBeforeSeqno(21) -> 600 + uint64_t GetProximalTimeBeforeSeqno(SequenceNumber seqno) const; + + // Remove any entries not needed for GetProximalSeqnoBeforeTime queries of + // times older than `now - max_time_duration_` void TruncateOldEntries(uint64_t now); - // Given a time, return it's oldest possible sequence number - SequenceNumber GetOldestSequenceNum(uint64_t time); - - // Encode to a binary string + // Given a time, return the best (largest) sequence number whose write time + // is no later than that given time. If no such specific sequence number is + // known, returns kUnknownSeqnoBeforeAll. Using the example in the class + // comment above, + // GetProximalSeqnoBeforeTime(499) -> kUnknownSeqnoBeforeAll + // GetProximalSeqnoBeforeTime(500) -> 10 + // GetProximalSeqnoBeforeTime(599) -> 10 + // GetProximalSeqnoBeforeTime(600) -> 20 + SequenceNumber GetProximalSeqnoBeforeTime(uint64_t time); + + // Encode to a binary string. start and end seqno are both inclusive. 
void Encode(std::string& des, SequenceNumber start, SequenceNumber end, uint64_t now, uint64_t output_size = kMaxSeqnoTimePairsPerSST) const; @@ -122,10 +154,10 @@ class SeqnoToTimeMapping { void Add(SequenceNumber seqno, uint64_t time); // Decode and add the entries to the current obj. The list will be unsorted - Status Add(const std::string& seqno_time_mapping_str); + Status Add(const std::string& pairs_str); // Return the number of entries - size_t Size() const { return seqno_time_mapping_.size(); } + size_t Size() const { return pairs_.size(); } // Reduce the size of internal list bool Resize(uint64_t min_time_duration, uint64_t max_time_duration); @@ -145,10 +177,10 @@ class SeqnoToTimeMapping { SeqnoToTimeMapping Copy(SequenceNumber smallest_seqno) const; // If the internal list is empty - bool Empty() const { return seqno_time_mapping_.empty(); } + bool Empty() const { return pairs_.empty(); } // clear all entries - void Clear() { seqno_time_mapping_.clear(); } + void Clear() { pairs_.clear(); } // return the string for user message // Note: Not efficient, okay for print @@ -156,7 +188,7 @@ class SeqnoToTimeMapping { #ifndef NDEBUG const std::deque& TEST_GetInternalMapping() const { - return seqno_time_mapping_; + return pairs_; } #endif @@ -167,7 +199,7 @@ class SeqnoToTimeMapping { uint64_t max_time_duration_; uint64_t max_capacity_; - std::deque seqno_time_mapping_; + std::deque pairs_; bool is_sorted_ = true; @@ -176,14 +208,14 @@ class SeqnoToTimeMapping { SeqnoTimePair& Last() { assert(!Empty()); - return seqno_time_mapping_.back(); + return pairs_.back(); } -}; -// for searching the sequence number from SeqnoToTimeMapping -inline bool operator<(const SequenceNumber& seqno, - const SeqnoToTimeMapping::SeqnoTimePair& other) { - return seqno < other.seqno; -} + using pair_const_iterator = + std::deque::const_iterator; + pair_const_iterator FindGreaterTime(uint64_t time) const; + pair_const_iterator FindGreaterSeqno(SequenceNumber seqno) const; + pair_const_iterator FindGreaterEqSeqno(SequenceNumber seqno) const; +}; } // namespace ROCKSDB_NAMESPACE From 63ed8688408b59dab86876928efa97f36dc0c3d1 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 29 Sep 2023 13:03:39 -0700 Subject: [PATCH 167/386] Offpeak in db option (#11893) Summary: RocksDB's primary function is to facilitate read and write operations. Compactions, while essential for minimizing read amplifications and optimizing storage, can sometimes compete with these primary tasks. Especially during periods of high read/write traffic, it's vital to ensure that primary operations receive priority, avoiding any potential disruptions or slowdowns. Conversely, during off-peak times when traffic is minimal, it's an opportune moment to tackle low-priority tasks like TTL based compactions, optimizing resource usage. In this PR, we are incorporating the concept of off-peak time into RocksDB by introducing `daily_offpeak_time_utc` within the DBOptions. This setting is formatted as "HH:mm-HH:mm" where the first one before "-" is the start time and the second one is the end time, inclusive. It will be later used for resource optimization in subsequent PRs. 
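For illustration only (not part of this diff), here is a minimal sketch of how the new option could be configured, either at open time or dynamically via `SetDBOptions()`; the helper function name is hypothetical:

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Hypothetical helper: enable a nightly off-peak window that spans midnight.
// The "HH:mm-HH:mm" string is the format validated by DBImpl::ValidateOptions().
void EnableNightlyOffpeak(rocksdb::Options& options, rocksdb::DB* db) {
  // At open time: off-peak from 11:30PM UTC to 4:30AM UTC the next day.
  options.daily_offpeak_time_utc = "23:30-04:30";

  // Or at runtime, since the option is part of MutableDBOptions.
  if (db != nullptr) {
    rocksdb::Status s =
        db->SetDBOptions({{"daily_offpeak_time_utc", "23:30-04:30"}});
    assert(s.ok());
  }
}
```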
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11893 Test Plan: - New Unit Test Added - `DBOptionsTest::OffPeakTimes` - Existing Unit Test Updated - `OptionsTest`, `OptionsSettableTest` Reviewed By: pdillinger Differential Revision: D49714553 Pulled By: jaykorean fbshipit-source-id: fef51ea7c0fede6431c715bff116ddbb567c8752 --- db/db_impl/db_impl.h | 4 + db/db_impl/db_impl_open.cc | 10 ++ db/db_options_test.cc | 154 ++++++++++++++++++ include/rocksdb/options.h | 19 +++ options/db_options.cc | 36 +++- options/db_options.h | 3 + options/options_helper.cc | 1 + options/options_settable_test.cc | 4 +- options/options_test.cc | 7 +- .../new_features/offpeak_db_option.md | 1 + util/string_util.cc | 39 +++++ util/string_util.h | 10 ++ 12 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 unreleased_history/new_features/offpeak_db_option.md diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 3f114fa36..8ae0b9ef0 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1197,6 +1197,10 @@ class DBImpl : public DB { const PeriodicTaskScheduler& TEST_GetPeriodicTaskScheduler() const; + static Status TEST_ValidateOptions(const DBOptions& db_options) { + return ValidateOptions(db_options); + } + #endif // NDEBUG // persist stats to column family "_persistent_stats" diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 172a55879..b89b997e7 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -25,6 +25,7 @@ #include "rocksdb/wal_filter.h" #include "test_util/sync_point.h" #include "util/rate_limiter_impl.h" +#include "util/string_util.h" #include "util/udt_util.h" namespace ROCKSDB_NAMESPACE { @@ -291,6 +292,15 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { "writes in direct IO require writable_file_max_buffer_size > 0"); } + if (db_options.daily_offpeak_time_utc != "") { + int start_time, end_time; + if (!TryParseTimeRangeString(db_options.daily_offpeak_time_utc, start_time, + end_time)) { + return Status::InvalidArgument( + "daily_offpeak_time_utc should be set in the format HH:mm-HH:mm " + "(e.g. 04:30-07:30)"); + } + } return Status::OK(); } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index c3910a978..19d6b7e2f 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -19,6 +19,7 @@ #include "rocksdb/convenience.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/stats_history.h" +#include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/random.h" @@ -1033,6 +1034,159 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_EQ(fifo_temp_opt[1].age, 30000); } +TEST_F(DBOptionsTest, OffPeakTimes) { + Options options; + options.create_if_missing = true; + + auto verify_invalid = [&]() { + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_NOK(s); + ASSERT_TRUE(s.IsInvalidArgument()); + }; + + auto verify_valid = [&]() { + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_OK(s); + ASSERT_FALSE(s.IsInvalidArgument()); + }; + std::vector invalid_cases = { + "06:30-", + "-23:30", // Both need to be set + "12:30 PM-23:30", + "12:01AM-11:00PM", // Invalid format + "01:99-22:00", // Invalid value for minutes + "00:00-24:00", // 24:00 is an invalid value + "6-7", + "6:-7", + "06:31.42-7:00", + "6.31:42-7:00", + "6:0-7:", + "15:0.2-3:.7", + ":00-00:02", + "02:00-:00", + "random-value", + "No:No-Hi:Hi", + }; + + std::vector valid_cases = { + "", // Not enabled. Valid case + "00:00-00:00", // Valid. 
Entire 24 hours are offpeak. + "06:30-11:30", "06:30-23:30", "13:30-14:30", + "00:00-23:59", // This doesn't cover entire 24 hours. There's 1 minute + // gap from 11:59:00PM to midnight + "23:30-01:15", // From 11:30PM to 1:15AM next day. Valid case. + "1:0000000000000-2:000000000042", // Weird, but we can parse the int. + }; + + for (std::string invalid_case : invalid_cases) { + options.daily_offpeak_time_utc = invalid_case; + verify_invalid(); + } + for (std::string valid_case : valid_cases) { + options.daily_offpeak_time_utc = valid_case; + verify_valid(); + } + + auto verify_is_now_offpeak = [&](bool expected, int now_utc_hour, + int now_utc_minute, int now_utc_second = 0) { + auto mock_clock = std::make_shared(env_->GetSystemClock()); + // Add some extra random days to current time + Random rnd(301); + int days = rnd.Uniform(100); + mock_clock->SetCurrentTime(days * 86400 + now_utc_hour * 3600 + + now_utc_minute * 60 + now_utc_second); + Status s = DBImpl::TEST_ValidateOptions(options); + ASSERT_OK(s); + auto db_options = MutableDBOptions(options); + ASSERT_EQ(expected, db_options.IsNowOffPeak(mock_clock.get())); + }; + + options.daily_offpeak_time_utc = ""; + verify_is_now_offpeak(false, 12, 30); + + options.daily_offpeak_time_utc = "06:30-11:30"; + verify_is_now_offpeak(false, 5, 30); + verify_is_now_offpeak(true, 6, 30); + verify_is_now_offpeak(true, 10, 30); + verify_is_now_offpeak(true, 11, 30); + verify_is_now_offpeak(false, 13, 30); + + options.daily_offpeak_time_utc = "23:30-04:30"; + verify_is_now_offpeak(false, 6, 30); + verify_is_now_offpeak(true, 23, 30); + verify_is_now_offpeak(true, 0, 0); + verify_is_now_offpeak(true, 1, 0); + verify_is_now_offpeak(true, 4, 30); + verify_is_now_offpeak(false, 4, 31); + + // There's one minute gap from 11:59PM to midnight + options.daily_offpeak_time_utc = "00:00-23:59"; + verify_is_now_offpeak(true, 0, 0); + verify_is_now_offpeak(true, 12, 00); + verify_is_now_offpeak(true, 23, 59); + verify_is_now_offpeak(false, 23, 59, 1); + + // Open the db and test by Get/SetDBOptions + options.daily_offpeak_time_utc = ""; + DestroyAndReopen(options); + ASSERT_EQ("", dbfull()->GetDBOptions().daily_offpeak_time_utc); + for (std::string invalid_case : invalid_cases) { + ASSERT_NOK( + dbfull()->SetDBOptions({{"daily_offpeak_time_utc", invalid_case}})); + } + for (std::string valid_case : valid_cases) { + ASSERT_OK(dbfull()->SetDBOptions({{"daily_offpeak_time_utc", valid_case}})); + ASSERT_EQ(valid_case, dbfull()->GetDBOptions().daily_offpeak_time_utc); + } + Close(); + + // Sets off-peak time from 11:30PM to 4:30AM next day. + // Starting at 1:30PM, use mock sleep to make time pass + // and see if IsNowOffPeak() returns correctly per time changes + int now_hour = 13; + int now_minute = 30; + options.daily_offpeak_time_utc = "23:30-04:30"; + auto mock_clock = std::make_shared(env_->GetSystemClock()); + auto mock_env = std::make_unique(env_, mock_clock); + // Add some extra random days to current time + Random rnd(301); + int days = rnd.Uniform(100); + mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); + options.env = mock_env.get(); + + // Starting at 1:30PM. It's not off-peak + DestroyAndReopen(options); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 4:30PM. Still not off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 11:30PM. 
It's off-peak + mock_clock->MockSleepForSeconds(7 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 2:30AM next day. It's still off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Now it's at 4:30AM. It's still off-peak + mock_clock->MockSleepForSeconds(2 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Sleep for one more second. It's no longer off-peak + mock_clock->MockSleepForSeconds(1); + ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + Close(); +} + TEST_F(DBOptionsTest, CompactionReadaheadSizeChange) { for (bool use_direct_reads : {true, false}) { SpecialEnv env(env_); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ac70def24..b14ecfb77 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1427,6 +1427,25 @@ struct DBOptions { // of the contract leads to undefined behaviors with high possibility of data // inconsistency, e.g. deleted old data become visible again, etc. bool enforce_single_del_contracts = true; + + // EXPERIMENTAL + // Implementing offpeak duration awareness in RocksDB. In this context, "peak + // time" signifies periods characterized by significantly elevated read and + // write activity compared to other times. By leveraging this knowledge, we + // can prevent low-priority tasks, such as TTL-based compactions, from + // competing with read and write operations during peak hours. Essentially, we + // preprocess these tasks during the preceding off-peak period, just before + // the next peak cycle begins. For example, if the TTL is configured for 25 + // days, we may compact the files during the off-peak hours of the 24th day. + // + // Time of the day in UTC. Format - HH:mm-HH:mm (00:00-23:59) + // If the start time > end time, it will be considered that the time period + // spans to the next day (e.g., 23:30-04:00) + // If the start time == end time, entire 24 hours will be considered offpeak + // (e.g. 00:00-00:00). Note that 00:00-23:59 will have one minute gap from + // 11:59:00PM to midnight. 
+ // Default: Empty String (No notion of peak/offpeak) + std::string daily_offpeak_time_utc = ""; }; // Options to control the behavior of a database (passed to DB::Open) diff --git a/options/db_options.cc b/options/db_options.cc index b93e35f43..af30718f3 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -129,6 +129,10 @@ static std::unordered_map {offsetof(struct MutableDBOptions, max_background_flushes), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kMutable}}, + {"daily_offpeak_time_utc", + {offsetof(struct MutableDBOptions, daily_offpeak_time_utc), + OptionType::kString, OptionVerificationType::kNormal, + OptionTypeFlags::kMutable}}, }; static std::unordered_map @@ -991,7 +995,8 @@ MutableDBOptions::MutableDBOptions() wal_bytes_per_sync(0), strict_bytes_per_sync(false), compaction_readahead_size(0), - max_background_flushes(-1) {} + max_background_flushes(-1), + daily_offpeak_time_utc("") {} MutableDBOptions::MutableDBOptions(const DBOptions& options) : max_background_jobs(options.max_background_jobs), @@ -1011,7 +1016,8 @@ MutableDBOptions::MutableDBOptions(const DBOptions& options) wal_bytes_per_sync(options.wal_bytes_per_sync), strict_bytes_per_sync(options.strict_bytes_per_sync), compaction_readahead_size(options.compaction_readahead_size), - max_background_flushes(options.max_background_flushes) {} + max_background_flushes(options.max_background_flushes), + daily_offpeak_time_utc(options.daily_offpeak_time_utc) {} void MutableDBOptions::Dump(Logger* log) const { ROCKS_LOG_HEADER(log, " Options.max_background_jobs: %d", @@ -1056,6 +1062,32 @@ void MutableDBOptions::Dump(Logger* log) const { compaction_readahead_size); ROCKS_LOG_HEADER(log, " Options.max_background_flushes: %d", max_background_flushes); + ROCKS_LOG_HEADER(log, "Options.daily_offpeak_time_utc: %s", + daily_offpeak_time_utc.c_str()); +} + +bool MutableDBOptions::IsNowOffPeak(SystemClock* clock) const { + if (daily_offpeak_time_utc.empty()) { + return false; + } + int64_t now; + if (clock->GetCurrentTime(&now).ok()) { + constexpr int kSecondsPerDay = 86400; + int since_midnight_seconds = static_cast(now % kSecondsPerDay); + int start_time = 0, end_time = 0; + assert( + TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time)); + + // if the offpeak duration spans overnight (i.e. 
23:30 - 4:30 next day) + if (start_time > end_time) { + return start_time <= since_midnight_seconds || + since_midnight_seconds <= end_time; + } else { + return start_time <= since_midnight_seconds && + since_midnight_seconds <= end_time; + } + } + return false; } Status GetMutableDBOptionsFromStrings( diff --git a/options/db_options.h b/options/db_options.h index d00a06718..85a4d949b 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -136,6 +136,9 @@ struct MutableDBOptions { bool strict_bytes_per_sync; size_t compaction_readahead_size; int max_background_flushes; + + std::string daily_offpeak_time_utc; + bool IsNowOffPeak(SystemClock* clock) const; }; Status GetStringFromMutableDBOptions(const ConfigOptions& config_options, diff --git a/options/options_helper.cc b/options/options_helper.cc index 8d32640c9..0c76d0315 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -179,6 +179,7 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options, options.lowest_used_cache_tier = immutable_db_options.lowest_used_cache_tier; options.enforce_single_del_contracts = immutable_db_options.enforce_single_del_contracts; + options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc; return options; } diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc index 39d7d6b20..2f7493f32 100644 --- a/options/options_settable_test.cc +++ b/options/options_settable_test.cc @@ -252,6 +252,7 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { sizeof(FileTypeSet)}, {offsetof(struct DBOptions, compaction_service), sizeof(std::shared_ptr)}, + {offsetof(struct DBOptions, daily_offpeak_time_utc), sizeof(std::string)}, }; char* options_ptr = new char[sizeof(DBOptions)]; @@ -365,7 +366,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) { "db_host_id=hostname;" "lowest_used_cache_tier=kNonVolatileBlockTier;" "allow_data_in_errors=false;" - "enforce_single_del_contracts=false;", + "enforce_single_del_contracts=false;" + "daily_offpeak_time_utc=08:30-19:00;", new_options)); ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions), diff --git a/options/options_test.cc b/options/options_test.cc index 855243c95..a05ed0c8c 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -178,6 +178,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { {"wal_bytes_per_sync", "48"}, {"strict_bytes_per_sync", "true"}, {"preserve_deletes", "false"}, + {"daily_offpeak_time_utc", ""}, }; ColumnFamilyOptions base_cf_opt; @@ -358,6 +359,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) { ASSERT_EQ(new_db_opt.bytes_per_sync, static_cast(47)); ASSERT_EQ(new_db_opt.wal_bytes_per_sync, static_cast(48)); ASSERT_EQ(new_db_opt.strict_bytes_per_sync, true); + ASSERT_EQ(new_db_opt.daily_offpeak_time_utc, ""); db_options_map["max_open_files"] = "hello"; Status s = @@ -879,6 +881,7 @@ TEST_F(OptionsTest, OldInterfaceTest) { {"track_and_verify_wals_in_manifest", "true"}, {"verify_sst_unique_id_in_manifest", "true"}, {"max_open_files", "32"}, + {"daily_offpeak_time_utc", "06:30-23:30"}, }; ConfigOptions db_config_options(base_db_opt); @@ -909,11 +912,13 @@ TEST_F(OptionsTest, OldInterfaceTest) { db_config_options.ignore_unknown_options = false; ASSERT_OK(GetDBOptionsFromString( db_config_options, base_db_opt, - "create_if_missing=false;error_if_exists=false;max_open_files=42;", + "create_if_missing=false;error_if_exists=false;max_open_files=42;" + "daily_offpeak_time_utc=08:30-19:00;", &new_db_opt)); 
ASSERT_EQ(new_db_opt.create_if_missing, false); ASSERT_EQ(new_db_opt.error_if_exists, false); ASSERT_EQ(new_db_opt.max_open_files, 42); + ASSERT_EQ(new_db_opt.daily_offpeak_time_utc, "08:30-19:00"); s = GetDBOptionsFromString( db_config_options, base_db_opt, "create_if_missing=false;error_if_exists=false;max_open_files=42;" diff --git a/unreleased_history/new_features/offpeak_db_option.md b/unreleased_history/new_features/offpeak_db_option.md new file mode 100644 index 000000000..bdc9b2a29 --- /dev/null +++ b/unreleased_history/new_features/offpeak_db_option.md @@ -0,0 +1 @@ +Add an experimental offpeak duration awareness by setting `DBOptions::daily_offpeak_time_utc` in "HH:mm-HH:mm" format. This information will be used for resource optimization in the future diff --git a/util/string_util.cc b/util/string_util.cc index 821ccba07..57207889f 100644 --- a/util/string_util.cc +++ b/util/string_util.cc @@ -437,6 +437,45 @@ bool SerializeIntVector(const std::vector& vec, std::string* value) { return true; } +int ParseTimeStringToSeconds(const std::string& value) { + int hours, minutes; + char colon; + + std::istringstream stream(value); + stream >> hours >> colon >> minutes; + + if (stream.fail() || !stream.eof() || colon != ':') { + return -1; + } + + if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59) { + return -1; + } + return hours * 3600 + minutes * 60; +} + +bool TryParseTimeRangeString(const std::string& value, int& start_time, + int& end_time) { + if (value.empty()) { + start_time = 0; + end_time = 0; + return true; + } + auto split = StringSplit(value, '-'); + if (split.size() != 2) { + return false; + } + start_time = ParseTimeStringToSeconds(split[0]); + if (start_time < 0) { + return false; + } + end_time = ParseTimeStringToSeconds(split[1]); + if (end_time < 0) { + return false; + } + return true; +} + // Copied from folly/string.cpp: // https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457 // There are two variants of `strerror_r` function, one returns diff --git a/util/string_util.h b/util/string_util.h index 0b15181f5..999081ebb 100644 --- a/util/string_util.h +++ b/util/string_util.h @@ -166,6 +166,16 @@ std::vector ParseVectorInt(const std::string& value); bool SerializeIntVector(const std::vector& vec, std::string* value); +// Expects HH:mm format for the input value +// Returns -1 if invalid input. Otherwise returns seconds since midnight +int ParseTimeStringToSeconds(const std::string& value); + +// Expects HH:mm-HH:mm format for the input value +// Returns false, if invalid format. +// Otherwise, returns true and start_time and end_time are set +bool TryParseTimeRangeString(const std::string& value, int& start_time, + int& end_time); + extern const std::string kNullptrString; // errnoStr() function returns a string that describes the error code passed in From 3c4cc6c2cc99c594c7c5569f1e565413bbe8bcdc Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 29 Sep 2023 15:15:32 -0700 Subject: [PATCH 168/386] flip default `DBOptions::fail_if_options_file_error` (#11800) Summary: Changed `DBOptions::fail_if_options_file_error` default from `false` to `true`. It is safer to fail an operation by default when it encounters an error. Also changed the API doc to list items in the conventional way for listing items in a sentence. The slashes weren't working well as one got dropped, probably because it looked like a typo. 
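For illustration only (not part of this diff), a minimal sketch of how a caller could opt back into the old lenient behavior after the default flip; the function name and DB path are placeholders:

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Hypothetical opt-out: restore the pre-change behavior of tolerating
// options file persistence errors.
rocksdb::Status OpenWithLenientOptionsFile(rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.fail_if_options_file_error = false;  // old default was false
  return rocksdb::DB::Open(options, "/tmp/example_db", db);
}
```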
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11800 Test Plan: rely on CI Reviewed By: jowlyzhang Differential Revision: D49030532 Pulled By: ajkr fbshipit-source-id: e606062aa25f9063d8c6fb0d03aebca5c2bc56d3 --- include/rocksdb/options.h | 6 +++--- .../fail_if_options_file_error_default_change.md | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index b14ecfb77..3cc268d88 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1200,11 +1200,11 @@ struct DBOptions { // currently. WalFilter* wal_filter = nullptr; - // If true, then DB::Open / CreateColumnFamily / DropColumnFamily + // If true, then DB::Open, CreateColumnFamily, DropColumnFamily, and // SetOptions will fail if options file is not properly persisted. // - // DEFAULT: false - bool fail_if_options_file_error = false; + // DEFAULT: true + bool fail_if_options_file_error = true; // If true, then print malloc stats together with rocksdb.stats // when printing to LOG. diff --git a/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md b/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md new file mode 100644 index 000000000..44e3bb507 --- /dev/null +++ b/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md @@ -0,0 +1 @@ +* The default value of `DBOptions::fail_if_options_file_error` changed from `false` to `true`. Operations that set in-memory options (e.g., `DB::Open*()`, `DB::SetOptions()`, `DB::CreateColumnFamily*()`, and `DB::DropColumnFamily()`) but fail to persist the change will now return a non-OK `Status` by default. From d98a9cfb27dbffb4f7a7ab173b366dd626c2f721 Mon Sep 17 00:00:00 2001 From: leipeng Date: Fri, 29 Sep 2023 15:58:08 -0700 Subject: [PATCH 169/386] test: WritableFile derived class: add missing GetFileSize() override (#11726) Summary: Missed `GetFileSize()` forwarding , this PR fix this issue, and mark `WritableFile::GetFileSize()` as pure virtual to detect such issue in compile time. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11726 Reviewed By: ajkr Differential Revision: D49791240 Pulled By: jowlyzhang fbshipit-source-id: ef219508d6b15c9a24df9b706a9fdc33cc6a286e --- db/db_test_util.h | 2 ++ include/rocksdb/env.h | 2 +- utilities/fault_injection_env.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/db/db_test_util.h b/db/db_test_util.h index 1e1225812..023784f61 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -233,6 +233,7 @@ class SpecialEnv : public EnvWrapper { size_t GetUniqueId(char* id, size_t max_size) const override { return base_->GetUniqueId(id, max_size); } + uint64_t GetFileSize() final { return base_->GetFileSize(); } }; class ManifestFile : public WritableFile { public: @@ -345,6 +346,7 @@ class SpecialEnv : public EnvWrapper { Status Allocate(uint64_t offset, uint64_t len) override { return base_->Allocate(offset, len); } + uint64_t GetFileSize() final { return base_->GetFileSize(); } private: SpecialEnv* env_; diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 63a161923..7828a31eb 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1001,7 +1001,7 @@ class WritableFile { /* * Get the size of valid data in the file. 
*/ - virtual uint64_t GetFileSize() { return 0; } + virtual uint64_t GetFileSize() = 0; /* * Get and set the default pre-allocation block size for writes to diff --git a/utilities/fault_injection_env.h b/utilities/fault_injection_env.h index 549bfe716..6c1623a8d 100644 --- a/utilities/fault_injection_env.h +++ b/utilities/fault_injection_env.h @@ -96,6 +96,7 @@ class TestWritableFile : public WritableFile { virtual bool use_direct_io() const override { return target_->use_direct_io(); }; + uint64_t GetFileSize() final { return target_->GetFileSize(); } private: FileState state_; From be879cc56baafcfd8672ff526fb5b8422aef98cc Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 2 Oct 2023 16:07:39 -0700 Subject: [PATCH 170/386] stress test verification value mismatch message (#11912) Summary: Separate the message for value mismatch from the message for an extra value in the DB Pull Request resolved: https://github.com/facebook/rocksdb/pull/11912 Reviewed By: hx235 Differential Revision: D49792137 Pulled By: ajkr fbshipit-source-id: 311bc1801843a15367f409ead88ef755acbde468 --- db_stress_tool/no_batched_ops_stress.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index eeb44560d..6ddc7fe0b 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -2034,9 +2034,7 @@ class NonBatchedOpsStressTest : public StressTest { const Slice slice(value_from_db); const uint32_t value_base_from_db = GetValueBase(slice); if (ExpectedValueHelper::MustHaveNotExisted(expected_value, - expected_value) || - !ExpectedValueHelper::InExpectedValueBaseRange( - value_base_from_db, expected_value, expected_value)) { + expected_value)) { VerificationAbort(shared, msg_prefix + ": Unexpected value found", cf, key, value_from_db, ""); return false; @@ -2045,6 +2043,14 @@ class NonBatchedOpsStressTest : public StressTest { size_t expected_value_data_size = GenerateValue(expected_value.GetValueBase(), expected_value_data, sizeof(expected_value_data)); + if (!ExpectedValueHelper::InExpectedValueBaseRange( + value_base_from_db, expected_value, expected_value)) { + VerificationAbort(shared, msg_prefix + ": Unexpected value found", cf, + key, value_from_db, + Slice(expected_value_data, expected_value_data_size)); + return false; + } + // TODO: are the length/memcmp() checks repetitive? if (value_from_db.length() != expected_value_data_size) { VerificationAbort(shared, msg_prefix + ": Length of value read is not equal", From 7bebd3036d6947fa38db0f8936d6e7706ca22a5a Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 2 Oct 2023 16:19:05 -0700 Subject: [PATCH 171/386] Update tiered storage tests (ahead of next change) (#11917) Summary: After https://github.com/facebook/rocksdb/issues/11905, I am preparing a DBImpl change to ensure all sufficiently recent sequence numbers since Open are covered by SeqnoToTimeMapping. **Intended follow-up** However, there are a number of test changes I want to make prior to that to make it clear that I am not regressing the tests and production behavior at the same time. * Start mock time in the tests well beyond epoch (time 0) so that we aren't normally reaching into pre-history for current time minus the preserve/preclude duration. * Majorly clean up BasicSeqnoToTimeMapping to avoid confusing hard-coded bounds on GetProximalTimeBeforeSeqno() results. 
* There is an unresolved/unexplained issue marked with FIXME that should be investigated when GetProximalTimeBeforeSeqno() is put into production. * MultiCFs test was strangely generating 5 L0 files, four of which would be compacted into an L1, and then letting TTL compaction compact 1@L0+1@L1. Changing the starting time of the tests seemed to mess up the TTL compaction. But I suspect the TTL compaction was unintentional, so I've cut it down to just 4 L0 files, which compacts predictably. * Unrelated: allow ROCKSDB_NO_STACK=1 to skip printing a stack trace on assertion failures. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11917 Test Plan: no changes to production code Reviewed By: jowlyzhang Differential Revision: D49841436 Pulled By: pdillinger fbshipit-source-id: 753348ace9c548e82bcb77fcc8b2ffb7a6beeb0a --- db/compaction/tiered_compaction_test.cc | 7 ++- db/seqno_time_test.cc | 71 ++++++++++++++----------- port/stack_trace.cc | 5 ++ 3 files changed, 51 insertions(+), 32 deletions(-) diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 51f931603..3341bddc8 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1216,6 +1216,7 @@ class PrecludeLastLevelTest : public DBTestBase { PrecludeLastLevelTest() : DBTestBase("preclude_last_level_test", /*env_do_fsync=*/false) { mock_clock_ = std::make_shared(env_->GetSystemClock()); + mock_clock_->SetCurrentTime(kMockStartTime); mock_env_ = std::make_unique(env_, mock_clock_); } @@ -1223,6 +1224,10 @@ class PrecludeLastLevelTest : public DBTestBase { std::unique_ptr mock_env_; std::shared_ptr mock_clock_; + // Sufficient starting time that preserve time doesn't under-flow into + // pre-history + static constexpr uint32_t kMockStartTime = 10000000; + void SetUp() override { mock_clock_->InstallTimedWaitFixCallback(); SyncPoint::GetInstance()->SetCallBack( @@ -1231,7 +1236,7 @@ class PrecludeLastLevelTest : public DBTestBase { reinterpret_cast(arg); periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock_.get()); }); - mock_clock_->SetCurrentTime(0); + mock_clock_->SetCurrentTime(kMockStartTime); } }; diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc index e4c575167..3a64499be 100644 --- a/db/seqno_time_test.cc +++ b/db/seqno_time_test.cc @@ -18,6 +18,7 @@ class SeqnoTimeTest : public DBTestBase { public: SeqnoTimeTest() : DBTestBase("seqno_time_test", /*env_do_fsync=*/false) { mock_clock_ = std::make_shared(env_->GetSystemClock()); + mock_clock_->SetCurrentTime(kMockStartTime); mock_env_ = std::make_unique(env_, mock_clock_); } @@ -25,6 +26,10 @@ class SeqnoTimeTest : public DBTestBase { std::unique_ptr mock_env_; std::shared_ptr mock_clock_; + // Sufficient starting time that preserve time doesn't under-flow into + // pre-history + static constexpr uint32_t kMockStartTime = 10000000; + void SetUp() override { mock_clock_->InstallTimedWaitFixCallback(); SyncPoint::GetInstance()->SetCallBack( @@ -33,6 +38,7 @@ class SeqnoTimeTest : public DBTestBase { reinterpret_cast(arg); periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock_.get()); }); + mock_clock_->SetCurrentTime(kMockStartTime); } // make sure the file is not in cache, otherwise it won't have IO info @@ -324,8 +330,18 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { options.disable_auto_compactions = true; DestroyAndReopen(options); + // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) + ASSERT_OK(Put("foo", "bar")); + 
ASSERT_OK(SingleDelete("foo")); + // pass some time first, otherwise the first a few keys write time are going + // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll + dbfull()->TEST_WaitForPeriodicTaskRun( + [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); + std::set checked_file_nums; SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber() + 1; + uint64_t start_time = mock_clock_->NowSeconds(); + // Write a key every 10 seconds for (int i = 0; i < 200; i++) { ASSERT_OK(Put(Key(i), "value")); @@ -347,17 +363,16 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { ASSERT_GE(seqs.size(), 19); ASSERT_LE(seqs.size(), 21); SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber() + 1; - for (auto i = start_seq; i < start_seq + 10; i++) { - ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i + 1) * 10); - } - start_seq += 10; for (auto i = start_seq; i < seq_end; i++) { // The result is within the range - ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - 10) * 10); - ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i + 10) * 10); + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq) * 10 - 100); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq) * 10); } checked_file_nums.insert(it->second->orig_file_number); start_seq = seq_end; + start_time = mock_clock_->NowSeconds(); // Write a key every 1 seconds for (int i = 0; i < 200; i++) { @@ -387,13 +402,14 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { ASSERT_GE(seqs.size(), 1); ASSERT_LE(seqs.size(), 3); for (auto i = start_seq; i < seq_end; i++) { - // The result is not very accurate, as there is more data write within small - // range of time - ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 1000); - ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 3000); + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq) - 100); + ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq)); } checked_file_nums.insert(it->second->orig_file_number); start_seq = seq_end; + start_time = mock_clock_->NowSeconds(); // Write a key every 200 seconds for (int i = 0; i < 200; i++) { @@ -422,20 +438,18 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { // The sequence number -> time entries should be maxed ASSERT_GE(seqs.size(), 99); ASSERT_LE(seqs.size(), 101); - for (auto i = start_seq; i < seq_end - 99; i++) { - // likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), (i - start_seq) + 3000); - } - start_seq += 101; - for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), - (i - start_seq) * 200 + 22200); + // aged out entries allowed to report time=0 + if ((seq_end - i) * 200 <= 10000) { + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq) * 200 - 100); + } ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), - (i - start_seq) * 200 + 22600); + start_time + (i - start_seq) * 200); } checked_file_nums.insert(it->second->orig_file_number); start_seq = seq_end; + start_time = mock_clock_->NowSeconds(); // Write a key every 100 seconds for (int i = 0; i < 200; i++) { @@ -489,18 +503,15 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { seqs = tp_mapping.TEST_GetInternalMapping(); ASSERT_GE(seqs.size(), 99); ASSERT_LE(seqs.size(), 101); - for (auto i = start_seq; i < seq_end - 99; i++) { - // 
likely the first 100 entries reports 0 - ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), - (i - start_seq) * 100 + 50000); - } - start_seq += 101; - for (auto i = start_seq; i < seq_end; i++) { - ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), - (i - start_seq) * 100 + 52200); + // aged out entries allowed to report time=0 + // FIXME: should be <= + if ((seq_end - i) * 100 < 10000) { + ASSERT_GE(tp_mapping.GetProximalTimeBeforeSeqno(i), + start_time + (i - start_seq) * 100 - 100); + } ASSERT_LE(tp_mapping.GetProximalTimeBeforeSeqno(i), - (i - start_seq) * 100 + 52400); + start_time + (i - start_seq) * 100); } ASSERT_OK(db_->Close()); } @@ -625,14 +636,12 @@ TEST_P(SeqnoTimeTablePropTest, MultiCFs) { ASSERT_GE(seqs.size(), 99); ASSERT_LE(seqs.size(), 101); - for (int j = 0; j < 2; j++) { for (int i = 0; i < 200; i++) { ASSERT_OK(Put(0, Key(i), "value")); dbfull()->TEST_WaitForPeriodicTaskRun( [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); } ASSERT_OK(Flush(0)); - } ASSERT_OK(dbfull()->TEST_WaitForCompact()); tables_props.clear(); ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[0], &tables_props)); diff --git a/port/stack_trace.cc b/port/stack_trace.cc index 1ccf9d804..a5a6d2e77 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -191,6 +191,11 @@ void PrintStack(int first_frames_to_skip) { char* debug_env = getenv("ROCKSDB_DEBUG"); bool debug = debug_env != nullptr && strlen(debug_env) > 0; + if (!debug && getenv("ROCKSDB_NO_STACK") != nullptr) { + // Skip stack trace + return; + } + if (lldb_stack_trace || gdb_stack_trace || debug) { // Allow ouside debugger to attach, even with Yama security restrictions #ifdef PR_SET_PTRACER_ANY From b00fa5597e2c620309c5be0a3aa1837fdbeb50cf Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 2 Oct 2023 16:25:25 -0700 Subject: [PATCH 172/386] Fix the handling of wide-column base values in the max_successive_merges logic (#11913) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11913 The `max_successive_merges` logic currently does not handle wide-column base values correctly, since it uses the `Get` API, which only returns the value of the default column. The patch fixes this by switching to `GetEntity` and passing all columns (if applicable) to the merge operator. 
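For illustration, a minimal sketch of the scenario this fixes, closely mirroring the new `DBMergeOperatorTest` case; it assumes a DB already opened with a Put-style merge operator and `max_successive_merges = 1`:

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/wide_columns.h"

// Assumes `db` was opened with merge_operator set to a Put-style operator and
// max_successive_merges = 1, as in the new test.
void WideColumnBaseValueExample(rocksdb::DB* db) {
  using namespace rocksdb;
  ColumnFamilyHandle* cf = db->DefaultColumnFamily();

  // Wide-column base value with a non-default column.
  WideColumns columns{{kDefaultWideColumnName, "foo"}, {"bar", "baz"}};
  Status s = db->PutEntity(WriteOptions(), cf, "key", columns);
  assert(s.ok());

  // The second Merge exceeds max_successive_merges, so it is converted to a
  // base value. With this fix the conversion reads the base value via
  // GetEntity and writes back a wide-column entity, so the {"bar", "baz"}
  // column is preserved in the result.
  s = db->Merge(WriteOptions(), cf, "key", "qux");
  assert(s.ok());
  s = db->Merge(WriteOptions(), cf, "key", "corge");
  assert(s.ok());

  PinnableWideColumns result;
  s = db->GetEntity(ReadOptions(), cf, "key", &result);
  assert(s.ok());
  // result.columns() == {{kDefaultWideColumnName, "corge"}, {"bar", "baz"}}
}
```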
Reviewed By: jaykorean Differential Revision: D49795097 fbshipit-source-id: 75eb7cc9476226255062cdb3d43ab6bd1cc2faa3 --- db/db_merge_operator_test.cc | 94 +++++++++++++++++++ db/merge_helper.cc | 29 ++++++ db/merge_helper.h | 17 ++++ db/wide/wide_columns_helper.h | 5 + db/write_batch.cc | 56 +++++++---- tools/ldb_cmd.cc | 3 +- .../max_successive_merges_wide_columns.md | 1 + 7 files changed, 185 insertions(+), 20 deletions(-) create mode 100644 unreleased_history/bug_fixes/max_successive_merges_wide_columns.md diff --git a/db/db_merge_operator_test.cc b/db/db_merge_operator_test.cc index 5a7028e77..e82e0cbf0 100644 --- a/db/db_merge_operator_test.cc +++ b/db/db_merge_operator_test.cc @@ -6,10 +6,12 @@ #include #include "db/db_test_util.h" +#include "db/dbformat.h" #include "db/forward_iterator.h" #include "port/stack_trace.h" #include "rocksdb/merge_operator.h" #include "rocksdb/snapshot.h" +#include "rocksdb/utilities/debug.h" #include "util/random.h" #include "utilities/merge_operators.h" #include "utilities/merge_operators/string_append/stringappend2.h" @@ -949,6 +951,98 @@ TEST_P(PerConfigMergeOperatorPinningTest, Randomized) { VerifyDBFromMap(true_data); } +TEST_F(DBMergeOperatorTest, MaxSuccessiveMergesBaseValues) { + Options options = CurrentOptions(); + options.create_if_missing = true; + options.merge_operator = MergeOperators::CreatePutOperator(); + options.max_successive_merges = 1; + options.env = env_; + Reopen(options); + + constexpr char foo[] = "foo"; + constexpr char bar[] = "bar"; + constexpr char baz[] = "baz"; + constexpr char qux[] = "qux"; + constexpr char corge[] = "corge"; + + // No base value + { + constexpr char key[] = "key1"; + + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, foo)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, bar)); + + PinnableSlice result; + ASSERT_OK( + db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)); + ASSERT_EQ(result, bar); + + // We expect the second Merge to be converted to a Put because of + // max_successive_merges. + constexpr size_t max_key_versions = 8; + std::vector key_versions; + ASSERT_OK(GetAllKeyVersions(db_, db_->DefaultColumnFamily(), key, key, + max_key_versions, &key_versions)); + ASSERT_EQ(key_versions.size(), 2); + ASSERT_EQ(key_versions[0].type, kTypeValue); + ASSERT_EQ(key_versions[1].type, kTypeMerge); + } + + // Plain base value + { + constexpr char key[] = "key2"; + + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), key, foo)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, bar)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, baz)); + + PinnableSlice result; + ASSERT_OK( + db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)); + ASSERT_EQ(result, baz); + + // We expect the second Merge to be converted to a Put because of + // max_successive_merges. 
+ constexpr size_t max_key_versions = 8; + std::vector key_versions; + ASSERT_OK(GetAllKeyVersions(db_, db_->DefaultColumnFamily(), key, key, + max_key_versions, &key_versions)); + ASSERT_EQ(key_versions.size(), 3); + ASSERT_EQ(key_versions[0].type, kTypeValue); + ASSERT_EQ(key_versions[1].type, kTypeMerge); + ASSERT_EQ(key_versions[2].type, kTypeValue); + } + + // Wide-column base value + { + constexpr char key[] = "key3"; + const WideColumns columns{{kDefaultWideColumnName, foo}, {bar, baz}}; + + ASSERT_OK(db_->PutEntity(WriteOptions(), db_->DefaultColumnFamily(), key, + columns)); + ASSERT_OK(db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, qux)); + ASSERT_OK( + db_->Merge(WriteOptions(), db_->DefaultColumnFamily(), key, corge)); + + PinnableWideColumns result; + ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), key, + &result)); + const WideColumns expected{{kDefaultWideColumnName, corge}, {bar, baz}}; + ASSERT_EQ(result.columns(), expected); + + // We expect the second Merge to be converted to a PutEntity because of + // max_successive_merges. + constexpr size_t max_key_versions = 8; + std::vector key_versions; + ASSERT_OK(GetAllKeyVersions(db_, db_->DefaultColumnFamily(), key, key, + max_key_versions, &key_versions)); + ASSERT_EQ(key_versions.size(), 3); + ASSERT_EQ(key_versions[0].type, kTypeWideColumnEntity); + ASSERT_EQ(key_versions[1].type, kTypeMerge); + ASSERT_EQ(key_versions[2].type, kTypeWideColumnEntity); + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 9d212fc51..d8b1d788b 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -298,6 +298,20 @@ Status MergeHelper::TimedFullMerge( result_type, op_failure_scope); } +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const WideColumns& columns, const std::vector& operands, + Logger* logger, Statistics* statistics, SystemClock* clock, + bool update_num_ops_stats, std::string* result, Slice* result_operand, + ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(columns); + + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result, result_operand, + result_type, op_failure_scope); +} + Status MergeHelper::TimedFullMerge( const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, const std::vector& operands, Logger* logger, Statistics* statistics, @@ -351,6 +365,21 @@ Status MergeHelper::TimedFullMerge( op_failure_scope); } +Status MergeHelper::TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const WideColumns& columns, const std::vector& operands, + Logger* logger, Statistics* statistics, SystemClock* clock, + bool update_num_ops_stats, std::string* result_value, + PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(columns); + + return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), + operands, logger, statistics, clock, + update_num_ops_stats, result_value, result_entity, + op_failure_scope); +} + // PRE: iter points to the first merge type entry // POST: iter points to the first entry beyond the merge process (or the end) // keys_, operands_ are updated to reflect the merge result. 
diff --git a/db/merge_helper.h b/db/merge_helper.h index 93267c9a9..84c5f3535 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -85,6 +85,13 @@ class MergeHelper { std::string* result, Slice* result_operand, ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope); + static Status TimedFullMerge( + const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, + const WideColumns& columns, const std::vector& operands, + Logger* logger, Statistics* statistics, SystemClock* clock, + bool update_num_ops_stats, std::string* result, Slice* result_operand, + ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope); + // Variants that expose the merge result translated to the form requested by // the client. (For example, if the result is a wide-column structure but the // client requested the results in plain-value form, the value of the default @@ -112,6 +119,16 @@ class MergeHelper { std::string* result_value, PinnableWideColumns* result_entity, MergeOperator::OpFailureScope* op_failure_scope); + static Status TimedFullMerge(const MergeOperator* merge_operator, + const Slice& key, WideBaseValueTag, + const WideColumns& columns, + const std::vector& operands, + Logger* logger, Statistics* statistics, + SystemClock* clock, bool update_num_ops_stats, + std::string* result_value, + PinnableWideColumns* result_entity, + MergeOperator::OpFailureScope* op_failure_scope); + // During compaction, merge entries until we hit // - a corrupted key // - a Put/Delete, diff --git a/db/wide/wide_columns_helper.h b/db/wide/wide_columns_helper.h index 86c77c02d..a870fae30 100644 --- a/db/wide/wide_columns_helper.h +++ b/db/wide/wide_columns_helper.h @@ -24,6 +24,11 @@ class WideColumnsHelper { return !columns.empty() && columns.front().name() == kDefaultWideColumnName; } + static bool HasDefaultColumnOnly(const WideColumns& columns) { + return columns.size() == 1 && + columns.front().name() == kDefaultWideColumnName; + } + static const Slice& GetDefaultColumn(const WideColumns& columns) { assert(HasDefaultColumn(columns)); return columns.front().value(); diff --git a/db/write_batch.cc b/db/write_batch.cc index 17ccca2fa..4502a81ff 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -2483,15 +2483,15 @@ class MemTableInserter : public WriteBatch::Handler { } if (perform_merge) { - // TODO: support wide-column base values for max_successive_merges - - // 1) Get the existing value - std::string get_value; + // 1) Get the existing value. Use the wide column APIs to make sure we + // don't lose any columns in the process. + PinnableWideColumns existing; // Pass in the sequence number so that we also include previous merge // operations in the same batch. SnapshotImpl read_from_snapshot; read_from_snapshot.number_ = sequence_; + // TODO: plumb Env::IOActivity ReadOptions read_options; read_options.snapshot = &read_from_snapshot; @@ -2500,28 +2500,47 @@ class MemTableInserter : public WriteBatch::Handler { if (cf_handle == nullptr) { cf_handle = db_->DefaultColumnFamily(); } - Status get_status = db_->Get(read_options, cf_handle, key, &get_value); + + Status get_status = + db_->GetEntity(read_options, cf_handle, key, &existing); if (!get_status.ok()) { // Failed to read a key we know exists. Store the delta in memtable. 
perform_merge = false; } else { - Slice get_value_slice = Slice(get_value); - // 2) Apply this merge auto merge_operator = moptions->merge_operator; assert(merge_operator); + const auto& columns = existing.columns(); + + Status merge_status; std::string new_value; ValueType new_value_type; - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - Status merge_status = MergeHelper::TimedFullMerge( - merge_operator, key, MergeHelper::kPlainBaseValue, get_value_slice, - {value}, moptions->info_log, moptions->statistics, - SystemClock::Default().get(), - /* update_num_ops_stats */ false, &new_value, - /* result_operand */ nullptr, &new_value_type, - /* op_failure_scope */ nullptr); + + if (WideColumnsHelper::HasDefaultColumnOnly(columns)) { + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. + merge_status = MergeHelper::TimedFullMerge( + merge_operator, key, MergeHelper::kPlainBaseValue, + WideColumnsHelper::GetDefaultColumn(columns), {value}, + moptions->info_log, moptions->statistics, + SystemClock::Default().get(), + /* update_num_ops_stats */ false, &new_value, + /* result_operand */ nullptr, &new_value_type, + /* op_failure_scope */ nullptr); + } else { + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its + // value. + merge_status = MergeHelper::TimedFullMerge( + merge_operator, key, MergeHelper::kWideBaseValue, columns, + {value}, moptions->info_log, moptions->statistics, + SystemClock::Default().get(), + /* update_num_ops_stats */ false, &new_value, + /* result_operand */ nullptr, &new_value_type, + /* op_failure_scope */ nullptr); + } if (!merge_status.ok()) { // Failed to merge! @@ -2530,12 +2549,13 @@ class MemTableInserter : public WriteBatch::Handler { } else { // 3) Add value to memtable assert(!concurrent_memtable_writes_); + assert(new_value_type == kTypeValue || + new_value_type == kTypeWideColumnEntity); + if (kv_prot_info != nullptr) { auto merged_kv_prot_info = kv_prot_info->StripC(column_family_id).ProtectS(sequence_); merged_kv_prot_info.UpdateV(value, new_value); - assert(new_value_type == kTypeValue || - new_value_type == kTypeWideColumnEntity); merged_kv_prot_info.UpdateO(kTypeMerge, new_value_type); ret_status = mem->Add(sequence_, new_value_type, key, new_value, &merged_kv_prot_info); diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index ff2483dce..6edf0637f 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1100,8 +1100,7 @@ std::string LDBCommand::PrintKeyValueOrWideColumns( const Slice& key, const Slice& value, const WideColumns& wide_columns, bool is_key_hex, bool is_value_hex) { if (wide_columns.empty() || - (wide_columns.size() == 1 && - WideColumnsHelper::HasDefaultColumn(wide_columns))) { + WideColumnsHelper::HasDefaultColumnOnly(wide_columns)) { return PrintKeyValue(key.ToString(), value.ToString(), is_key_hex, is_value_hex); } diff --git a/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md b/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md new file mode 100644 index 000000000..d24b6cf30 --- /dev/null +++ b/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md @@ -0,0 +1 @@ +Fixed the handling of wide-column base values in the `max_successive_merges` logic. 
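To make the user-visible effect of this fix concrete, here is a minimal standalone sketch (not part of the patch). It assumes the in-repo string-append merge operator from utilities/merge_operators.h; the DB path, keys, and the `max_successive_merges` value are illustrative:

    #include "rocksdb/db.h"
    #include "rocksdb/wide_columns.h"
    #include "utilities/merge_operators.h"
    using namespace ROCKSDB_NAMESPACE;

    int main() {
      Options options;
      options.create_if_missing = true;
      options.merge_operator = MergeOperators::CreateStringAppendOperator();
      // With a threshold of 1, the second consecutive Merge on a key is
      // resolved on the write path instead of being stored as an operand.
      options.max_successive_merges = 1;

      DB* db = nullptr;
      Status s = DB::Open(options, "/tmp/wide_column_merge_demo", &db);
      if (!s.ok()) return 1;

      // Wide-column base value followed by successive merges.
      WideColumns columns{{kDefaultWideColumnName, "foo"}, {"attr", "bar"}};
      s = db->PutEntity(WriteOptions(), db->DefaultColumnFamily(), "key1",
                        columns);
      s = db->Merge(WriteOptions(), "key1", "op1");
      s = db->Merge(WriteOptions(), "key1", "op2");  // resolved up front

      // With the fix, the up-front merge is applied to the whole entity, so
      // the non-default column {"attr", "bar"} is preserved in the result.
      PinnableWideColumns result;
      s = db->GetEntity(ReadOptions(), db->DefaultColumnFamily(), "key1",
                        &result);
      delete db;
      return s.ok() ? 0 : 1;
    }

With the previous behavior, the same sequence would read the base value through the plain-value path and collapse the entity to a plain key-value, silently dropping the non-default columns; that is the bug addressed above.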
From 10fd05e394a879154e24d12f3e32038e5eadd121 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 2 Oct 2023 16:26:24 -0700 Subject: [PATCH 173/386] Give retry flushes their own functions (#11903) Summary: Recovery triggers flushes for very different scenarios: (1) `FlushReason::kErrorRecoveryRetryFlush`: a flush failed (2) `FlushReason::kErrorRecovery`: a WAL may be corrupted (3) `FlushReason::kCatchUpAfterErrorRecovery`: immutable memtables may have accumulated The old code called `FlushAllColumnFamilies()` in all cases, which uses manual flush functions: `AtomicFlushMemTables()` and `FlushMemTable()`. Forcing flushing the latest data on all CFs was useful for (2) because it ensures all CFs move past the corrupted WAL. However, those code paths were overkill for (1) and (3), where only already-immutable memtables need to be flushed. There were conditionals to exclude some of the extraneous logic but I found there was still too much happening. For example, both of the manual flush functions enter the write thread. Entering the write thread is inconvenient because then we can't allow stalled writes to wait on a retrying flush to finish. Instead of continuing down the path of adding more conditionals to the manual flush functions, this PR introduces a dedicated function for cases (1) and (3): `RetryFlushesForErrorRecovery()`. Also I cleaned up the manual flush functions to remove existing conditionals for these cases as they're no longer needed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11903 Reviewed By: cbi42 Differential Revision: D49693812 Pulled By: ajkr fbshipit-source-id: 7630ac539b9d6c92052c13a3cdce53256134d990 --- db/db_impl/db_impl.cc | 24 +++--- db/db_impl/db_impl.h | 8 ++ db/db_impl/db_impl_compaction_flush.cc | 105 ++++++++++++++++++------- db/flush_job_test.cc | 3 +- db/memtable_list.h | 12 ++- db/memtable_list_test.cc | 2 +- 6 files changed, 112 insertions(+), 42 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 204d56e7d..855f6c534 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -387,13 +387,18 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { } } - // We cannot guarantee consistency of the WAL. So force flush Memtables of - // all the column families if (s.ok()) { + if (context.flush_reason == FlushReason::kErrorRecoveryRetryFlush) { + s = RetryFlushesForErrorRecovery(FlushReason::kErrorRecoveryRetryFlush, + true /* wait */); + } else { + // We cannot guarantee consistency of the WAL. So force flush Memtables of + // all the column families + FlushOptions flush_opts; + // We allow flush to stall write since we are trying to resume from error. + flush_opts.allow_write_stall = true; + s = FlushAllColumnFamilies(flush_opts, context.flush_reason); + } if (!s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB resume requested but failed due to Flush failure [%s]", @@ -457,11 +462,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // Since we drop all non-recovery flush requests during recovery, // and new memtable may fill up during recovery, // schedule one more round of flush.
- FlushOptions flush_opts; - flush_opts.allow_write_stall = false; - flush_opts.wait = false; - Status status = FlushAllColumnFamilies( - flush_opts, FlushReason::kCatchUpAfterErrorRecovery); + Status status = RetryFlushesForErrorRecovery( + FlushReason::kCatchUpAfterErrorRecovery, false /* wait */); if (!status.ok()) { // FlushAllColumnFamilies internally should take care of setting // background error if needed. diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 8ae0b9ef0..f4ce56035 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1951,6 +1951,8 @@ class DBImpl : public DB { const autovector& provided_candidate_cfds = {}, bool entered_write_thread = false); + Status RetryFlushesForErrorRecovery(FlushReason flush_reason, bool wait); + // Wait until flushing this column family won't stall writes Status WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd, bool* flush_needed); @@ -2099,6 +2101,12 @@ class DBImpl : public DB { #endif /* !NDEBUG */ }; + // In case of atomic flush, generates a `FlushRequest` for the latest atomic + // cuts for these `cfds`. Atomic cuts are recorded in + // `AssignAtomicFlushSeq()`. For each entry in `cfds`, all CFDs sharing the + // same latest atomic cut must also be present. + // + // REQUIRES: mutex held void GenerateFlushRequest(const autovector& cfds, FlushReason flush_reason, FlushRequest* req); diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 49da5acd3..e77680d40 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -222,9 +222,10 @@ Status DBImpl::FlushMemTableToOutputFile( // `snapshot_seqs` has already been computed before this function starts. // Recording the max memtable ID ensures that the flush job does not flush // a memtable without knowing such snapshot(s). - uint64_t max_memtable_id = needs_to_sync_closed_wals - ? cfd->imm()->GetLatestMemTableID() - : std::numeric_limits::max(); + uint64_t max_memtable_id = + needs_to_sync_closed_wals + ? cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */) + : std::numeric_limits::max(); // If needs_to_sync_closed_wals is false, then the flush job will pick ALL // existing memtables of the column family when PickMemTable() is called @@ -2230,7 +2231,8 @@ void DBImpl::GenerateFlushRequest(const autovector& cfds, // cfd may be null, see DBImpl::ScheduleFlushes continue; } - uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(); + uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID( + immutable_db_options_.atomic_flush /* for_atomic_flush */); req->cfd_to_max_mem_id_to_persist.emplace(cfd, max_memtable_id); } } @@ -2274,19 +2276,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, } WaitForPendingWrites(); - if (flush_reason != FlushReason::kErrorRecoveryRetryFlush && - flush_reason != FlushReason::kCatchUpAfterErrorRecovery && - (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) { - // Note that, when flush reason is kErrorRecoveryRetryFlush, during the - // auto retry resume, we want to avoid creating new small memtables. - // If flush reason is kCatchUpAfterErrorRecovery, we try to flush any new - // memtable that filled up during recovery, and we also want to avoid - // switching memtable to create small memtables. - // Therefore, SwitchMemtable will not be called. 
Also, since ResumeImpl - // will iterate through all the CFs and call FlushMemtable during auto - // retry resume, it is possible that in some CFs, - // cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will - // be created and scheduled, status::OK() will be returned. + if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) { s = SwitchMemtable(cfd, &context); } const uint64_t flush_memtable_id = std::numeric_limits::max(); @@ -2295,10 +2285,10 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, !cached_recoverable_state_empty_.load()) { FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}}; flush_reqs.emplace_back(std::move(req)); - memtable_ids_to_wait.emplace_back(cfd->imm()->GetLatestMemTableID()); + memtable_ids_to_wait.emplace_back( + cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */)); } - if (immutable_db_options_.persist_stats_to_disk && - flush_reason != FlushReason::kErrorRecoveryRetryFlush) { + if (immutable_db_options_.persist_stats_to_disk) { ColumnFamilyData* cfd_stats = versions_->GetColumnFamilySet()->GetColumnFamily( kPersistentStatsColumnFamilyName); @@ -2324,7 +2314,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, FlushRequest req{flush_reason, {{cfd_stats, flush_memtable_id}}}; flush_reqs.emplace_back(std::move(req)); memtable_ids_to_wait.emplace_back( - cfd_stats->imm()->GetLatestMemTableID()); + cfd_stats->imm()->GetLatestMemTableID( + false /* for_atomic_flush */)); } } } @@ -2375,8 +2366,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, } s = WaitForFlushMemTables( cfds, flush_memtable_ids, - (flush_reason == FlushReason::kErrorRecovery || - flush_reason == FlushReason::kErrorRecoveryRetryFlush)); + flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */); InstrumentedMutexLock lock_guard(&mutex_); for (auto* tmp_cfd : cfds) { tmp_cfd->UnrefAndTryDelete(); @@ -2471,9 +2461,7 @@ Status DBImpl::AtomicFlushMemTables( } for (auto cfd : cfds) { - if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) || - flush_reason == FlushReason::kErrorRecoveryRetryFlush || - flush_reason == FlushReason::kCatchUpAfterErrorRecovery) { + if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) { continue; } cfd->Ref(); @@ -2518,8 +2506,7 @@ Status DBImpl::AtomicFlushMemTables( } s = WaitForFlushMemTables( cfds, flush_memtable_ids, - (flush_reason == FlushReason::kErrorRecovery || - flush_reason == FlushReason::kErrorRecoveryRetryFlush)); + flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */); InstrumentedMutexLock lock_guard(&mutex_); for (auto* cfd : cfds) { cfd->UnrefAndTryDelete(); @@ -2528,6 +2515,68 @@ Status DBImpl::AtomicFlushMemTables( return s; } +Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason, + bool wait) { + mutex_.AssertHeld(); + assert(flush_reason == FlushReason::kErrorRecoveryRetryFlush || + flush_reason == FlushReason::kCatchUpAfterErrorRecovery); + + // Collect referenced CFDs. + autovector cfds; + for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) { + if (!cfd->IsDropped() && cfd->initialized() && + cfd->imm()->NumNotFlushed() != 0) { + cfd->Ref(); + cfd->imm()->FlushRequested(); + cfds.push_back(cfd); + } + } + + // Submit flush requests for all immutable memtables needing flush. + // `flush_memtable_ids` will be populated such that all immutable + // memtables eligible for flush are waited on before this function + // returns. 
+ autovector flush_memtable_ids; + if (immutable_db_options_.atomic_flush) { + FlushRequest flush_req; + GenerateFlushRequest(cfds, flush_reason, &flush_req); + SchedulePendingFlush(flush_req); + for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) { + flush_memtable_ids.push_back(iter.second); + } + } else { + for (auto cfd : cfds) { + flush_memtable_ids.push_back( + cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */)); + // Impose no bound on the highest memtable ID flushed. There is no + // reason to do so outside of atomic flush. + FlushRequest flush_req{ + flush_reason, + {{cfd, + std::numeric_limits::max() /* max_mem_id_to_persist */}}}; + SchedulePendingFlush(flush_req); + } + } + MaybeScheduleFlushOrCompaction(); + + Status s; + if (wait) { + mutex_.Unlock(); + autovector flush_memtable_id_ptrs; + for (auto& flush_memtable_id : flush_memtable_ids) { + flush_memtable_id_ptrs.push_back(&flush_memtable_id); + } + s = WaitForFlushMemTables(cfds, flush_memtable_id_ptrs, + true /* resuming_from_bg_err */); + mutex_.Lock(); + } + + for (auto* cfd : cfds) { + cfd->UnrefAndTryDelete(); + } + return s; +} + // Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine, can // cause write stall, for example if one memtable is being flushed already. // This method tries to avoid write stall (similar to CompactRange() behavior) diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 9fd9c13fa..0f7871709 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -457,7 +457,8 @@ TEST_F(FlushJobTest, FlushMemtablesMultipleColumnFamilies) { // Verify that imm is empty ASSERT_EQ(std::numeric_limits::max(), all_cfds[k]->imm()->GetEarliestMemTableID()); - ASSERT_EQ(0, all_cfds[k]->imm()->GetLatestMemTableID()); + ASSERT_EQ(0, all_cfds[k]->imm()->GetLatestMemTableID( + false /* for_atomic_flush */)); ++k; } diff --git a/db/memtable_list.h b/db/memtable_list.h index 51d14dff7..81b60288d 100644 --- a/db/memtable_list.h +++ b/db/memtable_list.h @@ -386,11 +386,21 @@ class MemTableList { return memlist.back()->GetID(); } - uint64_t GetLatestMemTableID() const { + uint64_t GetLatestMemTableID(bool for_atomic_flush) const { auto& memlist = current_->memlist_; if (memlist.empty()) { return 0; } + if (for_atomic_flush) { + // Scan the memtable list from new to old + for (auto it = memlist.begin(); it != memlist.end(); ++it) { + MemTable* m = *it; + if (m->atomic_flush_seqno_ != kMaxSequenceNumber) { + return m->GetID(); + } + } + return 0; + } return memlist.front()->GetID(); } diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 12f7495b8..3203c7a00 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -833,7 +833,7 @@ TEST_F(MemTableListTest, FlushPendingTest) { // Add another table list.Add(tables[5], &to_delete); ASSERT_EQ(1, list.NumNotFlushed()); - ASSERT_EQ(5, list.GetLatestMemTableID()); + ASSERT_EQ(5, list.GetLatestMemTableID(false /* for_atomic_flush */)); memtable_id = 4; // Pick tables to flush. The tables to pick must have ID smaller than or // equal to 4. Therefore, no table will be selected in this case. From 5fbea87859cc0f5775edb2684ed72e0ac0819ea4 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 2 Oct 2023 16:52:39 -0700 Subject: [PATCH 174/386] Disallow start_time == end_time in offpeak time and compare at minute level to allow 24hr offpeak (#11911) Summary: Since allowing 24hr peak by setting start_time = end_time is not so intuitive, we are not going to allow it (e.g. 
`00:00-00:00` doesn't look like a value that would cover 24hr.). Instead, we are going to compare at minute level (i.e. dropping the seconds to the nearest minute) so that `00:00-23:59` will cover 24hrs. The entire minute from 23:59:00 to 23:59:59 will be covered with this change. Minor fixes from previous PR - release build error - fixed random seed in test Pull Request resolved: https://github.com/facebook/rocksdb/pull/11911 Test Plan: `DBOptionsTest::OffPeakTimes` `make -j64 static_lib` to test release build issue that was fixed Reviewed By: pdillinger Differential Revision: D49787795 Pulled By: jaykorean fbshipit-source-id: e8d045b95f54f61d5dd5f1bb473579f8d55c18b3 --- db/db_impl/db_impl_open.cc | 3 ++ db/db_options_test.cc | 61 +++++++++++++++++++++++++++++------- include/rocksdb/options.h | 19 ++++++------ options/db_options.cc | 24 ++++++++++----- 4 files changed, 78 insertions(+), 29 deletions(-) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index b89b997e7..b139a7c8f 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -299,6 +299,9 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { return Status::InvalidArgument( "daily_offpeak_time_utc should be set in the format HH:mm-HH:mm " "(e.g. 04:30-07:30)"); + } else if (start_time == end_time) { + return Status::InvalidArgument( + "start_time and end_time cannot be the same"); } } return Status::OK(); diff --git a/db/db_options_test.cc b/db/db_options_test.cc index 19d6b7e2f..e709dcaaa 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1037,6 +1037,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { TEST_F(DBOptionsTest, OffPeakTimes) { Options options; options.create_if_missing = true; + Random rnd(test::RandomSeed()); auto verify_invalid = [&]() { Status s = DBImpl::TEST_ValidateOptions(options); @@ -1052,6 +1053,8 @@ TEST_F(DBOptionsTest, OffPeakTimes) { std::vector<std::string> invalid_cases = { "06:30-", "-23:30", // Both need to be set + "00:00-00:00", + "06:30-06:30" // Start time cannot be the same as end time "12:30 PM-23:30", "12:01AM-11:00PM", // Invalid format "01:99-22:00", // Invalid value for minutes @@ -1069,11 +1072,11 @@ TEST_F(DBOptionsTest, OffPeakTimes) { }; std::vector<std::string> valid_cases = { - "", // Not enabled. Valid case - "00:00-00:00", // Valid. Entire 24 hours are offpeak. - "06:30-11:30", "06:30-23:30", "13:30-14:30", - "00:00-23:59", // This doesn't cover entire 24 hours. There's 1 minute - // gap from 11:59:00PM to midnight + "", // Not enabled. Valid case + "06:30-11:30", + "06:30-23:30", + "13:30-14:30", + "00:00-23:59", // Entire Day "23:30-01:15", // From 11:30PM to 1:15AM next day. Valid case. "1:0000000000000-2:000000000042", // Weird, but we can parse the int. 
}; @@ -1091,7 +1094,6 @@ TEST_F(DBOptionsTest, OffPeakTimes) { int now_utc_minute, int now_utc_second = 0) { auto mock_clock = std::make_shared(env_->GetSystemClock()); // Add some extra random days to current time - Random rnd(301); int days = rnd.Uniform(100); mock_clock->SetCurrentTime(days * 86400 + now_utc_hour * 3600 + now_utc_minute * 60 + now_utc_second); @@ -1119,12 +1121,13 @@ TEST_F(DBOptionsTest, OffPeakTimes) { verify_is_now_offpeak(true, 4, 30); verify_is_now_offpeak(false, 4, 31); - // There's one minute gap from 11:59PM to midnight + // Entire day offpeak options.daily_offpeak_time_utc = "00:00-23:59"; verify_is_now_offpeak(true, 0, 0); verify_is_now_offpeak(true, 12, 00); verify_is_now_offpeak(true, 23, 59); - verify_is_now_offpeak(false, 23, 59, 1); + verify_is_now_offpeak(true, 23, 59, 1); + verify_is_now_offpeak(true, 23, 59, 59); // Open the db and test by Get/SetDBOptions options.daily_offpeak_time_utc = ""; @@ -1149,7 +1152,6 @@ TEST_F(DBOptionsTest, OffPeakTimes) { auto mock_clock = std::make_shared(env_->GetSystemClock()); auto mock_env = std::make_unique(env_, mock_clock); // Add some extra random days to current time - Random rnd(301); int days = rnd.Uniform(100); mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); options.env = mock_env.get(); @@ -1179,10 +1181,47 @@ TEST_F(DBOptionsTest, OffPeakTimes) { ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) .IsNowOffPeak(mock_clock.get())); - // Sleep for one more second. It's no longer off-peak - mock_clock->MockSleepForSeconds(1); + // Sleep for one more minute. It's at 4:31AM It's no longer off-peak + mock_clock->MockSleepForSeconds(60); ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) .IsNowOffPeak(mock_clock.get())); + Close(); + + // Entire day offpeak + options.daily_offpeak_time_utc = "00:00-23:59"; + DestroyAndReopen(options); + // It doesn't matter what time it is. It should be just offpeak. + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Mock Sleep for 3 hours. It's still off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Mock Sleep for 20 hours. It's still off-peak + mock_clock->MockSleepForSeconds(20 * 3600); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Mock Sleep for 59 minutes. It's still off-peak + mock_clock->MockSleepForSeconds(59 * 60); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Mock Sleep for 59 seconds. It's still off-peak + mock_clock->MockSleepForSeconds(59); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + + // Mock Sleep for 1 second (exactly 24h passed). It's still off-peak + mock_clock->MockSleepForSeconds(1); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); + // Another second for sanity check + mock_clock->MockSleepForSeconds(1); + ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) + .IsNowOffPeak(mock_clock.get())); Close(); } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 3cc268d88..94bfceece 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1429,22 +1429,21 @@ struct DBOptions { bool enforce_single_del_contracts = true; // EXPERIMENTAL - // Implementing offpeak duration awareness in RocksDB. 
In this context, "peak - // time" signifies periods characterized by significantly elevated read and - // write activity compared to other times. By leveraging this knowledge, we - // can prevent low-priority tasks, such as TTL-based compactions, from + // Implementing off-peak duration awareness in RocksDB. In this context, + // "off-peak time" signifies periods characterized by significantly less read + // and write activity compared to other times. By leveraging this knowledge, + // we can prevent low-priority tasks, such as TTL-based compactions, from // competing with read and write operations during peak hours. Essentially, we // preprocess these tasks during the preceding off-peak period, just before // the next peak cycle begins. For example, if the TTL is configured for 25 // days, we may compact the files during the off-peak hours of the 24th day. // - // Time of the day in UTC. Format - HH:mm-HH:mm (00:00-23:59) + // Time of the day in UTC, start_time-end_time inclusive. + // Format - HH:mm-HH:mm (00:00-23:59) // If the start time > end time, it will be considered that the time period - // spans to the next day (e.g., 23:30-04:00) - // If the start time == end time, entire 24 hours will be considered offpeak - // (e.g. 00:00-00:00). Note that 00:00-23:59 will have one minute gap from - // 11:59:00PM to midnight. - // Default: Empty String (No notion of peak/offpeak) + // spans to the next day (e.g., 23:30-04:00). To make an entire day off-peak, + // use "0:00-23:59". To make an entire day have no offpeak period, leave + // this field blank. Default: Empty string (no offpeak). std::string daily_offpeak_time_utc = ""; }; diff --git a/options/db_options.cc b/options/db_options.cc index af30718f3..b26d18e75 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -1073,18 +1073,26 @@ bool MutableDBOptions::IsNowOffPeak(SystemClock* clock) const { int64_t now; if (clock->GetCurrentTime(&now).ok()) { constexpr int kSecondsPerDay = 86400; - int since_midnight_seconds = static_cast(now % kSecondsPerDay); + constexpr int kSecondsPerMinute = 60; + int seconds_since_midnight_to_nearest_minute = + (static_cast(now % kSecondsPerDay) / kSecondsPerMinute) * + kSecondsPerMinute; int start_time = 0, end_time = 0; - assert( - TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time)); - + bool success = + TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time); + assert(success); + assert(start_time != end_time); + if (!success) { + // If the validation was done properly, we should never reach here + return false; + } // if the offpeak duration spans overnight (i.e. 23:30 - 4:30 next day) if (start_time > end_time) { - return start_time <= since_midnight_seconds || - since_midnight_seconds <= end_time; + return start_time <= seconds_since_midnight_to_nearest_minute || + seconds_since_midnight_to_nearest_minute <= end_time; } else { - return start_time <= since_midnight_seconds && - since_midnight_seconds <= end_time; + return start_time <= seconds_since_midnight_to_nearest_minute && + seconds_since_midnight_to_nearest_minute <= end_time; } } return false; From 97f6f475bc373360d3a5a153e76a04c5d43487c0 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Mon, 2 Oct 2023 17:47:24 -0700 Subject: [PATCH 175/386] Fix various failures in auto_readahead_size (#11884) Summary: 1. 
**Error** in TestIterateAgainstExpected API - `Assertion index < pre_read_expected_values.size() && index < post_read_expected_values.size() failed.` **Fix** - `Prev` op is not supported with `auto_readahead_size`. So added support to Reseek in db_iter, if Prev is called. In BlockBasedTableIterator, index_iter_ already moves forward. So there is no way to do Prev from BlockBasedTableIterator. 2. **Error** - `void rocksdb::BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(uint64_t, size_t, size_t&): Assertion index_iter_->value().handle.offset() == offset` **Fix** - Remove prefetch_buffer to be used when uncompressed dict is read. 3. ** Error in TestPrefixScan API - `db_stress: db/db_iter.cc:369: bool rocksdb::DBIter::FindNextUserEntryInternal(bool, const rocksdb::Slice*): Assertion !skipping_saved_key || CompareKeyForSkip(ikey_.user_key, saved_key_.GetUserKey()) > 0 failed. Received signal 6 (Aborted) Invoking GDB for stack trace... db_stress: table/merging_iterator.cc:1036: bool rocksdb::MergingIterator::SkipNextDeleted(): Assertion comparator_->Compare(range_tombstone_iters_[i]->start_key(), pik) <= 0 failed` **Fix** - SeekPrev also calls 1) SeekPrev , 2)Seek and then 3)Prev in some cases in db_iter.cc leading to failure of Prev operation. These backward operations also call Seek. Added direction to disable lookup once direction is backwards in BlockBasedTableIterator.cc Pull Request resolved: https://github.com/facebook/rocksdb/pull/11884 Test Plan: Ran various flavors of crash tests locally for the whole duration Reviewed By: anand1976 Differential Revision: D49834201 Pulled By: akankshamahajan15 fbshipit-source-id: 9a007b4d46a48002c43dc4623a400ecf47d997fe --- db/db_iter.cc | 16 +++++-- db/db_iter.h | 1 + db_stress_tool/db_stress_test_base.cc | 8 ++-- db_stress_tool/no_batched_ops_stress.cc | 3 +- file/prefetch_test.cc | 46 +++++++++++++++---- include/rocksdb/options.h | 11 ++++- .../block_based/block_based_table_iterator.cc | 38 +++++++++------ .../block_based/block_based_table_iterator.h | 8 ++++ .../block_based_table_reader_impl.h | 8 +++- 9 files changed, 102 insertions(+), 37 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 3549d5f34..247542811 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -83,7 +83,8 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options, cfd_(cfd), timestamp_ub_(read_options.timestamp), timestamp_lb_(read_options.iter_start_ts), - timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) { + timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0), + auto_readahead_size_(read_options.auto_readahead_size) { RecordTick(statistics_, NO_ITERATOR_CREATED); if (pin_thru_lifetime_) { pinned_iters_mgr_.StartPinning(); @@ -743,15 +744,22 @@ bool DBIter::ReverseToBackward() { // When current_entry_is_merged_ is true, iter_ may be positioned on the next // key, which may not exist or may have prefix different from current. // If that's the case, seek to saved_key_. - if (current_entry_is_merged_ && - (!expect_total_order_inner_iter() || !iter_.Valid())) { + // + // In case of auto_readahead_size enabled, index_iter moves forward during + // forward scan for block cache lookup and points to different block. If Prev + // op is called, it needs to call SeekForPrev to point to right index_iter_ in + // BlockBasedTableIterator. This only happens when direction is changed from + // forward to backward. 
+ if ((current_entry_is_merged_ && + (!expect_total_order_inner_iter() || !iter_.Valid())) || + auto_readahead_size_) { IterKey last_key; // Using kMaxSequenceNumber and kValueTypeForSeek // (not kValueTypeForSeekForPrev) to seek to a key strictly smaller // than saved_key_. last_key.SetInternalKey(ParsedInternalKey( saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); - if (!expect_total_order_inner_iter()) { + if (!expect_total_order_inner_iter() || auto_readahead_size_) { iter_.SeekForPrev(last_key.GetInternalKey()); } else { // Some iterators may not support SeekForPrev(), so we avoid using it diff --git a/db/db_iter.h b/db/db_iter.h index 5022405c3..ac6487802 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -402,6 +402,7 @@ class DBIter final : public Iterator { const Slice* const timestamp_lb_; const size_t timestamp_size_; std::string saved_timestamp_; + bool auto_readahead_size_; }; // Return a new iterator that converts internal keys (yielded by diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 80c1858e7..f7dee86b2 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1237,7 +1237,6 @@ Status StressTest::TestIterate(ThreadState* thread, } else if (options_.prefix_extractor.get() == nullptr) { expect_total_order = true; } - std::string upper_bound_str; Slice upper_bound; if (thread->rand.OneIn(16)) { @@ -1248,6 +1247,7 @@ Status StressTest::TestIterate(ThreadState* thread, upper_bound = Slice(upper_bound_str); ro.iterate_upper_bound = &upper_bound; } + std::string lower_bound_str; Slice lower_bound; if (thread->rand.OneIn(16)) { @@ -1377,8 +1377,7 @@ Status StressTest::TestIterate(ThreadState* thread, key, op_logs, &diverged); const bool no_reverse = - (FLAGS_memtablerep == "prefix_hash" && !expect_total_order) || - FLAGS_auto_readahead_size; + (FLAGS_memtablerep == "prefix_hash" && !expect_total_order); for (uint64_t i = 0; i < FLAGS_num_iterations && iter->Valid(); ++i) { if (no_reverse || thread->rand.OneIn(2)) { iter->Next(); @@ -1566,7 +1565,8 @@ void StressTest::VerifyIterator(ThreadState* thread, fprintf(stderr, "iterator has value %s\n", iter->key().ToString(true).c_str()); } else { - fprintf(stderr, "iterator is not valid\n"); + fprintf(stderr, "iterator is not valid with status: %s\n", + iter->status().ToString().c_str()); } *diverged = true; } diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 6ddc7fe0b..42cc8f302 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1911,8 +1911,7 @@ class NonBatchedOpsStressTest : public StressTest { if (static_cast(curr) < lb) { iter->Next(); op_logs += "N"; - } else if (static_cast(curr) >= ub && - !FLAGS_auto_readahead_size) { + } else if (static_cast(curr) >= ub) { iter->Prev(); op_logs += "P"; } else { diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index ef71dd2ef..b58b8fd22 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1448,21 +1448,49 @@ TEST_F(PrefetchTest, PrefetchWithBlockLookupAutoTuneWithPrev) { ropts.iterate_upper_bound = ub_ptr; ropts.auto_readahead_size = true; + ReadOptions cmp_readopts = ropts; + cmp_readopts.auto_readahead_size = false; + auto iter = std::unique_ptr(db_->NewIterator(ropts)); + auto cmp_iter = std::unique_ptr(db_->NewIterator(cmp_readopts)); Slice seek_key = Slice("my_key_bbb"); - iter->Seek(seek_key); - ASSERT_TRUE(iter->Valid()); + { + cmp_iter->Seek(seek_key); + 
ASSERT_TRUE(cmp_iter->Valid()); + ASSERT_OK(cmp_iter->status()); + + iter->Seek(seek_key); + ASSERT_TRUE(iter->Valid()); + ASSERT_OK(iter->status()); + + ASSERT_EQ(iter->key(), cmp_iter->key()); + } - // Prev op should fail with auto tuning of readahead_size. - iter->Prev(); - ASSERT_TRUE(iter->status().IsNotSupported()); - ASSERT_FALSE(iter->Valid()); + // Prev op should pass with auto tuning of readahead_size. + { + cmp_iter->Prev(); + ASSERT_TRUE(cmp_iter->Valid()); + ASSERT_OK(cmp_iter->status()); + + iter->Prev(); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + + ASSERT_EQ(iter->key(), cmp_iter->key()); + } // Reseek would follow as usual. - iter->Seek(seek_key); - ASSERT_OK(iter->status()); - ASSERT_TRUE(iter->Valid()); + { + cmp_iter->Seek(seek_key); + ASSERT_TRUE(cmp_iter->Valid()); + ASSERT_OK(cmp_iter->status()); + + iter->Seek(seek_key); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), cmp_iter->key()); + } } Close(); } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 94bfceece..0944e1976 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1737,8 +1737,15 @@ struct ReadOptions { // during scans internally. // For this feature to enabled, iterate_upper_bound must also be specified. // - // NOTE: Not supported with Prev operation and it will be return NotSupported - // error. Enable it for forward scans only. + // NOTE: - Recommended for forward Scans only. + // - In case of backward scans like Prev or SeekForPrev, the + // cost of these backward operations might increase and affect the + // performace. So this option should not be enabled if workload + // contains backward scans. + // - If there is a backward scans, this option will be + // disabled internally and won't be reset if forward scan is done + // again. + // // Default: false bool auto_readahead_size = false; diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 57d14a285..dfd7d1471 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -26,7 +26,7 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, if (autotune_readaheadsize && table_->get_rep()->table_options.block_cache.get() && - !read_options_.async_io) { + !read_options_.async_io && direction_ == IterDirection::kForward) { readahead_cache_lookup_ = true; } @@ -87,15 +87,13 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } else { index_iter_->SeekToFirst(); } - + is_index_at_curr_block_ = true; if (!index_iter_->Valid()) { ResetDataIter(); return; } } - is_index_at_curr_block_ = true; - if (autotune_readaheadsize) { FindReadAheadSizeUpperBound(); if (target) { @@ -170,6 +168,8 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } void BlockBasedTableIterator::SeekForPrev(const Slice& target) { + direction_ = IterDirection::kBackward; + ResetBlockCacheLookupVar(); is_out_of_bound_ = false; is_at_first_key_from_index_ = false; seek_stat_state_ = kNone; @@ -191,7 +191,6 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { } SavePrevIndexValue(); - ResetBlockCacheLookupVar(); // Call Seek() rather than SeekForPrev() in the index block, because the // target data block will likely to contain the position for `target`, the @@ -207,6 +206,7 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { // to distinguish the two unless we read the second block. 
In this case, we'll // end up with reading two blocks. index_iter_->Seek(target); + is_index_at_curr_block_ = true; if (!index_iter_->Valid()) { auto seek_status = index_iter_->status(); @@ -231,8 +231,6 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { } } - is_index_at_curr_block_ = true; - InitDataBlock(); block_iter_.SeekForPrev(target); @@ -244,22 +242,22 @@ void BlockBasedTableIterator::SeekForPrev(const Slice& target) { } void BlockBasedTableIterator::SeekToLast() { + direction_ = IterDirection::kBackward; + ResetBlockCacheLookupVar(); is_out_of_bound_ = false; is_at_first_key_from_index_ = false; seek_stat_state_ = kNone; SavePrevIndexValue(); - ResetBlockCacheLookupVar(); index_iter_->SeekToLast(); + is_index_at_curr_block_ = true; if (!index_iter_->Valid()) { ResetDataIter(); return; } - is_index_at_curr_block_ = true; - InitDataBlock(); block_iter_.SeekToLast(); FindKeyBackward(); @@ -528,7 +526,7 @@ void BlockBasedTableIterator::FindBlockForward() { // index_iter_ can point to different block in case of // readahead_cache_lookup_. readahead_cache_lookup_ will be handle the // upper_bound check. - const bool next_block_is_out_of_bound = + bool next_block_is_out_of_bound = IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr && block_iter_points_to_real_block_ && block_upper_bound_check_ == BlockUpperBound::kUpperBoundInCurBlock; @@ -553,8 +551,14 @@ void BlockBasedTableIterator::FindBlockForward() { // 2. If block_handles is empty and index is not at current because of // lookup (during Next), it should skip doing index_iter_->Next(), as // it's already pointing to next block; - if (IsIndexAtCurr()) { + // 3. Last block could be out of bound and it won't iterate over that + // during BlockCacheLookup. We need to set for that block here. + if (IsIndexAtCurr() || is_index_out_of_bound_) { index_iter_->Next(); + if (is_index_out_of_bound_) { + next_block_is_out_of_bound = is_index_out_of_bound_; + is_index_out_of_bound_ = false; + } } else { // Skip Next as index_iter_ already points to correct index when it // iterates in BlockCacheLookupForReadAheadSize. @@ -612,7 +616,7 @@ void BlockBasedTableIterator::FindKeyBackward() { } void BlockBasedTableIterator::CheckOutOfBound() { - if (IsIndexAtCurr() && read_options_.iterate_upper_bound != nullptr && + if (read_options_.iterate_upper_bound != nullptr && block_upper_bound_check_ != BlockUpperBound::kUpperBoundBeyondCurBlock && Valid()) { is_out_of_bound_ = @@ -686,6 +690,11 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( return; } + if (IsNextBlockOutOfBound()) { + updated_readahead_size = 0; + return; + } + size_t current_readahead_size = 0; size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); @@ -725,6 +734,7 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( if (!s.ok()) { break; } + block_handle_info.is_cache_hit_ = (block_handle_info.cachable_entry_.GetValue() || block_handle_info.cachable_entry_.GetCacheHandle()); @@ -738,6 +748,7 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( // means all the keys in next block or above are out of // bound. 
if (IsNextBlockOutOfBound()) { + is_index_out_of_bound_ = true; break; } index_iter_->Next(); @@ -750,7 +761,6 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( current_readahead_size -= (*it).index_val_.handle.size(); current_readahead_size -= footer; } - updated_readahead_size = current_readahead_size; ResetPreviousBlockOffset(); } diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index c668ced66..7c1c09cb9 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -307,6 +307,13 @@ class BlockBasedTableIterator : public InternalIteratorBase { // can point to a different block. is_index_at_curr_block_ keeps track of // that. bool is_index_at_curr_block_ = true; + bool is_index_out_of_bound_ = false; + + // Used in case of auto_readahead_size to disable the block_cache lookup if + // direction is reversed from forward to backward. In case of backward + // direction, SeekForPrev or Prev might call Seek from db_iter. So direction + // is used to disable the lookup. + IterDirection direction_ = IterDirection::kForward; // If `target` is null, seek to first. void SeekImpl(const Slice* target, bool async_prefetch); @@ -356,6 +363,7 @@ class BlockBasedTableIterator : public InternalIteratorBase { size_t& updated_readahead_size); void ResetBlockCacheLookupVar() { + is_index_out_of_bound_ = false; readahead_cache_lookup_ = false; ClearBlockHandles(); } diff --git a/table/block_based/block_based_table_reader_impl.h b/table/block_based/block_based_table_reader_impl.h index 5f8456bee..fedccd5ee 100644 --- a/table/block_based/block_based_table_reader_impl.h +++ b/table/block_based/block_based_table_reader_impl.h @@ -67,9 +67,13 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator( // might already be under way and this would invalidate it. Also, the // uncompression dict is typically at the end of the file and would // most likely break the sequentiality of the access pattern. + // Same is with auto_readahead_size. It iterates over index to lookup for + // data blocks. And this could break the the sequentiality of the access + // pattern. s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( - ro.async_io ? nullptr : prefetch_buffer, ro, no_io, ro.verify_checksums, - get_context, lookup_context, &uncompression_dict); + ((ro.async_io || ro.auto_readahead_size) ? nullptr : prefetch_buffer), + ro, no_io, ro.verify_checksums, get_context, lookup_context, + &uncompression_dict); if (!s.ok()) { iter->Invalidate(s); return iter; From c13569e41de33ec34d2e45ecf76acaa1800b6848 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Tue, 3 Oct 2023 09:58:49 -0700 Subject: [PATCH 176/386] RocksDB now requires gflags v2.2.0 (#10933) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/10933 Reviewed By: jaykorean Differential Revision: D49872302 Pulled By: jowlyzhang fbshipit-source-id: 15f4e177bed59455ff58a0b48a3f6a55973d0b38 --- INSTALL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index f4bb7e62a..fb4651e4b 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -17,7 +17,7 @@ There are few options when compiling RocksDB: * `make check` will compile and run all the unit tests. `make check` will compile RocksDB in debug mode. * `make all` will compile our static library, and all our tools and unit tests. Our tools -depend on gflags. You will need to have gflags installed to run `make all`. 
This will compile RocksDB in debug mode. Don't +depend on gflags 2.2.0 or newer. You will need to have gflags installed to run `make all`. This will compile RocksDB in debug mode. Don't use binaries compiled by `make all` in production. * By default the binary we produce is optimized for the CPU you're compiling on @@ -77,7 +77,7 @@ most processors made since roughly 2013. git clone https://github.com/gflags/gflags.git cd gflags - git checkout v2.0 + git checkout v2.2.0 ./configure && make && sudo make install **Notice**: Once installed, please add the include path for gflags to your `CPATH` environment variable and the From 40b618f2349b509eabdd175f75faf7ce84cf0696 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Tue, 3 Oct 2023 14:41:26 -0700 Subject: [PATCH 177/386] Enable auto_readahead_size in db_stress (#11916) Summary: Depends on https://github.com/facebook/rocksdb/pull/11884 This PR only enables the option in db_stress. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11916 Reviewed By: anand1976 Differential Revision: D49834479 Pulled By: akankshamahajan15 fbshipit-source-id: 103a64fd7b23236493a8f3064d4c5af83656bd18 --- tools/db_crashtest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8b5d29d08..7b2ce96a1 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -217,7 +217,7 @@ "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]), # 0 (disable) is the default and more commonly used value. "bottommost_file_compaction_delay": lambda: random.choice([0, 0, 0, 600, 3600, 86400]), - "auto_readahead_size" : 0, + "auto_readahead_size" : lambda: random.choice([0, 1]), } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" From 141b872bd4d71bb4d288d7864349852577aa2a65 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 4 Oct 2023 14:14:22 -0700 Subject: [PATCH 178/386] Improve efficiency of create_missing_column_families, light refactor (#11920) Summary: In preparing some seqno_to_time_mapping improvements, I found that some of the wrap-up work for creating column families was unnecessarily repeated in the case of DB::Open with create_missing_column_families. This change fixes that (`CreateColumnFamily()` -> `CreateColumnFamilyImpl()` in `DBImpl::Open()`), motivated by avoiding repeated calls to `RegisterRecordSeqnoTimeWorker()` but with the side benefit of avoiding repeated calls to `WriteOptionsFile()` for each CF. 
Also in this change: * Add a `Status::UpdateIfOk()` function for combining statuses in a common pattern * Rename `max_time_duration` -> `min_preserve_seconds` (include units as much as possible) * Improved comments in several places Pull Request resolved: https://github.com/facebook/rocksdb/pull/11920 Test Plan: tests added / updated Reviewed By: jaykorean Differential Revision: D49919147 Pulled By: pdillinger fbshipit-source-id: 3d0318c1d070c842c5331da0a5b415caedc104f1 --- db/column_family_test.cc | 54 +++++++++++++-- db/db_impl/db_impl.cc | 67 +++++++++++-------- db/db_impl/db_impl.h | 5 ++ db/db_impl/db_impl_open.cc | 4 +- db/periodic_task_scheduler.h | 8 +-- include/rocksdb/db.h | 4 ++ include/rocksdb/options.h | 3 +- include/rocksdb/status.h | 19 ++++++ tools/ldb_tool.cc | 1 - .../options_files_on_open.md | 1 + util/slice_test.cc | 30 +++++++++ 11 files changed, 157 insertions(+), 39 deletions(-) create mode 100644 unreleased_history/performance_improvements/options_files_on_open.md diff --git a/db/column_family_test.cc b/db/column_family_test.cc index c0574ee55..6fa4373c2 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include +#include #include #include #include @@ -27,6 +28,7 @@ #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" +#include "util/defer.h" #include "util/string_util.h" #include "utilities/fault_injection_env.h" #include "utilities/merge_operators.h" @@ -2169,13 +2171,57 @@ TEST_P(ColumnFamilyTest, FlushStaleColumnFamilies) { Close(); } +namespace { +struct CountOptionsFilesFs : public FileSystemWrapper { + explicit CountOptionsFilesFs(const std::shared_ptr& t) + : FileSystemWrapper(t) {} + const char* Name() const override { return "CountOptionsFilesFs"; } + + IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* dbg) override { + if (f.find("OPTIONS-") != std::string::npos) { + options_files_created.fetch_add(1, std::memory_order_relaxed); + } + return FileSystemWrapper::NewWritableFile(f, file_opts, r, dbg); + } + + std::atomic options_files_created{}; +}; +} // namespace + TEST_P(ColumnFamilyTest, CreateMissingColumnFamilies) { - Status s = TryOpen({"one", "two"}); - ASSERT_TRUE(!s.ok()); + // Can't accidentally add CFs to an existing DB + Open(); + Close(); + ASSERT_FALSE(db_options_.create_missing_column_families); + ASSERT_NOK(TryOpen({"one", "two"})); + + // Nor accidentally create in a new DB + Destroy(); + db_options_.create_if_missing = true; + ASSERT_NOK(TryOpen({"one", "two"})); + + // Only with the option (new DB case) db_options_.create_missing_column_families = true; - s = TryOpen({"default", "one", "two"}); - ASSERT_TRUE(s.ok()); + // Also setup to count number of options files created (see check below) + auto my_fs = + std::make_shared(db_options_.env->GetFileSystem()); + auto my_env = std::make_unique(db_options_.env, my_fs); + SaveAndRestore save_restore_env(&db_options_.env, my_env.get()); + + ASSERT_OK(TryOpen({"default", "one", "two"})); + Close(); + + // An older version would write an updated options file for each column + // family created under create_missing_column_families, which would be + // quadratic I/O in the number of column families. 
+ ASSERT_EQ(my_fs->options_files_created.load(), 1); + + // Add to existing DB case + ASSERT_OK(TryOpen({"default", "one", "two", "three", "four"})); Close(); + ASSERT_EQ(my_fs->options_files_created.load(), 2); } TEST_P(ColumnFamilyTest, SanitizeOptions) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 855f6c534..b7b555070 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -816,35 +816,35 @@ Status DBImpl::StartPeriodicTaskScheduler() { } Status DBImpl::RegisterRecordSeqnoTimeWorker() { - uint64_t min_time_duration = std::numeric_limits::max(); - uint64_t max_time_duration = std::numeric_limits::min(); + uint64_t min_preserve_seconds = std::numeric_limits::max(); + uint64_t max_preserve_seconds = std::numeric_limits::min(); { InstrumentedMutexLock l(&mutex_); for (auto cfd : *versions_->GetColumnFamilySet()) { // preserve time is the max of 2 options. - uint64_t preserve_time_duration = + uint64_t preserve_seconds = std::max(cfd->ioptions()->preserve_internal_time_seconds, cfd->ioptions()->preclude_last_level_data_seconds); - if (!cfd->IsDropped() && preserve_time_duration > 0) { - min_time_duration = std::min(preserve_time_duration, min_time_duration); - max_time_duration = std::max(preserve_time_duration, max_time_duration); + if (!cfd->IsDropped() && preserve_seconds > 0) { + min_preserve_seconds = std::min(preserve_seconds, min_preserve_seconds); + max_preserve_seconds = std::max(preserve_seconds, max_preserve_seconds); } } - if (min_time_duration == std::numeric_limits::max()) { + if (min_preserve_seconds == std::numeric_limits::max()) { seqno_to_time_mapping_.Resize(0, 0); } else { - seqno_to_time_mapping_.Resize(min_time_duration, max_time_duration); + seqno_to_time_mapping_.Resize(min_preserve_seconds, max_preserve_seconds); } } uint64_t seqno_time_cadence = 0; - if (min_time_duration != std::numeric_limits::max()) { + if (min_preserve_seconds != std::numeric_limits::max()) { // round up to 1 when the time_duration is smaller than // kMaxSeqnoTimePairsPerCF - seqno_time_cadence = - (min_time_duration + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF - 1) / - SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF; + seqno_time_cadence = (min_preserve_seconds + + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF - 1) / + SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF; } Status s; @@ -3296,14 +3296,34 @@ void DBImpl::MultiGetEntity(const ReadOptions& _read_options, statuses, sorted_input); } +Status DBImpl::WrapUpCreateColumnFamilies( + const std::vector& cf_options) { + // NOTE: this function is skipped for create_missing_column_families and + // DB::Open, so new functionality here might need to go into Open also. 
+ bool register_worker = false; + for (auto* opts_ptr : cf_options) { + if (opts_ptr->preserve_internal_time_seconds > 0 || + opts_ptr->preclude_last_level_data_seconds > 0) { + register_worker = true; + break; + } + } + // Attempt both follow-up actions even if one fails + Status s = WriteOptionsFile(true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); + if (register_worker) { + s.UpdateIfOk(RegisterRecordSeqnoTimeWorker()); + } + return s; +} + Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, ColumnFamilyHandle** handle) { assert(handle != nullptr); Status s = CreateColumnFamilyImpl(cf_options, column_family, handle); if (s.ok()) { - s = WriteOptionsFile(true /*need_mutex_lock*/, - true /*need_enter_write_thread*/); + s.UpdateIfOk(WrapUpCreateColumnFamilies({&cf_options})); } return s; } @@ -3327,11 +3347,7 @@ Status DBImpl::CreateColumnFamilies( success_once = true; } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); - if (s.ok() && !persist_options_status.ok()) { - s = persist_options_status; - } + s.UpdateIfOk(WrapUpCreateColumnFamilies({&cf_options})); } return s; } @@ -3344,6 +3360,8 @@ Status DBImpl::CreateColumnFamilies( size_t num_cf = column_families.size(); Status s; bool success_once = false; + std::vector cf_opts; + cf_opts.reserve(num_cf); for (size_t i = 0; i < num_cf; i++) { ColumnFamilyHandle* handle; s = CreateColumnFamilyImpl(column_families[i].options, @@ -3353,13 +3371,10 @@ Status DBImpl::CreateColumnFamilies( } handles->push_back(handle); success_once = true; + cf_opts.push_back(&column_families[i].options); } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); - if (s.ok() && !persist_options_status.ok()) { - s = persist_options_status; - } + s.UpdateIfOk(WrapUpCreateColumnFamilies(cf_opts)); } return s; } @@ -3447,10 +3462,6 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, } } // InstrumentedMutexLock l(&mutex_) - if (cf_options.preserve_internal_time_seconds > 0 || - cf_options.preclude_last_level_data_seconds > 0) { - s = RegisterRecordSeqnoTimeWorker(); - } sv_context.Clean(); // this is outside the mutex if (s.ok()) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index f4ce56035..d3ab66d1b 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1823,10 +1823,15 @@ class DBImpl : public DB { const Status CreateArchivalDirectory(); + // Create a column family, without some of the follow-up work yet Status CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, const std::string& cf_name, ColumnFamilyHandle** handle); + // Follow-up work to user creating a column family or (families) + Status WrapUpCreateColumnFamilies( + const std::vector& cf_options); + Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family); // Delete any unneeded files and stale in-memory entries. 
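For application code, the note added to include/rocksdb/db.h below boils down to preferring the batch creation paths. A rough sketch, outside the patch (`db` is an already-open DB*; the path and column family names are made up):

    // (a) Create several column families in one call on an open DB.
    std::vector<std::string> cf_names{"one", "two", "three"};
    std::vector<ColumnFamilyHandle*> cf_handles;
    Status s =
        db->CreateColumnFamilies(ColumnFamilyOptions(), cf_names, &cf_handles);

    // (b) Or let DB::Open create whatever is missing in a single pass.
    DBOptions db_opts;
    db_opts.create_if_missing = true;
    db_opts.create_missing_column_families = true;
    std::vector<ColumnFamilyDescriptor> descriptors;
    descriptors.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions());
    descriptors.emplace_back("one", ColumnFamilyOptions());
    descriptors.emplace_back("two", ColumnFamilyOptions());
    std::vector<ColumnFamilyHandle*> handles;
    DB* db_ptr = nullptr;
    s = DB::Open(db_opts, "/tmp/many_cf_demo", descriptors, &handles, &db_ptr);

Either route lets the follow-up work (options file write, seqno-to-time worker registration) run once rather than once per column family, which is what the options-file counter in the test above asserts.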
diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index b139a7c8f..f9ca85405 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -2069,7 +2069,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, // missing column family, create it ColumnFamilyHandle* handle = nullptr; impl->mutex_.Unlock(); - s = impl->CreateColumnFamily(cf.options, cf.name, &handle); + // NOTE: the work normally done in WrapUpCreateColumnFamilies will + // be done separately below. + s = impl->CreateColumnFamilyImpl(cf.options, cf.name, &handle); impl->mutex_.Lock(); if (s.ok()) { handles->push_back(handle); diff --git a/db/periodic_task_scheduler.h b/db/periodic_task_scheduler.h index 4d129a679..a93f9a095 100644 --- a/db/periodic_task_scheduler.h +++ b/db/periodic_task_scheduler.h @@ -42,15 +42,16 @@ class PeriodicTaskScheduler { PeriodicTaskScheduler& operator=(const PeriodicTaskScheduler&) = delete; PeriodicTaskScheduler& operator=(PeriodicTaskScheduler&&) = delete; - // Register a task with its default repeat period + // Register a task with its default repeat period. Thread safe call. Status Register(PeriodicTaskType task_type, const PeriodicTaskFunc& fn); // Register a task with specified repeat period. 0 is an invalid argument - // (kInvalidPeriodSec). To stop the task, please use Unregister() specifically + // (kInvalidPeriodSec). To stop the task, please use Unregister(). + // Thread safe call. Status Register(PeriodicTaskType task_type, const PeriodicTaskFunc& fn, uint64_t repeat_period_seconds); - // Unregister the task + // Unregister the task. Thread safe call. Status Unregister(PeriodicTaskType task_type); #ifndef NDEBUG @@ -105,4 +106,3 @@ class PeriodicTaskScheduler { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 4cd4b09c4..781e3f277 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -362,6 +362,10 @@ class DB { // Create a column_family and return the handle of column family // through the argument handle. + // NOTE: creating many column families one-by-one is not recommended because + // of quadratic overheads, such as writing a full OPTIONS file for all CFs + // after each new CF creation. Use CreateColumnFamilies(), or DB::Open() with + // create_missing_column_families=true. virtual Status CreateColumnFamily(const ColumnFamilyOptions& options, const std::string& column_family_name, ColumnFamilyHandle** handle); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 0944e1976..b20c66e14 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -485,7 +485,8 @@ struct DBOptions { // Default: false bool create_if_missing = false; - // If true, missing column families will be automatically created. + // If true, missing column families will be automatically created on + // DB::Open(). // Default: false bool create_missing_column_families = false; diff --git a/include/rocksdb/status.h b/include/rocksdb/status.h index 8b3054545..82597239f 100644 --- a/include/rocksdb/status.h +++ b/include/rocksdb/status.h @@ -151,6 +151,25 @@ class Status { return state_.get(); } + // Override this status with another, unless this status is already non-ok. + // Returns *this. Thus, the result of `a.UpdateIfOk(b).UpdateIfOk(c)` is + // non-ok (and `a` modified as such) iff any input was non-ok, with + // left-most taking precedence as far as the details. 
+ Status& UpdateIfOk(Status&& s) { + if (code() == kOk) { + *this = std::move(s); + } else { + // Alright to ignore that status as long as this one is checked + s.PermitUncheckedError(); + } + MustCheck(); + return *this; + } + + Status& UpdateIfOk(const Status& s) { + return UpdateIfOk(std::forward(Status(s))); + } + // Return a success status. static Status OK() { return Status(); } diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index 8d5ac068b..20e9ebe2c 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -180,4 +180,3 @@ void LDBTool::Run(int argc, char** argv, Options options, exit(error_code); } } // namespace ROCKSDB_NAMESPACE - diff --git a/unreleased_history/performance_improvements/options_files_on_open.md b/unreleased_history/performance_improvements/options_files_on_open.md new file mode 100644 index 000000000..e4ecf2949 --- /dev/null +++ b/unreleased_history/performance_improvements/options_files_on_open.md @@ -0,0 +1 @@ +Improved the I/O efficiency of DB::Open a new DB with `create_missing_column_families=true` and many column families. diff --git a/util/slice_test.cc b/util/slice_test.cc index 010ded3d8..e82547494 100644 --- a/util/slice_test.cc +++ b/util/slice_test.cc @@ -243,6 +243,36 @@ TEST_F(SmallEnumSetTest, SmallEnumSetTest2) { } } +// ***************************************************************** // +// Unit test for Status +TEST(StatusTest, Update) { + const Status ok = Status::OK(); + const Status inc = Status::Incomplete("blah"); + const Status notf = Status::NotFound("meow"); + + Status s = ok; + ASSERT_TRUE(s.UpdateIfOk(Status::Corruption("bad")).IsCorruption()); + ASSERT_TRUE(s.IsCorruption()); + + s = ok; + ASSERT_TRUE(s.UpdateIfOk(Status::OK()).ok()); + ASSERT_TRUE(s.UpdateIfOk(ok).ok()); + ASSERT_TRUE(s.ok()); + + ASSERT_TRUE(s.UpdateIfOk(inc).IsIncomplete()); + ASSERT_TRUE(s.IsIncomplete()); + + ASSERT_TRUE(s.UpdateIfOk(notf).IsIncomplete()); + ASSERT_TRUE(s.UpdateIfOk(ok).IsIncomplete()); + ASSERT_TRUE(s.IsIncomplete()); + + // Keeps left-most non-OK status + s = ok; + ASSERT_TRUE( + s.UpdateIfOk(Status()).UpdateIfOk(notf).UpdateIfOk(inc).IsNotFound()); + ASSERT_TRUE(s.IsNotFound()); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 8e949116f78b03a7c68f262af2fb4b56b427e35b Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 4 Oct 2023 14:42:35 -0700 Subject: [PATCH 179/386] Fix comments about creation_time/oldest_ancester_time/oldest_key_time (#11921) Summary: Code reference for the comments change: https://github.com/facebook/rocksdb/blob/40b618f2349b509eabdd175f75faf7ce84cf0696/table/block_based/block_based_table_builder.cc?fbclid=IwAR0JlfnG8wysclFP5wv0fSngFbi_j32BUCKbFayeGdr10tzDhyyk5QqpclA#L2093 https://github.com/facebook/rocksdb/blob/40b618f2349b509eabdd175f75faf7ce84cf0696/db/flush_job.cc?fbclid=IwAR1ri6eTX3wyD_2fAEBRzFSwZItcbmDS8LaB11k1letDMQmB2L8nF6TfXDs#L945-L949 https://github.com/facebook/rocksdb/blob/40b618f2349b509eabdd175f75faf7ce84cf0696/db/compaction/compaction_job.cc#L1882-L1904 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11921 Reviewed By: cbi42 Differential Revision: D49921304 Pulled By: hx235 fbshipit-source-id: 2ae17e43c0fd52044404d7b63fea254d2d1f3595 --- db/compaction/compaction_job.cc | 1 + db/version_edit.h | 14 ++++++++++---- include/rocksdb/table_properties.h | 17 ++++++++++++++--- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index bf8ce25a6..257848e46 100644 --- 
a/db/compaction/compaction_job.cc
+++ b/db/compaction/compaction_job.cc
@@ -1944,6 +1944,7 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
       db_options_.stats, listeners, db_options_.file_checksum_gen_factory.get(),
       tmp_set.Contains(FileType::kTableFile), false));
+  // TODO(hx235): pass in the correct `oldest_key_time` instead of `0`
   TableBuilderOptions tboptions(
       *cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()),
       cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
diff --git a/db/version_edit.h b/db/version_edit.h
index e6d54d31d..5d7687204 100644
--- a/db/version_edit.h
+++ b/db/version_edit.h
@@ -219,10 +219,16 @@ struct FileMetaData {
   // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.
   uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
 
-  // The file could be the compaction output from other SST files, which could
-  // in turn be outputs for compact older SST files. We track the memtable
-  // flush timestamp for the oldest SST file that eventually contribute data
-  // to this file. 0 means the information is not available.
+  // For a flush output file, the oldest ancestor time is the oldest key time
+  // in the file. If the oldest key time is not available, flush time is used.
+  //
+  // For a compaction output file, the oldest ancestor time is the oldest
+  // among the oldest key times of its input files, since the file could be
+  // the compaction output from other SST files, which could in turn be
+  // outputs from compacting older SST files. If that's not available, the
+  // creation time of this compaction output file is used.
+  //
+  // 0 means the information is not available.
   uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
 
   // Unix time when the SST file is created.
diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h
index ebde339dd..0256fbddd 100644
--- a/include/rocksdb/table_properties.h
+++ b/include/rocksdb/table_properties.h
@@ -219,9 +219,20 @@ struct TableProperties {
   // by column_family_name.
   uint64_t column_family_id = ROCKSDB_NAMESPACE::
       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily;
-  // Timestamp of the latest key. 0 means unknown.
-  // TODO(sagar0): Should be changed to latest_key_time ... but don't know the
-  // full implications of backward compatibility. Hence retaining for now.
+
+  // Oldest ancestor time. 0 means unknown.
+  //
+  // For a flush output file, the oldest ancestor time is the oldest key time
+  // in the file. If the oldest key time is not available, flush time is used.
+  //
+  // For a compaction output file, the oldest ancestor time is the oldest
+  // among the oldest key times of its input files, since the file could be
+  // the compaction output from other SST files, which could in turn be
+  // outputs from compacting older SST files. If that's not available, the
+  // creation time of this compaction output file is used.
+  //
+  // TODO(sagar0): Should be changed to oldest_ancester_time ... but don't know
+  // the full implications of backward compatibility. Hence retaining for now.
   uint64_t creation_time = 0;
 
   // Timestamp of the earliest key. 0 means unknown.
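The rule documented in these comments reduces to a small amount of logic. The function below only illustrates that rule (the name is invented, and 0 stands for "not available" as with `kUnknownOldestAncesterTime`); it is not the actual RocksDB implementation:

```cpp
#include <cstdint>
#include <vector>

// 0 means "not available".
constexpr uint64_t kUnknown = 0;

// Oldest ancestor time of a compaction output: the oldest known value among
// the inputs, falling back to the output's creation time when none is known.
uint64_t OldestAncestorTime(const std::vector<uint64_t>& input_times,
                            uint64_t output_creation_time) {
  uint64_t oldest = kUnknown;
  for (uint64_t t : input_times) {
    if (t != kUnknown && (oldest == kUnknown || t < oldest)) {
      oldest = t;
    }
  }
  return oldest != kUnknown ? oldest : output_creation_time;
}
```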
From 1d5bddbc58c6987e8a0a0feefd1fb475caa54ab6 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 6 Oct 2023 08:21:21 -0700 Subject: [PATCH 180/386] Bootstrap, pre-populate seqno_to_time_mapping (#11922) Summary: This change has two primary goals (follow-up to https://github.com/facebook/rocksdb/issues/11917, https://github.com/facebook/rocksdb/issues/11920): * Ensure the DB seqno_to_time_mapping has entries that allow us to put a good time lower bound on any writes that happen after setting up preserve/preclude options (either in a new DB, new CF, SetOptions, etc.) and haven't yet aged out of that time window. This allows us to remove a bunch of work-arounds in tests. * For new DBs using preserve/preclude options, automatically reserve some sequence numbers and pre-map them to cover the time span back to the preserve/preclude cut-off time. In the future, this will allow us to import data from another DB by key, value, and write time by assigning an appropriate seqno in this DB for that write time. Note that the pre-population (historical mappings) does not happen if the original options at DB Open time do not have preserve/preclude, so it is recommended to create initial column families at that time with create_missing_column_families, to take advantage of this (future) feature. (Adding these historical mappings after DB Open would risk non-monotonic seqno_to_time_mapping, which is dubious if not dangerous.) Recommended follow-up: * Solve existing race conditions (not memory safety) where parallel operations like CreateColumnFamily or SetDBOptions could leave the wrong setting in effect. * Make SeqnoToTimeMapping more gracefully handle a possible case in which too many mappings are added for the time range of concern. It seems like there could be cases where data is massively excluded from the cold tier because of entries falling off the front of the mapping list (causing GetProximalSeqnoBeforeTime() to return 0). (More investigation needed.) No release note for the minor bug fix because this is still an experimental feature with limited usage. 
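A rough sketch of the pre-population arithmetic: it mirrors the loop in `SeqnoToTimeMapping::PrePopulate()` added in this change, but the standalone `PrePopulatePairs()` helper is only illustrative and assumes `to_seqno > from_seqno`.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Spread the reserved seqnos [from_seqno, to_seqno] evenly over
// [from_time, to_time], oldest first.
std::vector<std::pair<uint64_t, uint64_t>> PrePopulatePairs(
    uint64_t from_seqno, uint64_t to_seqno, uint64_t from_time,
    uint64_t to_time) {
  std::vector<std::pair<uint64_t, uint64_t>> pairs;
  for (uint64_t s = from_seqno; s <= to_seqno; ++s) {
    uint64_t t = from_time + (to_time - from_time) * (s - from_seqno) /
                                 (to_seqno - from_seqno);
    pairs.emplace_back(s, t);  // seqno s is treated as written no later than t
  }
  return pairs;
}
```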
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11922 Test Plan: tests added / updated Reviewed By: jowlyzhang Differential Revision: D49956563 Pulled By: pdillinger fbshipit-source-id: 92beb918c3a298fae9ca8e509717b1067caa1519 --- db/compaction/tiered_compaction_test.cc | 57 ------ db/db_impl/db_impl.cc | 97 +++++++-- db/db_impl/db_impl.h | 9 +- db/db_impl/db_impl_open.cc | 2 +- db/db_test_util.cc | 1 + db/seqno_time_test.cc | 260 ++++++++++++++++++++---- db/seqno_to_time_mapping.cc | 23 +++ db/seqno_to_time_mapping.h | 5 + db/version_set.cc | 14 ++ db/version_set.h | 3 + util/cast_util.h | 10 + 11 files changed, 370 insertions(+), 111 deletions(-) diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 3341bddc8..654bd0829 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1254,14 +1254,6 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimeManualCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - int sst_num = 0; // Write files that are overlap and enough to trigger compaction for (; sst_num < kNumTrigger; sst_num++) { @@ -1319,14 +1311,6 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimeAutoCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - int sst_num = 0; // Write files that are overlap and enough to trigger compaction for (; sst_num < kNumTrigger; sst_num++) { @@ -1398,14 +1382,6 @@ TEST_F(PrecludeLastLevelTest, MigrationFromPreserveTimePartial) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - int sst_num = 0; // Write files that are overlap and enough to trigger compaction for (; sst_num < kNumTrigger; sst_num++) { @@ -1528,14 +1504,6 @@ TEST_F(PrecludeLastLevelTest, LastLevelOnlyCompactionPartial) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - int sst_num = 0; // Write 
files that are overlap and enough to trigger compaction for (; sst_num < kNumTrigger; sst_num++) { @@ -1609,14 +1577,6 @@ TEST_P(PrecludeLastLevelTestWithParms, LastLevelOnlyCompactionNoPreclude) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - Random rnd(301); int sst_num = 0; // Write files that are overlap and enough to trigger compaction @@ -1926,14 +1886,6 @@ TEST_F(PrecludeLastLevelTest, PartialPenultimateLevelCompaction) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - Random rnd(301); for (int i = 0; i < 300; i++) { @@ -2046,15 +1998,6 @@ TEST_F(PrecludeLastLevelTest, DISABLED_RangeDelsCauseFileEndpointsToOverlap) { options.target_file_size_base = kFileBytes; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun([&] { - mock_clock_->MockSleepForSeconds(static_cast(kSecondsPerKey)); - }); - // Flush an L0 file with the following contents (new to old): // // Range deletions [4, 6) [7, 8) [9, 11) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index b7b555070..0b8d21790 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -273,8 +273,9 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, periodic_task_functions_.emplace(PeriodicTaskType::kFlushInfoLog, [this]() { this->FlushInfoLog(); }); periodic_task_functions_.emplace( - PeriodicTaskType::kRecordSeqnoTime, - [this]() { this->RecordSeqnoToTimeMapping(); }); + PeriodicTaskType::kRecordSeqnoTime, [this]() { + this->RecordSeqnoToTimeMapping(/*populate_historical_seconds=*/0); + }); versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_, table_cache_.get(), write_buffer_manager_, @@ -815,9 +816,10 @@ Status DBImpl::StartPeriodicTaskScheduler() { return s; } -Status DBImpl::RegisterRecordSeqnoTimeWorker() { +Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { uint64_t min_preserve_seconds = std::numeric_limits::max(); uint64_t max_preserve_seconds = std::numeric_limits::min(); + bool mapping_was_empty = false; { InstrumentedMutexLock l(&mutex_); @@ -836,7 +838,13 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() { } else { seqno_to_time_mapping_.Resize(min_preserve_seconds, max_preserve_seconds); } + mapping_was_empty = seqno_to_time_mapping_.Empty(); } + // FIXME: because we released the db mutex, there's a race here where + // if e.g. 
I create or drop two column families in parallel, I might end up + // with the periodic task scheduler in the wrong state. We don't want to + // just keep holding the mutex, however, because of global timer and mutex + // in PeriodicTaskScheduler. uint64_t seqno_time_cadence = 0; if (min_preserve_seconds != std::numeric_limits::max()) { @@ -851,6 +859,39 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker() { if (seqno_time_cadence == 0) { s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kRecordSeqnoTime); } else { + // Before registering the periodic task, we need to be sure to fulfill two + // promises: + // 1) Any DB created with preserve/preclude options set from the beginning + // will get pre-allocated seqnos with pre-populated time mappings back to + // the times we are interested in. (This will enable future import of data + // while preserving rough write time. We can only do this reliably from + // DB::Open, as otherwise there could be a race between CreateColumnFamily + // and the first Write to the DB, and seqno-to-time mappings need to be + // monotonic. + // 2) In any DB, any data written after setting preserve/preclude options + // must have a reasonable time estimate (so that we can accurately place + // the data), which means at least one entry in seqno_to_time_mapping_. + if (from_db_open && GetLatestSequenceNumber() == 0) { + // Pre-allocate seqnos and pre-populate historical mapping + assert(mapping_was_empty); + + // We can simply modify these, before writes are allowed + constexpr uint64_t kMax = SeqnoToTimeMapping::kMaxSeqnoTimePairsPerSST; + versions_->SetLastAllocatedSequence(kMax); + versions_->SetLastPublishedSequence(kMax); + versions_->SetLastSequence(kMax); + // Pre-populate mappings for reserved sequence numbers. + RecordSeqnoToTimeMapping(max_preserve_seconds); + } else if (mapping_was_empty) { + // To ensure there is at least one mapping, we need a non-zero sequence + // number. Outside of DB::Open, we have to be careful. + versions_->EnsureNonZeroSequence(); + assert(GetLatestSequenceNumber() > 0); + + // Ensure at least one mapping (or log a warning) + RecordSeqnoToTimeMapping(/*populate_historical_seconds=*/0); + } + s = periodic_task_scheduler_.Register( PeriodicTaskType::kRecordSeqnoTime, periodic_task_functions_.at(PeriodicTaskType::kRecordSeqnoTime), @@ -3312,7 +3353,7 @@ Status DBImpl::WrapUpCreateColumnFamilies( Status s = WriteOptionsFile(true /*need_mutex_lock*/, true /*need_enter_write_thread*/); if (register_worker) { - s.UpdateIfOk(RegisterRecordSeqnoTimeWorker()); + s.UpdateIfOk(RegisterRecordSeqnoTimeWorker(/*from_db_open=*/false)); } return s; } @@ -3555,7 +3596,7 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { if (cfd->ioptions()->preserve_internal_time_seconds > 0 || cfd->ioptions()->preclude_last_level_data_seconds > 0) { - s = RegisterRecordSeqnoTimeWorker(); + s = RegisterRecordSeqnoTimeWorker(/*from_db_open=*/false); } if (s.ok()) { @@ -6383,21 +6424,51 @@ Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) { } } -void DBImpl::RecordSeqnoToTimeMapping() { +void DBImpl::RecordSeqnoToTimeMapping(uint64_t populate_historical_seconds) { // TECHNICALITY: Sample last sequence number *before* time, as prescribed - // for SeqnoToTimeMapping + // for SeqnoToTimeMapping. We don't know how long it has been since the last + // sequence number was written, so we at least have a one-sided bound by + // sampling in this order. 
SequenceNumber seqno = GetLatestSequenceNumber(); - // Get time first then sequence number, so the actual time of seqno is <= - // unix_time recorded - int64_t unix_time = 0; - immutable_db_options_.clock->GetCurrentTime(&unix_time) + int64_t unix_time_signed = 0; + immutable_db_options_.clock->GetCurrentTime(&unix_time_signed) .PermitUncheckedError(); // Ignore error + uint64_t unix_time = static_cast(unix_time_signed); bool appended = false; { InstrumentedMutexLock l(&mutex_); - appended = seqno_to_time_mapping_.Append(seqno, unix_time); + if (populate_historical_seconds > 0) { + if (seqno > 1 && unix_time > populate_historical_seconds) { + // seqno=0 is reserved + SequenceNumber from_seqno = 1; + appended = seqno_to_time_mapping_.PrePopulate( + from_seqno, seqno, unix_time - populate_historical_seconds, + unix_time); + } else { + // One of these will fail + assert(seqno > 1); + assert(unix_time > populate_historical_seconds); + } + } else { + assert(seqno > 0); + appended = seqno_to_time_mapping_.Append(seqno, unix_time); + } } - if (!appended) { + if (populate_historical_seconds > 0) { + if (appended) { + ROCKS_LOG_INFO( + immutable_db_options_.info_log, + "Pre-populated sequence number to time entries: [1,%" PRIu64 + "] -> [%" PRIu64 ",%" PRIu64 "]", + seqno, unix_time - populate_historical_seconds, unix_time); + } else { + ROCKS_LOG_WARN( + immutable_db_options_.info_log, + "Failed to pre-populate sequence number to time entries: [1,%" PRIu64 + "] -> [%" PRIu64 ",%" PRIu64 "]", + seqno, unix_time - populate_historical_seconds, unix_time); + } + } else if (!appended) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Failed to insert sequence number to time entry: %" PRIu64 " -> %" PRIu64, diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index d3ab66d1b..5e7e87bb7 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1212,8 +1212,11 @@ class DBImpl : public DB { // flush LOG out of application buffer void FlushInfoLog(); - // record current sequence number to time mapping - void RecordSeqnoToTimeMapping(); + // record current sequence number to time mapping. If + // populate_historical_seconds > 0 then pre-populate all the + // sequence numbers from [1, last] to map to [now minus + // populate_historical_seconds, now]. + void RecordSeqnoToTimeMapping(uint64_t populate_historical_seconds); // Interface to block and signal the DB in case of stalling writes by // WriteBufferManager. Each DBImpl object contains ptr to WBMStallInterface. 
@@ -2163,7 +2166,7 @@ class DBImpl : public DB { // Cancel scheduled periodic tasks Status CancelPeriodicTaskScheduler(); - Status RegisterRecordSeqnoTimeWorker(); + Status RegisterRecordSeqnoTimeWorker(bool from_db_open); void PrintStatistics(); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index f9ca85405..d48f66ae5 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -2247,7 +2247,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, } if (s.ok()) { - s = impl->RegisterRecordSeqnoTimeWorker(); + s = impl->RegisterRecordSeqnoTimeWorker(/*from_db_open=*/true); } if (!s.ok()) { for (auto* h : *handles) { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 189002270..bb3a74e30 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -699,6 +699,7 @@ void DBTestBase::Destroy(const Options& options, bool delete_cf_paths) { } Status DBTestBase::ReadOnlyReopen(const Options& options) { + Close(); MaybeInstallTimeElapseOnlySleep(options); return DB::OpenForReadOnly(options, dbname_, &db_); } diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc index 3a64499be..64f9b53de 100644 --- a/db/seqno_time_test.cc +++ b/db/seqno_time_test.cc @@ -82,14 +82,6 @@ TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) { options.num_levels = kNumLevels; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(kKeyPerSec)); }); - int sst_num = 0; // Write files that are overlap and enough to trigger compaction for (; sst_num < kNumTrigger; sst_num++) { @@ -197,14 +189,6 @@ TEST_F(SeqnoTimeTest, TemperatureBasicLevel) { options.disable_auto_compactions = true; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - int sst_num = 0; // Write files that are overlap for (; sst_num < 4; sst_num++) { @@ -330,14 +314,6 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { options.disable_auto_compactions = true; DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(100)); }); - std::set checked_file_nums; SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber() + 1; uint64_t start_time = mock_clock_->NowSeconds(); @@ -359,9 +335,9 @@ TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) { ASSERT_FALSE(tp_mapping.Empty()); auto seqs = tp_mapping.TEST_GetInternalMapping(); // about ~20 seqs->time entries, because the sample rate is 10000/100, and it - // passes 2k time. 
- ASSERT_GE(seqs.size(), 19); - ASSERT_LE(seqs.size(), 21); + // passes 2k time. Add (roughly) one for starting entry. + ASSERT_GE(seqs.size(), 20); + ASSERT_LE(seqs.size(), 22); SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber() + 1; for (auto i = start_seq; i < seq_end; i++) { // The result is within the range @@ -721,14 +697,6 @@ TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) { DestroyAndReopen(options); - // bootstrap DB sequence numbers (FIXME: make these steps unnecessary) - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(SingleDelete("foo")); - // pass some time first, otherwise the first a few keys write time are going - // to be zero, and internally zero has special meaning: kUnknownTimeBeforeAll - dbfull()->TEST_WaitForPeriodicTaskRun( - [&] { mock_clock_->MockSleepForSeconds(static_cast(10)); }); - std::atomic_uint64_t num_seqno_zeroing{0}; SyncPoint::GetInstance()->DisableProcessing(); @@ -757,8 +725,9 @@ TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) { ASSERT_OK(tp_mapping.Sort()); ASSERT_FALSE(tp_mapping.Empty()); auto seqs = tp_mapping.TEST_GetInternalMapping(); - ASSERT_GE(seqs.size(), 10 - 1); - ASSERT_LE(seqs.size(), 10 + 1); + // Add (roughly) one for starting entry. + ASSERT_GE(seqs.size(), 10); + ASSERT_LE(seqs.size(), 10 + 2); } // Trigger a compaction @@ -848,6 +817,179 @@ TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) { Close(); } +TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) { + Options base_options = CurrentOptions(); + base_options.env = mock_env_.get(); + base_options.disable_auto_compactions = true; + base_options.create_missing_column_families = true; + Options track_options = base_options; + constexpr uint32_t kPreserveSecs = 1234567; + SetTrackTimeDurationOptions(kPreserveSecs, track_options); + SeqnoToTimeMapping sttm; + SequenceNumber latest_seqno; + uint64_t start_time, end_time; + + // #### DB#1, #2: No pre-population without preserve/preclude #### + // #### But a single entry is added when preserve/preclude enabled #### + for (bool with_write : {false, true}) { + SCOPED_TRACE("with_write=" + std::to_string(with_write)); + DestroyAndReopen(base_options); + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + ASSERT_TRUE(sttm.Empty()); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + + if (with_write) { + // Ensure that writes before new CF with preserve/preclude option don't + // interfere with the seqno-to-time mapping getting a starting entry. + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Flush()); + } + + // Unfortunately, if we add a CF with preserve/preclude option after + // open, that does not reserve seqnos with pre-populated time mappings. + CreateColumnFamilies({"one"}, track_options); + + // No pre-population (unfortunately), just a single starting entry + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + latest_seqno = db_->GetLatestSequenceNumber(); + start_time = mock_clock_->NowSeconds(); + ASSERT_EQ(sttm.Size(), 1); + ASSERT_EQ(latest_seqno, 1U); + // Current time maps to starting entry / seqno + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), 1U); + // Any older times are unknown. 
+ ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - 1), + kUnknownSeqnoBeforeAll); + + // Now check that writes can proceed normally (passing about 20% of preserve + // time) + for (int i = 0; i < 20; i++) { + ASSERT_OK(Put(Key(i), "value")); + dbfull()->TEST_WaitForPeriodicTaskRun([&] { + mock_clock_->MockSleepForSeconds(static_cast(kPreserveSecs / 99)); + }); + } + ASSERT_OK(Flush()); + + // Check that mappings are getting populated + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + latest_seqno = db_->GetLatestSequenceNumber(); + end_time = mock_clock_->NowSeconds(); + ASSERT_EQ(sttm.Size(), 21); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(end_time), latest_seqno); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), 1U); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - 1), + kUnknownSeqnoBeforeAll); + } + + // ### DB#3, #4: Read-only DB with preserve/preclude after not #### + // Make sure we don't hit issues with read-only DBs, which don't need + // the mapping in the DB state (though it wouldn't hurt anything) + for (bool with_write : {false, true}) { + SCOPED_TRACE("with_write=" + std::to_string(with_write)); + DestroyAndReopen(base_options); + if (with_write) { + ASSERT_OK(Put("foo", "bar")); + ASSERT_OK(Flush()); + } + + ASSERT_OK(ReadOnlyReopen(base_options)); + if (with_write) { + ASSERT_EQ(Get("foo"), "bar"); + } + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + ASSERT_EQ(sttm.Size(), 0); + + ASSERT_OK(ReadOnlyReopen(track_options)); + if (with_write) { + ASSERT_EQ(Get("foo"), "bar"); + } + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + ASSERT_EQ(sttm.Size(), 0); + } + + // #### DB#5: Destroy and open with preserve/preclude option #### + DestroyAndReopen(track_options); + + // Ensure pre-population + constexpr auto kPrePopPairs = SeqnoToTimeMapping::kMaxSeqnoTimePairsPerSST; + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + latest_seqno = db_->GetLatestSequenceNumber(); + start_time = mock_clock_->NowSeconds(); + ASSERT_EQ(sttm.Size(), kPrePopPairs); + // One nono-zero sequence number per pre-populated pair (this could be + // revised if we want to use interpolation for better approximate time + // mappings with no guarantee of erring in just one direction). + ASSERT_EQ(latest_seqno, kPrePopPairs); + // Current time maps to last pre-allocated seqno + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), latest_seqno); + // Oldest tracking time maps to first pre-allocated seqno + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), 1); + + // In more detail, check that estimated seqnos (pre-allocated) are uniformly + // spread over the tracked time. 
+ for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) { + // Round up query time + uint64_t t = start_time - kPreserveSecs + + static_cast(ratio * kPreserveSecs + 0.9999999); + // Round down estimated seqno + SequenceNumber s = + static_cast(ratio * (latest_seqno - 1)) + 1; + // Match + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(t), s); + } + + // Now check that writes can proceed normally (passing about 20% of preserve + // time) + for (int i = 0; i < 20; i++) { + ASSERT_OK(Put(Key(i), "value")); + dbfull()->TEST_WaitForPeriodicTaskRun([&] { + mock_clock_->MockSleepForSeconds(static_cast(kPreserveSecs / 99)); + }); + } + ASSERT_OK(Flush()); + + // Can still see some pre-populated mappings, though some displaced + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + latest_seqno = db_->GetLatestSequenceNumber(); + end_time = mock_clock_->NowSeconds(); + ASSERT_EQ(sttm.Size(), kPrePopPairs); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(end_time), latest_seqno); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs / 2), + kPrePopPairs / 2); + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), + kUnknownSeqnoBeforeAll); + + // Make sure we don't hit issues with read-only DBs, which don't need + // the mapping in the DB state (though it wouldn't hurt anything) + ASSERT_OK(ReadOnlyReopen(track_options)); + ASSERT_EQ(Get(Key(0)), "value"); + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + ASSERT_EQ(sttm.Size(), 0); + + // #### DB#6: Destroy and open+create an extra CF with preserve/preclude #### + // (default CF does not have the option) + Destroy(track_options); + ReopenWithColumnFamilies({"default", "one"}, + List({base_options, track_options})); + + // Ensure pre-population (not as exhaustive checking here) + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + latest_seqno = db_->GetLatestSequenceNumber(); + start_time = mock_clock_->NowSeconds(); + ASSERT_EQ(sttm.Size(), kPrePopPairs); + // One nono-zero sequence number per pre-populated pair (this could be + // revised if we want to use interpolation for better approximate time + // mappings with no guarantee of erring in just one direction). 
+ ASSERT_EQ(latest_seqno, kPrePopPairs); + // Current time maps to last pre-allocated seqno + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), latest_seqno); + // Oldest tracking time maps to first pre-allocated seqno + ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), 1); + + Close(); +} + TEST_F(SeqnoTimeTest, MappingAppend) { SeqnoToTimeMapping test(/*max_time_duration=*/100, /*max_capacity=*/10); @@ -984,6 +1126,50 @@ TEST_F(SeqnoTimeTest, ProximalFunctions) { EXPECT_EQ(test.GetProximalSeqnoBeforeTime(900), 50U); } +TEST_F(SeqnoTimeTest, PrePopulate) { + SeqnoToTimeMapping test(/*max_time_duration=*/100, /*max_capacity=*/10); + + EXPECT_EQ(test.Size(), 0U); + + // Smallest case is like two Appends + test.PrePopulate(10, 11, 500, 600); + + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U); + EXPECT_EQ(test.GetProximalTimeBeforeSeqno(12), 600U); + + test.Clear(); + + // Populate a small range + uint64_t kTimeIncrement = 1234567; + test.PrePopulate(1, 12, kTimeIncrement, kTimeIncrement * 2); + + for (uint64_t i = 0; i <= 12; ++i) { + // NOTE: with 1 and 12 as the pre-populated end points, the duration is + // broken into 11 equal(-ish) spans + uint64_t t = kTimeIncrement + (i * kTimeIncrement) / 11 - 1; + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), i); + } + + test.Clear(); + + // Populate an excessively large range (in the future we might want to + // interpolate estimated times for seqnos between entries) + test.PrePopulate(1, 34567, kTimeIncrement, kTimeIncrement * 2); + + for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) { + // Round up query time + uint64_t t = kTimeIncrement + + static_cast(ratio * kTimeIncrement + 0.9999999); + // Round down estimated seqno + SequenceNumber s = static_cast(ratio * (34567 - 1)) + 1; + // Match + // TODO: for now this is exact, but in the future might need approximation + // bounds to account for limited samples. + EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), s); + } +} + TEST_F(SeqnoTimeTest, TruncateOldEntries) { constexpr uint64_t kMaxTimeDuration = 42; SeqnoToTimeMapping test(kMaxTimeDuration, /*max_capacity=*/10); diff --git a/db/seqno_to_time_mapping.cc b/db/seqno_to_time_mapping.cc index ec7b2d9cb..97a3e9879 100644 --- a/db/seqno_to_time_mapping.cc +++ b/db/seqno_to_time_mapping.cc @@ -258,11 +258,32 @@ bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) { pairs_.emplace_back(seqno, time); if (pairs_.size() > max_capacity_) { + // FIXME: be smarter about how we erase to avoid data falling off the + // front prematurely. 
pairs_.pop_front(); } return true; } +bool SeqnoToTimeMapping::PrePopulate(SequenceNumber from_seqno, + SequenceNumber to_seqno, + uint64_t from_time, uint64_t to_time) { + assert(Empty()); + assert(from_seqno > 0); + assert(to_seqno > from_seqno); + assert(from_time > kUnknownTimeBeforeAll); + assert(to_time >= from_time); + + // TODO: smartly limit this to max_capacity_ representative samples + for (auto i = from_seqno; i <= to_seqno; i++) { + uint64_t t = from_time + (to_time - from_time) * (i - from_seqno) / + (to_seqno - from_seqno); + pairs_.emplace_back(i, t); + } + + return /*success*/ true; +} + bool SeqnoToTimeMapping::Resize(uint64_t min_time_duration, uint64_t max_time_duration) { uint64_t new_max_capacity = @@ -271,6 +292,8 @@ bool SeqnoToTimeMapping::Resize(uint64_t min_time_duration, return false; } else if (new_max_capacity < pairs_.size()) { uint64_t delta = pairs_.size() - new_max_capacity; + // FIXME: be smarter about how we erase to avoid data falling off the + // front prematurely. pairs_.erase(pairs_.begin(), pairs_.begin() + delta); } max_capacity_ = new_max_capacity; diff --git a/db/seqno_to_time_mapping.h b/db/seqno_to_time_mapping.h index 132d74957..95a4455be 100644 --- a/db/seqno_to_time_mapping.h +++ b/db/seqno_to_time_mapping.h @@ -116,6 +116,11 @@ class SeqnoToTimeMapping { uint64_t max_capacity = 0) : max_time_duration_(max_time_duration), max_capacity_(max_capacity) {} + // Both seqno range and time range are inclusive. ... TODO + // + bool PrePopulate(SequenceNumber from_seqno, SequenceNumber to_seqno, + uint64_t from_time, uint64_t to_time); + // Append a new entry to the list. The new entry should be newer than the // existing ones. It maintains the internal sorted status. bool Append(SequenceNumber seqno, uint64_t time); diff --git a/db/version_set.cc b/db/version_set.cc index 7b20adedc..41e90e13d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -7231,6 +7231,20 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options, return status; } +void VersionSet::EnsureNonZeroSequence() { + uint64_t expected = 0; + // Update each from 0->1, in order, or abort if any becomes non-zero in + // parallel + if (last_allocated_sequence_.compare_exchange_strong(expected, 1)) { + if (last_published_sequence_.compare_exchange_strong(expected, 1)) { + (void)last_sequence_.compare_exchange_strong(expected, 1); + } + } + assert(last_allocated_sequence_.load() > 0); + assert(last_published_sequence_.load() > 0); + assert(last_sequence_.load() > 0); +} + ReactiveVersionSet::ReactiveVersionSet( const std::string& dbname, const ImmutableDBOptions* _db_options, const FileOptions& _file_options, Cache* table_cache, diff --git a/db/version_set.h b/db/version_set.h index 1d7c70592..6774cfcd1 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1342,6 +1342,9 @@ class VersionSet { last_allocated_sequence_.store(s, std::memory_order_seq_cst); } + // Allocate a dummy sequence number as needed to ensure last is non-zero. 
+ void EnsureNonZeroSequence(); + // Note: memory_order_release must be sufficient uint64_t FetchAddLastAllocatedSequence(uint64_t s) { return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst); diff --git a/util/cast_util.h b/util/cast_util.h index 3c381d9b2..e010274a7 100644 --- a/util/cast_util.h +++ b/util/cast_util.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include @@ -53,4 +54,13 @@ inline To lossless_cast(From x) { return static_cast(x); } +// For disambiguating a potentially heterogeneous aggregate as a homogeneous +// initializer list. E.g. might be able to write List({x, y}) in some cases +// instead of std::vector({x, y}). +template +inline const std::initializer_list& List( + const std::initializer_list& list) { + return list; +} + } // namespace ROCKSDB_NAMESPACE From 2dc63c891170a15c672ef16390db73982c7ae975 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 6 Oct 2023 10:34:44 -0700 Subject: [PATCH 181/386] =?UTF-8?q?Add=20the=20default=20WritableFile::Get?= =?UTF-8?q?FileSize=20implementation=20back=20for=20com=E2=80=A6=20(#11927?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: As mentioned in https://github.com/facebook/rocksdb/issues/11726, we should defer user feasible API changes to major release. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11927 Reviewed By: anand1976 Differential Revision: D50016723 Pulled By: jowlyzhang fbshipit-source-id: 59781442602fadb9906e37aad2021e3178723db5 --- include/rocksdb/env.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 7828a31eb..08f996658 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1001,7 +1001,7 @@ class WritableFile { /* * Get the size of valid data in the file. */ - virtual uint64_t GetFileSize() = 0; + virtual uint64_t GetFileSize() { return 0; }; /* * Get and set the default pre-allocation block size for writes to From 21a12363e139484bde441c8c43e6b818459ebe02 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 6 Oct 2023 12:37:51 -0700 Subject: [PATCH 182/386] Add EXPERIMENTAL comments about XXOptions::io_activity (#11926) Summary: Context/Summary: this option is experimental right now Pull Request resolved: https://github.com/facebook/rocksdb/pull/11926 Test Plan: no code change Reviewed By: jaykorean Differential Revision: D49985000 Pulled By: hx235 fbshipit-source-id: a5b439ed35e3d6bb04c125f222ac29cd3842d1a1 --- include/rocksdb/file_system.h | 1 + include/rocksdb/options.h | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index f8e321417..7e9d5d4fe 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -120,6 +120,7 @@ struct IOOptions { // directories and list only files in GetChildren API. 
bool do_not_recurse; + // EXPERIMENTAL Env::IOActivity io_activity = Env::IOActivity::kUnknown; IOOptions() : IOOptions(false) {} diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index b20c66e14..ae6b5cf6d 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1752,9 +1752,13 @@ struct ReadOptions { // *** END options only relevant to iterators or scans *** - // ** For RocksDB internal use only ** + // *** BEGIN options for RocksDB internal use only *** + + // EXPERIMENTAL Env::IOActivity io_activity = Env::IOActivity::kUnknown; + // *** END options for RocksDB internal use only *** + ReadOptions() {} ReadOptions(bool _verify_checksums, bool _fill_cache); explicit ReadOptions(Env::IOActivity _io_activity); From 51d7e6a49efb586419525b63967d5bc2b5f351e6 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 9 Oct 2023 15:25:35 -0700 Subject: [PATCH 183/386] Clean up WriteBatchWithIndexInternal a bit (#11930) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11930 The patch cleans up and refactors the logic in/around `WriteBatchWithIndexInternal` a bit as groundwork for further changes. Specifically, the class is turned back into a stateless collection of static helpers (which is the way it was before PR 6851). Note that there were two apparent reasons for introducing this instance state in PR 6851: a) encapsulating `MergeContext` and b) resolving objects like `Logger` and `Statistics` based on a variety of handles. However, neither reason seems justified at this point. Regarding a), the `MultiGetFromBatchAndDB` logic passes in its own `MergeContext` objects via a second set of methods that do not use the member `MergeContext`. As for b), `Logger` and friends are only needed for Merge, which is only supported if a column family handle is provided; in turn, the column family handle enables us to resolve all the necessary objects without the need for any other handles like `DB` or `DBOptions`. In addition to the above, the patch changes the type of `BaseDeltaIterator::merge_result_` to `std::string` from `PinnableSlice` (since no pinning is ever done) and makes some other small code quality improvements. 
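For orientation, the sketch below shows how the two static helpers introduced by this patch are meant to be called when resolving a pending merge, depending on whether the DB lookup found a base value. `ResolveMerge()` is illustrative only, assumes the internal RocksDB headers are available, and is not part of the patch:

```cpp
#include <cassert>
#include <string>

#include "db/merge_context.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "utilities/write_batch_with_index/write_batch_with_index_internal.h"

using namespace ROCKSDB_NAMESPACE;

// Combine merge operands pending in the batch with the DB lookup result.
Status ResolveMerge(ColumnFamilyHandle* column_family, const Slice& key,
                    const Status& db_get_status, const Slice& db_value,
                    const MergeContext& merge_context, std::string* result) {
  if (db_get_status.ok()) {
    // DB has a plain base value; merge the operands on top of it.
    return WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue(
        column_family, key, db_value, merge_context, result);
  }
  // Key not present in the DB; merge with no base value.
  assert(db_get_status.IsNotFound());
  return WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue(
      column_family, key, merge_context, result);
}
```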
Reviewed By: jaykorean Differential Revision: D50038302 fbshipit-source-id: 5f34abe2e808bdaea0f3a8033b5764ebd446b85d --- db/column_family.cc | 16 +++ db/column_family.h | 3 + .../write_batch_with_index.cc | 64 ++++++--- .../write_batch_with_index_internal.cc | 131 ++++++------------ .../write_batch_with_index_internal.h | 66 +++------ .../write_batch_with_index_test.cc | 15 +- 6 files changed, 129 insertions(+), 166 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 7563041e9..280533993 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1748,4 +1748,20 @@ const Comparator* GetColumnFamilyUserComparator( return nullptr; } +const ImmutableOptions& GetImmutableOptions(ColumnFamilyHandle* column_family) { + assert(column_family); + + ColumnFamilyHandleImpl* const handle = + static_cast_with_check(column_family); + assert(handle); + + const ColumnFamilyData* const cfd = handle->cfd(); + assert(cfd); + + const ImmutableOptions* ioptions = cfd->ioptions(); + assert(ioptions); + + return *ioptions; +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/column_family.h b/db/column_family.h index 65fb3f4f8..2a38feb73 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -877,4 +877,7 @@ extern uint32_t GetColumnFamilyID(ColumnFamilyHandle* column_family); extern const Comparator* GetColumnFamilyUserComparator( ColumnFamilyHandle* column_family); +extern const ImmutableOptions& GetImmutableOptions( + ColumnFamilyHandle* column_family); + } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index 3c41009fa..b01f70a69 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -5,6 +5,7 @@ #include "rocksdb/utilities/write_batch_with_index.h" +#include #include #include "db/column_family.h" @@ -426,11 +427,12 @@ Status WriteBatchWithIndex::PutLogData(const Slice& blob) { void WriteBatchWithIndex::Clear() { rep->Clear(); } Status WriteBatchWithIndex::GetFromBatch(ColumnFamilyHandle* column_family, - const DBOptions& options, + const DBOptions& /* options */, const Slice& key, std::string* value) { + MergeContext merge_context; Status s; - WriteBatchWithIndexInternal wbwii(&options, column_family); - auto result = wbwii.GetFromBatch(this, key, value, &s); + auto result = WriteBatchWithIndexInternal::GetFromBatch( + this, column_family, key, &merge_context, value, &s); switch (result) { case WBWIIteratorImpl::kFound: @@ -502,20 +504,27 @@ Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, Status WriteBatchWithIndex::GetFromBatchAndDB( DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, ReadCallback* callback) { + assert(db); + assert(pinnable_val); + + if (!column_family) { + column_family = db->DefaultColumnFamily(); + } + const Comparator* const ucmp = rep->comparator.GetComparator(column_family); size_t ts_sz = ucmp ? ucmp->timestamp_size() : 0; if (ts_sz > 0 && !read_options.timestamp) { return Status::InvalidArgument("Must specify timestamp"); } - Status s; - WriteBatchWithIndexInternal wbwii(db, column_family); - // Since the lifetime of the WriteBatch is the same as that of the transaction // we cannot pin it as otherwise the returned value will not be available // after the transaction finishes. 
- std::string& batch_value = *pinnable_val->GetSelf(); - auto result = wbwii.GetFromBatch(this, key, &batch_value, &s); + MergeContext merge_context; + Status s; + + auto result = WriteBatchWithIndexInternal::GetFromBatch( + this, column_family, key, &merge_context, pinnable_val->GetSelf(), &s); if (result == WBWIIteratorImpl::kFound) { pinnable_val->PinSelf(); @@ -545,10 +554,14 @@ Status WriteBatchWithIndex::GetFromBatchAndDB( if (result == WBWIIteratorImpl::kMergeInProgress) { // Merge result from DB with merges in Batch std::string merge_result; + if (s.ok()) { - s = wbwii.MergeKey(key, *pinnable_val, &merge_result); - } else { // Key not present in db (s.IsNotFound()) - s = wbwii.MergeKey(key, &merge_result); + s = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family, key, *pinnable_val, merge_context, &merge_result); + } else { + assert(s.IsNotFound()); + s = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family, key, merge_context, &merge_result); } if (s.ok()) { pinnable_val->Reset(); @@ -573,6 +586,15 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, bool sorted_input, ReadCallback* callback) { + assert(db); + assert(keys); + assert(values); + assert(statuses); + + if (!column_family) { + column_family = db->DefaultColumnFamily(); + } + const Comparator* const ucmp = rep->comparator.GetComparator(column_family); size_t ts_sz = ucmp ? ucmp->timestamp_size() : 0; if (ts_sz > 0 && !read_options.timestamp) { @@ -582,8 +604,6 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( return; } - WriteBatchWithIndexInternal wbwii(db, column_family); - autovector key_context; autovector sorted_keys; // To hold merges from the write batch @@ -599,8 +619,8 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( Status* s = &statuses[i]; PinnableSlice* pinnable_val = &values[i]; pinnable_val->Reset(); - auto result = - wbwii.GetFromBatch(this, keys[i], &merge_context, &batch_value, s); + auto result = WriteBatchWithIndexInternal::GetFromBatch( + this, column_family, keys[i], &merge_context, &batch_value, s); if (result == WBWIIteratorImpl::kFound) { *pinnable_val->GetSelf() = std::move(batch_value); @@ -640,13 +660,17 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( std::pair& merge_result = merges[index]; if (merge_result.first == WBWIIteratorImpl::kMergeInProgress) { - std::string merged_value; // Merge result from DB with merges in Batch + std::string merged_value; + if (key.s->ok()) { - *key.s = wbwii.MergeKey(*key.key, *iter->value, merge_result.second, - &merged_value); - } else { // Key not present in db (s.IsNotFound()) - *key.s = wbwii.MergeKey(*key.key, merge_result.second, &merged_value); + *key.s = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family, *key.key, *key.value, merge_result.second, + &merged_value); + } else { + assert(key.s->IsNotFound()); + *key.s = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family, *key.key, merge_result.second, &merged_value); } if (key.s->ok()) { key.value->Reset(); diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 4e9c35743..751d14adb 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -7,7 +7,6 @@ #include 
"db/column_family.h" #include "db/db_impl/db_impl.h" -#include "db/merge_context.h" #include "db/merge_helper.h" #include "options/cf_options.h" #include "rocksdb/comparator.h" @@ -27,13 +26,15 @@ BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, current_at_base_(true), equal_keys_(false), status_(Status::OK()), + column_family_(column_family), base_iterator_(base_iterator), delta_iterator_(delta_iterator), comparator_(comparator), iterate_upper_bound_(read_options ? read_options->iterate_upper_bound : nullptr) { + assert(base_iterator_); + assert(delta_iterator_); assert(comparator_); - wbwii_.reset(new WriteBatchWithIndexInternal(column_family)); } bool BaseDeltaIterator::Valid() const { @@ -153,23 +154,27 @@ Slice BaseDeltaIterator::value() const { return base_iterator_->value(); } else { WriteEntry delta_entry = delta_iterator_->Entry(); - if (wbwii_->GetNumOperands() == 0) { + if (merge_context_.GetNumOperands() == 0) { return delta_entry.value; } else if (delta_entry.type == kDeleteRecord || delta_entry.type == kSingleDeleteRecord) { - status_ = wbwii_->MergeKey(delta_entry.key, merge_result_.GetSelf()); + status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family_, delta_entry.key, merge_context_, &merge_result_); } else if (delta_entry.type == kPutRecord) { - status_ = wbwii_->MergeKey(delta_entry.key, delta_entry.value, - merge_result_.GetSelf()); + status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family_, delta_entry.key, delta_entry.value, merge_context_, + &merge_result_); } else if (delta_entry.type == kMergeRecord) { if (equal_keys_) { - status_ = wbwii_->MergeKey(delta_entry.key, base_iterator_->value(), - merge_result_.GetSelf()); + status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family_, delta_entry.key, base_iterator_->value(), + merge_context_, &merge_result_); } else { - status_ = wbwii_->MergeKey(delta_entry.key, merge_result_.GetSelf()); + status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family_, delta_entry.key, merge_context_, &merge_result_); } } - merge_result_.PinSelf(); + return merge_result_; } } @@ -283,8 +288,7 @@ void BaseDeltaIterator::UpdateCurrent() { WriteEntry delta_entry; if (DeltaValid()) { assert(delta_iterator_->status().ok()); - delta_result = - delta_iterator_->FindLatestUpdate(wbwii_->GetMergeContext()); + delta_result = delta_iterator_->FindLatestUpdate(&merge_context_); delta_entry = delta_iterator_->Entry(); } else if (!delta_iterator_->status().ok()) { // Expose the error status and stop. 
@@ -313,7 +317,7 @@ void BaseDeltaIterator::UpdateCurrent() { } } if (delta_result == WBWIIteratorImpl::kDeleted && - wbwii_->GetNumOperands() == 0) { + merge_context_.GetNumOperands() == 0) { AdvanceDelta(); } else { current_at_base_ = false; @@ -333,7 +337,7 @@ void BaseDeltaIterator::UpdateCurrent() { equal_keys_ = true; } if (delta_result != WBWIIteratorImpl::kDeleted || - wbwii_->GetNumOperands() > 0) { + merge_context_.GetNumOperands() > 0) { current_at_base_ = false; return; } @@ -631,116 +635,66 @@ bool WBWIIteratorImpl::MatchesKey(uint32_t cf_id, const Slice& key) { } } -WriteBatchWithIndexInternal::WriteBatchWithIndexInternal( - ColumnFamilyHandle* column_family) - : db_(nullptr), db_options_(nullptr), column_family_(column_family) {} - -WriteBatchWithIndexInternal::WriteBatchWithIndexInternal( - DB* db, ColumnFamilyHandle* column_family) - : db_(db), db_options_(nullptr), column_family_(column_family) { - if (db_ != nullptr && column_family_ == nullptr) { - column_family_ = db_->DefaultColumnFamily(); - } -} - -WriteBatchWithIndexInternal::WriteBatchWithIndexInternal( - const DBOptions* db_options, ColumnFamilyHandle* column_family) - : db_(nullptr), db_options_(db_options), column_family_(column_family) {} - -const ImmutableOptions& WriteBatchWithIndexInternal::GetCFOptions() const { - const auto* cfh = - static_cast_with_check(column_family_); - assert(cfh); - assert(cfh->cfd()); - assert(cfh->cfd()->ioptions()); - - return *cfh->cfd()->ioptions(); -} - -std::tuple -WriteBatchWithIndexInternal::GetStatsLoggerAndClock( - const ImmutableOptions& cf_opts) const { - if (db_) { - const auto& db_opts = static_cast_with_check(db_->GetRootDB()) - ->immutable_db_options(); - - return {db_opts.logger, db_opts.statistics.get(), db_opts.clock}; - } - - if (db_options_) { - assert(db_options_->env); - - return {db_options_->info_log.get(), db_options_->statistics.get(), - db_options_->env->GetSystemClock().get()}; - } - - return {cf_opts.logger, cf_opts.stats, cf_opts.clock}; -} - -Status WriteBatchWithIndexInternal::MergeKey(const Slice& key, - const MergeContext& context, - std::string* result) const { +Status WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + ColumnFamilyHandle* column_family, const Slice& key, + const MergeContext& context, std::string* result) { // TODO: support wide columns in WBWI - if (!column_family_) { - return Status::InvalidArgument("Must provide a column_family"); + if (!column_family) { + return Status::InvalidArgument("Must provide a column family"); } - const auto& cf_opts = GetCFOptions(); + const auto& ioptions = GetImmutableOptions(column_family); - const auto* merge_operator = cf_opts.merge_operator.get(); + const auto* merge_operator = ioptions.merge_operator.get(); if (!merge_operator) { return Status::InvalidArgument( - "Merge_operator must be set for column_family"); + "Merge operator must be set for column family"); } - auto [logger, statistics, clock] = GetStatsLoggerAndClock(cf_opts); - // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its value. 
return MergeHelper::TimedFullMerge( merge_operator, key, MergeHelper::kNoBaseValue, context.GetOperands(), - logger, statistics, clock, /* update_num_ops_stats */ false, result, + ioptions.logger, ioptions.stats, ioptions.clock, + /* update_num_ops_stats */ false, result, /* columns */ nullptr, /* op_failure_scope */ nullptr); } -Status WriteBatchWithIndexInternal::MergeKey(const Slice& key, - const Slice& value, - const MergeContext& context, - std::string* result) const { +Status WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + ColumnFamilyHandle* column_family, const Slice& key, const Slice& value, + const MergeContext& context, std::string* result) { // TODO: support wide columns in WBWI - if (!column_family_) { - return Status::InvalidArgument("Must provide a column_family"); + if (!column_family) { + return Status::InvalidArgument("Must provide a column family"); } - const auto& cf_opts = GetCFOptions(); + const auto& ioptions = GetImmutableOptions(column_family); - const auto* merge_operator = cf_opts.merge_operator.get(); + const auto* merge_operator = ioptions.merge_operator.get(); if (!merge_operator) { return Status::InvalidArgument( - "Merge_operator must be set for column_family"); + "Merge operator must be set for column family"); } - auto [logger, statistics, clock] = GetStatsLoggerAndClock(cf_opts); - // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its value. return MergeHelper::TimedFullMerge( merge_operator, key, MergeHelper::kPlainBaseValue, value, - context.GetOperands(), logger, statistics, clock, + context.GetOperands(), ioptions.logger, ioptions.stats, ioptions.clock, /* update_num_ops_stats */ false, result, /* columns */ nullptr, /* op_failure_scope */ nullptr); } WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( - WriteBatchWithIndex* batch, const Slice& key, MergeContext* context, - std::string* value, Status* s) { + WriteBatchWithIndex* batch, ColumnFamilyHandle* column_family, + const Slice& key, MergeContext* context, std::string* value, Status* s) { *s = Status::OK(); std::unique_ptr iter( static_cast_with_check( - batch->NewIterator(column_family_))); + batch->NewIterator(column_family))); // Search the iterator for this key, and updates/merges to it. 
iter->Seek(key); @@ -754,7 +708,8 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( } else if (result == WBWIIteratorImpl::Result::kFound) { // PUT Slice entry_value = iter->Entry().value; if (context->GetNumOperands() > 0) { - *s = MergeKey(key, entry_value, *context, value); + *s = MergeKeyWithPlainBaseValue(column_family, key, entry_value, *context, + value); if (!s->ok()) { result = WBWIIteratorImpl::Result::kError; } @@ -763,7 +718,7 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( } } else if (result == WBWIIteratorImpl::kDeleted) { if (context->GetNumOperands() > 0) { - *s = MergeKey(key, *context, value); + *s = MergeKeyWithNoBaseValue(column_family, key, *context, value); if (s->ok()) { result = WBWIIteratorImpl::Result::kFound; } else { diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index c8c201804..35d550767 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -20,9 +20,7 @@ namespace ROCKSDB_NAMESPACE { -class MergeContext; class WBWIIteratorImpl; -class WriteBatchWithIndexInternal; struct Options; struct ImmutableOptions; @@ -63,16 +61,17 @@ class BaseDeltaIterator : public Iterator { bool DeltaValid() const; void UpdateCurrent(); - std::unique_ptr wbwii_; bool forward_; bool current_at_base_; bool equal_keys_; mutable Status status_; + ColumnFamilyHandle* column_family_; std::unique_ptr base_iterator_; std::unique_ptr delta_iterator_; const Comparator* comparator_; // not owned const Slice* iterate_upper_bound_; - mutable PinnableSlice merge_result_; + MergeContext merge_context_; + mutable std::string merge_result_; }; // Key used by skip list, as the binary searchable index of WriteBatchWithIndex. @@ -297,14 +296,15 @@ class WriteBatchWithIndexInternal { static const Comparator* GetUserComparator(const WriteBatchWithIndex& wbwi, uint32_t cf_id); - // For GetFromBatchAndDB or similar - explicit WriteBatchWithIndexInternal(DB* db, - ColumnFamilyHandle* column_family); - // For GetFromBatchAndDB or similar - explicit WriteBatchWithIndexInternal(ColumnFamilyHandle* column_family); - // For GetFromBatch or similar - explicit WriteBatchWithIndexInternal(const DBOptions* db_options, - ColumnFamilyHandle* column_family); + static Status MergeKeyWithNoBaseValue(ColumnFamilyHandle* column_family, + const Slice& key, + const MergeContext& context, + std::string* result); + + static Status MergeKeyWithPlainBaseValue(ColumnFamilyHandle* column_family, + const Slice& key, const Slice& value, + const MergeContext& context, + std::string* result); // If batch contains a value for key, store it in *value and return kFound. // If batch contains a deletion for key, return Deleted. @@ -314,44 +314,10 @@ class WriteBatchWithIndexInternal { // and return kMergeInProgress // If batch does not contain this key, return kNotFound // Else, return kError on error with error Status stored in *s. 
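
The result codes documented above are what the public `WriteBatchWithIndex::GetFromBatch()` / `GetFromBatchAndDB()` calls ultimately surface, with the new static `MergeKeyWithNoBaseValue()` / `MergeKeyWithPlainBaseValue()` helpers servicing the merge cases. For orientation only (not part of this patch), a minimal C++ sketch of that public read path; the DB path and keys are illustrative and the merge operator is left as a placeholder:

```cpp
#include <cassert>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/write_batch_with_index.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;
  Options options;
  options.create_if_missing = true;
  // To exercise the merge paths, a merge operator must be configured on the
  // column family, e.g. options.merge_operator = /* your operator */;

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/wbwi_demo", &db);
  assert(s.ok());

  WriteBatchWithIndex wbwi;  // indexed updates, not yet written to the DB
  s = wbwi.Put("k1", "v1");
  assert(s.ok());

  // Read the staged value straight out of the batch ("kFound" above),
  // without touching the DB.
  std::string value;
  s = wbwi.GetFromBatch(options, "k1", &value);
  assert(s.ok() && value == "v1");

  // Combine the batch contents with the current DB state.
  s = wbwi.GetFromBatchAndDB(db, ReadOptions(), "k1", &value);
  assert(s.ok());

  s = db->Write(WriteOptions(), wbwi.GetWriteBatch());
  assert(s.ok());
  delete db;
  return 0;
}
```
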
- WBWIIteratorImpl::Result GetFromBatch(WriteBatchWithIndex* batch, - const Slice& key, std::string* value, - Status* s) { - return GetFromBatch(batch, key, &merge_context_, value, s); - } - WBWIIteratorImpl::Result GetFromBatch(WriteBatchWithIndex* batch, - const Slice& key, - MergeContext* merge_context, - std::string* value, Status* s); - - // Merge with no base value - Status MergeKey(const Slice& key, const MergeContext& context, - std::string* result) const; - Status MergeKey(const Slice& key, std::string* result) const { - return MergeKey(key, merge_context_, result); - } - - // Merge with plain base value - Status MergeKey(const Slice& key, const Slice& value, - const MergeContext& context, std::string* result) const; - Status MergeKey(const Slice& key, const Slice& value, - std::string* result) const { - return MergeKey(key, value, merge_context_, result); - } - - size_t GetNumOperands() const { return merge_context_.GetNumOperands(); } - MergeContext* GetMergeContext() { return &merge_context_; } - Slice GetOperand(int index) const { return merge_context_.GetOperand(index); } - - private: - const ImmutableOptions& GetCFOptions() const; - std::tuple GetStatsLoggerAndClock( - const ImmutableOptions& cf_opts) const; - - DB* db_; - const DBOptions* db_options_; - ColumnFamilyHandle* column_family_; - MergeContext merge_context_; + static WBWIIteratorImpl::Result GetFromBatch( + WriteBatchWithIndex* batch, ColumnFamilyHandle* column_family, + const Slice& key, MergeContext* merge_context, std::string* value, + Status* s); }; } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index b438d7d23..c69dd39a2 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -7,7 +7,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
- #include "rocksdb/utilities/write_batch_with_index.h" #include @@ -2248,6 +2247,8 @@ TEST_F(WBWIOverwriteTest, TestBadMergeOperator) { } TEST_P(WriteBatchWithIndexTest, ColumnFamilyWithTimestamp) { + ASSERT_OK(OpenDB()); + ColumnFamilyHandleImplDummy cf2(2, test::BytewiseComparatorWithU64TsWrapper()); @@ -2263,10 +2264,9 @@ TEST_P(WriteBatchWithIndexTest, ColumnFamilyWithTimestamp) { .IsInvalidArgument()); { std::string value; - ASSERT_TRUE(batch_ - ->GetFromBatchAndDB( - /*db=*/nullptr, ReadOptions(), &cf2, "key", &value) - .IsInvalidArgument()); + ASSERT_TRUE( + batch_->GetFromBatchAndDB(db_, ReadOptions(), &cf2, "key", &value) + .IsInvalidArgument()); } { constexpr size_t num_keys = 2; @@ -2275,8 +2275,8 @@ TEST_P(WriteBatchWithIndexTest, ColumnFamilyWithTimestamp) { {PinnableSlice(), PinnableSlice()}}; std::array statuses{{Status(), Status()}}; constexpr bool sorted_input = false; - batch_->MultiGetFromBatchAndDB(/*db=*/nullptr, ReadOptions(), &cf2, - num_keys, keys.data(), pinnable_vals.data(), + batch_->MultiGetFromBatchAndDB(db_, ReadOptions(), &cf2, num_keys, + keys.data(), pinnable_vals.data(), statuses.data(), sorted_input); for (const auto& s : statuses) { ASSERT_TRUE(s.IsInvalidArgument()); @@ -2406,4 +2406,3 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - From 229a6e5f55a0a67f87c5f3a22da9f46305164291 Mon Sep 17 00:00:00 2001 From: darionyaphet Date: Mon, 9 Oct 2023 19:05:48 -0700 Subject: [PATCH 184/386] Remove unnecessary comments (#11833) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11833 Reviewed By: jaykorean Differential Revision: D50103376 Pulled By: ltamasi fbshipit-source-id: 0da49252c3e584b9d77e9fd3f27453d4b24afe6e --- include/rocksdb/advanced_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rocksdb/advanced_cache.h b/include/rocksdb/advanced_cache.h index a5a19d3a0..b5dcc3d49 100644 --- a/include/rocksdb/advanced_cache.h +++ b/include/rocksdb/advanced_cache.h @@ -68,7 +68,7 @@ class Cache { enum class Priority { HIGH, LOW, BOTTOM }; // A set of callbacks to allow objects in the primary block cache to be - // be persisted in a secondary cache. The purpose of the secondary cache + // persisted in a secondary cache. The purpose of the secondary cache // is to support other ways of caching the object, such as persistent or // compressed data, that may require the object to be parsed and transformed // in some way. Since the primary cache holds C++ objects and the secondary From ee0829ba76a1edc3257fd3078262b7e064aede82 Mon Sep 17 00:00:00 2001 From: darionyaphet Date: Mon, 9 Oct 2023 19:10:06 -0700 Subject: [PATCH 185/386] fix typo snapshto (#11817) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11817 Reviewed By: jaykorean Differential Revision: D50103497 Pulled By: ltamasi fbshipit-source-id: 77c5cf86ff7eb5021fc91b03225882536163af7b --- db/db_impl/db_impl_compaction_flush.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index e77680d40..91b85de87 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -234,7 +234,7 @@ Status DBImpl::FlushMemTableToOutputFile( // releases and re-acquires the db mutex. In the meantime, the application // can still insert into the memtables and increase the db's sequence number. 
// The application can take a snapshot, hoping that the latest visible state - // to this snapshto is preserved. This is hard to guarantee since db mutex + // to this snapshot is preserved. This is hard to guarantee since db mutex // not held. This newly-created snapshot is not included in `snapshot_seqs` // and the flush job is unaware of its presence. Consequently, the flush job // may drop certain keys when generating the L0, causing incorrect data to be From 8a9cfd52924c58e74935a604f89bd87f318b4ac3 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 10 Oct 2023 06:29:01 -0700 Subject: [PATCH 186/386] Make stopped writes block on recovery (#11879) Summary: Relaxed the constraints for blocking when writes are stopped. When a recovery is already being attempted, we might as well let `!no_slowdown` writes wait on it in case it succeeds. This makes the user-visible behavior consistent across recovery flush and non-recovery flush. This enables `db_stress` to inject retryable (soft) flush read errors without having to handle user write failures. I changed `db_stress` a bit to permit injected errors in much more foreground operations as more admin operations (like `GetLiveFiles()`) can fail on a retryable error during flush. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11879 Reviewed By: anand1976 Differential Revision: D49571196 Pulled By: ajkr fbshipit-source-id: 5d516d6faf20d2c6bfe0594ab4f2706bca6d69b0 --- db/db_impl/db_impl_write.cc | 10 +-- db_stress_tool/db_stress_listener.h | 12 +++ db_stress_tool/db_stress_test_base.cc | 74 +++++++------------ db_stress_tool/db_stress_test_base.h | 4 +- db_stress_tool/multi_ops_txns_stress.cc | 25 ++++--- .../stopped_writes_wait_for_recovery.md | 1 + 6 files changed, 61 insertions(+), 65 deletions(-) create mode 100644 unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index bc260c5a8..505a37883 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1858,11 +1858,11 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, write_thread.EndWriteStall(); } - // Don't wait if there's a background error, even if its a soft error. We - // might wait here indefinitely as the background compaction may never - // finish successfully, resulting in the stall condition lasting - // indefinitely - while (error_handler_.GetBGError().ok() && write_controller_.IsStopped() && + // Don't wait if there's a background error that is not pending recovery + // since recovery might never be attempted. 
+ while ((error_handler_.GetBGError().ok() || + error_handler_.IsRecoveryInProgress()) && + write_controller_.IsStopped() && !shutting_down_.load(std::memory_order_relaxed)) { if (write_options.no_slowdown) { return Status::Incomplete("Write stall"); diff --git a/db_stress_tool/db_stress_listener.h b/db_stress_tool/db_stress_listener.h index aba95d4c0..505b0a604 100644 --- a/db_stress_tool/db_stress_listener.h +++ b/db_stress_tool/db_stress_listener.h @@ -71,11 +71,23 @@ class DbStressListener : public EventListener { VerifyFilePath(info.file_path); // pretending doing some work here RandomSleep(); + if (FLAGS_read_fault_one_in) { + (void)fault_fs_guard->GetAndResetErrorCount(); + fault_fs_guard->DisableErrorInjection(); + } } void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*flush_job_info*/) override { RandomSleep(); + if (FLAGS_read_fault_one_in) { + // Hardcoded to inject retryable error as a non-retryable error would put + // the DB in read-only mode and then it would crash on the next write. + fault_fs_guard->SetThreadLocalReadErrorContext( + static_cast(FLAGS_seed), FLAGS_read_fault_one_in, + true /* retryable */); + fault_fs_guard->EnableErrorInjection(); + } } void OnTableFileDeleted(const TableFileDeletionInfo& /*info*/) override { diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index f7dee86b2..5b843eb5d 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -415,10 +415,22 @@ Status StressTest::AssertSame(DB* db, ColumnFamilyHandle* cf, return Status::OK(); } -void StressTest::VerificationAbort(SharedState* shared, std::string msg, - Status s) const { - fprintf(stderr, "Verification failed: %s. Status is %s\n", msg.c_str(), - s.ToString().c_str()); +void StressTest::ProcessStatus(SharedState* shared, std::string opname, + Status s) const { + if (s.ok()) { + return; + } + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + std::ostringstream oss; + oss << opname << " failed: " << s.ToString(); + VerificationAbort(shared, oss.str()); + assert(false); + } + fprintf(stdout, "%s failed: %s\n", opname.c_str(), s.ToString().c_str()); +} + +void StressTest::VerificationAbort(SharedState* shared, std::string msg) const { + fprintf(stderr, "Verification failed: %s\n", msg.c_str()); shared->SetVerificationFailure(); } @@ -910,35 +922,24 @@ void StressTest::OperateDb(ThreadState* thread) { if (thread->rand.OneInOpt(FLAGS_get_live_files_one_in) && !FLAGS_write_fault_one_in) { Status status = VerifyGetLiveFiles(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetLiveFiles status not OK", status); - } + ProcessStatus(shared, "VerifyGetLiveFiles", status); } // Verify GetSortedWalFiles with a 1 in N chance. if (thread->rand.OneInOpt(FLAGS_get_sorted_wal_files_one_in)) { Status status = VerifyGetSortedWalFiles(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetSortedWalFiles status not OK", - status); - } + ProcessStatus(shared, "VerifyGetSortedWalFiles", status); } // Verify GetCurrentWalFile with a 1 in N chance. 
if (thread->rand.OneInOpt(FLAGS_get_current_wal_file_one_in)) { Status status = VerifyGetCurrentWalFile(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyGetCurrentWalFile status not OK", - status); - } + ProcessStatus(shared, "VerifyGetCurrentWalFile", status); } if (thread->rand.OneInOpt(FLAGS_pause_background_one_in)) { Status status = TestPauseBackground(thread); - if (!status.ok()) { - VerificationAbort( - shared, "Pause/ContinueBackgroundWork status not OK", status); - } + ProcessStatus(shared, "Pause/ContinueBackgroundWork", status); } if (thread->rand.OneInOpt(FLAGS_verify_checksum_one_in)) { @@ -947,9 +948,7 @@ void StressTest::OperateDb(ThreadState* thread) { ThreadStatus::OperationType::OP_VERIFY_DB_CHECKSUM); Status status = db_->VerifyChecksum(); ThreadStatusUtil::ResetThreadStatus(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyChecksum status not OK", status); - } + ProcessStatus(shared, "VerifyChecksum", status); } if (thread->rand.OneInOpt(FLAGS_verify_file_checksums_one_in)) { @@ -958,10 +957,7 @@ void StressTest::OperateDb(ThreadState* thread) { ThreadStatus::OperationType::OP_VERIFY_FILE_CHECKSUMS); Status status = db_->VerifyFileChecksums(read_opts); ThreadStatusUtil::ResetThreadStatus(); - if (!status.ok()) { - VerificationAbort(shared, "VerifyFileChecksums status not OK", - status); - } + ProcessStatus(shared, "VerifyFileChecksums", status); } if (thread->rand.OneInOpt(FLAGS_get_property_one_in)) { @@ -988,35 +984,19 @@ void StressTest::OperateDb(ThreadState* thread) { if (total_size <= FLAGS_backup_max_size) { Status s = TestBackupRestore(thread, rand_column_families, rand_keys); - if (!s.ok()) { - if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { - VerificationAbort(shared, - "Backup/restore gave inconsistent state", s); - } else { - fprintf(stdout, "Backup/restore failed: %s\n", - s.ToString().c_str()); - } - } + ProcessStatus(shared, "Backup/restore", s); } } if (thread->rand.OneInOpt(FLAGS_checkpoint_one_in)) { Status s = TestCheckpoint(thread, rand_column_families, rand_keys); - if (!s.ok()) { - if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { - VerificationAbort(shared, "Checkpoint gave inconsistent state", s); - } else { - fprintf(stdout, "Checkpoint failed: %s\n", s.ToString().c_str()); - } - } + ProcessStatus(shared, "Checkpoint", s); } if (thread->rand.OneInOpt(FLAGS_approximate_size_one_in)) { Status s = TestApproximateSize(thread, i, rand_column_families, rand_keys); - if (!s.ok()) { - VerificationAbort(shared, "ApproximateSize Failed", s); - } + ProcessStatus(shared, "ApproximateSize", s); } if (thread->rand.OneInOpt(FLAGS_acquire_snapshot_one_in)) { TestAcquireSnapshot(thread, rand_column_family, keystr, i); @@ -1024,9 +1004,7 @@ void StressTest::OperateDb(ThreadState* thread) { /*always*/ { Status s = MaybeReleaseSnapshots(thread, i); - if (!s.ok()) { - VerificationAbort(shared, "Snapshot gave inconsistent state", s); - } + ProcessStatus(shared, "Snapshot", s); } // Assign timestamps if necessary. 
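
Not part of this patch, but for context on the user-visible effect: a write that arrives while writes are stopped now also waits when a background-error recovery attempt is in progress, and callers that prefer to fail fast can still opt out with `WriteOptions::no_slowdown`, which `DBImpl::DelayWrite()` turns into `Status::Incomplete` as shown earlier in the diff. A minimal C++ sketch (DB path, keys and values are illustrative):

```cpp
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  using namespace ROCKSDB_NAMESPACE;
  Options options;
  options.create_if_missing = true;

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/stall_demo", &db);
  assert(s.ok());

  // Default write options: if writes are stopped (e.g. too many L0 files or
  // immutable memtables), this call blocks, and with this change it also
  // blocks while an error recovery attempt is in progress.
  s = db->Put(WriteOptions(), "key", "value");
  assert(s.ok());

  // Opt out of blocking: a write that would stall returns Incomplete instead
  // of waiting.
  WriteOptions no_wait;
  no_wait.no_slowdown = true;
  s = db->Put(no_wait, "key2", "value2");
  if (s.IsIncomplete()) {
    // Would have stalled; retry later or shed load.
  }

  delete db;
  return 0;
}
```
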
diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index 3008f0366..fad4926aa 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -224,7 +224,9 @@ class StressTest { return Status::NotSupported("TestCustomOperations() must be overridden"); } - void VerificationAbort(SharedState* shared, std::string msg, Status s) const; + void ProcessStatus(SharedState* shared, std::string msg, Status s) const; + + void VerificationAbort(SharedState* shared, std::string msg) const; void VerificationAbort(SharedState* shared, std::string msg, int cf, int64_t key) const; diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc index 1591a52e9..c7d38339b 100644 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ b/db_stress_tool/multi_ops_txns_stress.cc @@ -1104,8 +1104,9 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { Status s = record.DecodePrimaryIndexEntry(it->key(), it->value()); if (!s.ok()) { oss << "Cannot decode primary index entry " << it->key().ToString(true) - << "=>" << it->value().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); + << "=>" << it->value().ToString(true) << ". Status is " + << s.ToString(); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1125,8 +1126,9 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { std::string value; s = db_->Get(ropts, sk, &value); if (!s.ok()) { - oss << "Cannot find secondary index entry " << sk.ToString(true); - VerificationAbort(thread->shared, oss.str(), s); + oss << "Cannot find secondary index entry " << sk.ToString(true) + << ". Status is " << s.ToString(); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1153,8 +1155,9 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value()); if (!s.ok()) { oss << "Cannot decode secondary index entry " - << it->key().ToString(true) << "=>" << it->value().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); + << it->key().ToString(true) << "=>" << it->value().ToString(true) + << ". Status is " << s.ToString(); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1168,7 +1171,7 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { if (!s.ok()) { oss << "Error searching pk " << Slice(pk).ToString(true) << ". " << s.ToString() << ". sk " << it->key().ToString(true); - VerificationAbort(thread->shared, oss.str(), s); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1176,8 +1179,8 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { s = std::get<0>(result); if (!s.ok()) { oss << "Error decoding primary index value " - << Slice(value).ToString(true) << ". " << s.ToString(); - VerificationAbort(thread->shared, oss.str(), s); + << Slice(value).ToString(true) << ". 
Status is " << s.ToString(); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1187,7 +1190,7 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { << Slice(value).ToString(true) << " (a=" << record.a_value() << ", c=" << c_in_primary << "), sk: " << it->key().ToString(true) << " (c=" << record.c_value() << ")"; - VerificationAbort(thread->shared, oss.str(), s); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } @@ -1198,7 +1201,7 @@ void MultiOpsTxnsStressTest::VerifyDb(ThreadState* thread) const { oss << "Pk/sk mismatch: primary index has " << primary_index_entries_count << " entries. Secondary index has " << secondary_index_entries_count << " entries."; - VerificationAbort(thread->shared, oss.str(), Status::OK()); + VerificationAbort(thread->shared, oss.str()); assert(false); return; } diff --git a/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md b/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md new file mode 100644 index 000000000..2c44d5572 --- /dev/null +++ b/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md @@ -0,0 +1 @@ +* During a write stop, writes now block on in-progress recovery attempts From 77d160ef476caac76e8f271526c0d0dcadba8b48 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 10 Oct 2023 06:31:45 -0700 Subject: [PATCH 187/386] Consolidate `ErrorHandler`'s recovery status variables (#11937) Summary: cbi42 pointed out a race condition in which `recovery_io_error_` and `recovery_error_` could be updated inconsistently due to releasing the DB mutex in `EventHelpers::NotifyOnBackgroundError()`. There doesn't seem to be a point to having two status objects, so this PR consolidates them. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11937 Reviewed By: cbi42 Differential Revision: D50105793 Pulled By: ajkr fbshipit-source-id: 3de95baccfa44351a49a5c2aa0986c9bc81baa8f --- db/error_handler.cc | 41 +++++++++++++++-------------------------- db/error_handler.h | 6 +----- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/db/error_handler.cc b/db/error_handler.cc index 04a988318..018183ba6 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -320,7 +320,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err, // Check if recovery is currently in progress. If it is, we will save this // error so we can check it at the end to see if recovery succeeded or not if (recovery_in_prog_ && recovery_error_.ok()) { - recovery_error_ = new_bg_err; + recovery_error_ = status_to_io_status(Status(new_bg_err)); } bool auto_recovery = auto_recovery_; @@ -396,9 +396,6 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); - if (recovery_in_prog_ && recovery_io_error_.ok()) { - recovery_io_error_ = bg_io_err; - } if (BackgroundErrorReason::kManifestWrite == reason || BackgroundErrorReason::kManifestWriteNoWAL == reason) { // Always returns ok @@ -502,10 +499,6 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT_MISSPELLED); } - // HandleKnownErrors() will use recovery_error_, so ignore - // recovery_io_error_. 
- // TODO: Do some refactoring and use only one recovery_error_ - recovery_io_error_.PermitUncheckedError(); return HandleKnownErrors(new_bg_io_err, reason); } } @@ -562,9 +555,9 @@ Status ErrorHandler::ClearBGError() { old_bg_error.PermitUncheckedError(); // Clear and check the recovery IO and BG error bg_error_ = Status::OK(); - recovery_io_error_ = IOStatus::OK(); + recovery_error_ = IOStatus::OK(); bg_error_.PermitUncheckedError(); - recovery_io_error_.PermitUncheckedError(); + recovery_error_.PermitUncheckedError(); recovery_in_prog_ = false; soft_error_no_bg_work_ = false; EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error, @@ -602,14 +595,14 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) { if (bg_error_.severity() == Status::Severity::kSoftError && recover_context_.flush_reason == FlushReason::kErrorRecovery) { // Simply clear the background error and return - recovery_error_ = Status::OK(); + recovery_error_ = IOStatus::OK(); return ClearBGError(); } // Reset recovery_error_. We will use this to record any errors that happen // during the recovery process. While recovering, the only operations that // can generate background errors should be the flush operations - recovery_error_ = Status::OK(); + recovery_error_ = IOStatus::OK(); recovery_error_.PermitUncheckedError(); Status s = db_->ResumeImpl(recover_context_); if (s.ok()) { @@ -659,7 +652,7 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( recovery_thread_.reset( new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); - if (recovery_io_error_.ok() && recovery_error_.ok()) { + if (recovery_error_.ok()) { return recovery_error_; } else { return bg_error_; @@ -696,8 +689,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { } TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume0"); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume1"); - recovery_io_error_ = IOStatus::OK(); - recovery_error_ = Status::OK(); + recovery_error_ = IOStatus::OK(); retry_count++; Status s = db_->ResumeImpl(context); if (bg_error_stats_ != nullptr) { @@ -717,9 +709,9 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { bg_error_, db_mutex_); return; } - if (!recovery_io_error_.ok() && + if (!recovery_error_.ok() && recovery_error_.severity() <= Status::Severity::kHardError && - recovery_io_error_.GetRetryable()) { + recovery_error_.GetRetryable()) { // If new BG IO error happens during auto recovery and it is retryable // and its severity is Hard Error or lower, the auto resmue sleep for // a period of time and redo auto resume if it is allowed. @@ -728,10 +720,10 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { int64_t wait_until = db_options_.clock->NowMicros() + wait_interval; cv_.TimedWait(wait_until); } else { - // There are three possibility: 1) recover_io_error is set during resume + // There are three possibility: 1) recovery_error_ is set during resume // and the error is not retryable, 2) recover is successful, 3) other // error happens during resume and cannot be resumed here. - if (recovery_io_error_.ok() && recovery_error_.ok() && s.ok()) { + if (recovery_error_.ok() && s.ok()) { // recover from the retryable IO error and no other BG errors. Clean // the bg_error and notify user. 
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess"); @@ -753,8 +745,8 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { } return; } else { - // In this case: 1) recovery_io_error is more serious or not retryable - // 2) other Non IO recovery_error happens. The auto recovery stops. + // In this case: 1) recovery_error_ is more serious or not retryable + // 2) other error happens. The auto recovery stops. recovery_in_prog_ = false; if (bg_error_stats_ != nullptr) { RecordInHistogram(bg_error_stats_.get(), @@ -762,10 +754,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { } EventHelpers::NotifyOnErrorRecoveryEnd( db_options_.listeners, bg_error_, - !recovery_io_error_.ok() - ? recovery_io_error_ - : (!recovery_error_.ok() ? recovery_error_ : s), - db_mutex_); + !recovery_error_.ok() ? recovery_error_ : s, db_mutex_); return; } } @@ -785,7 +774,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { void ErrorHandler::CheckAndSetRecoveryAndBGError(const Status& bg_err) { if (recovery_in_prog_ && recovery_error_.ok()) { - recovery_error_ = bg_err; + recovery_error_ = status_to_io_status(Status(bg_err)); } if (bg_err.severity() > bg_error_.severity()) { bg_error_ = bg_err; diff --git a/db/error_handler.h b/db/error_handler.h index 6b1e80286..f444a8f8d 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -46,7 +46,6 @@ class ErrorHandler { // Clear the checked flag for uninitialized errors bg_error_.PermitUncheckedError(); recovery_error_.PermitUncheckedError(); - recovery_io_error_.PermitUncheckedError(); } void EnableAutoRecovery() { auto_recovery_ = true; } @@ -87,10 +86,7 @@ class ErrorHandler { Status bg_error_; // A separate Status variable used to record any errors during the // recovery process from hard errors - Status recovery_error_; - // A separate IO Status variable used to record any IO errors during - // the recovery process. At the same time, recovery_error_ is also set. - IOStatus recovery_io_error_; + IOStatus recovery_error_; // The condition variable used with db_mutex during auto resume for time // wait. InstrumentedCondVar cv_; From f1aa17c73ff00de3fbf524c8e21614dd5561206b Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Tue, 10 Oct 2023 08:40:07 -0700 Subject: [PATCH 188/386] Lazy load java native library (#11919) Summary: This address https://github.com/facebook/rocksdb/issues/11277. Java native library is not anymore loaded until the code is first used. 
It should allow to manually load native library from different location with `RocksDB#loadLibrary(List)` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11919 Reviewed By: jaykorean Differential Revision: D50103182 Pulled By: ltamasi fbshipit-source-id: 6090b529c7299b032f4e93cd0c3025a60f58652f --- .../java/org/rocksdb/ColumnFamilyOptions.java | 9 ++-- .../main/java/org/rocksdb/ConfigOptions.java | 9 ++-- java/src/main/java/org/rocksdb/DBOptions.java | 10 +++-- java/src/main/java/org/rocksdb/Env.java | 41 ++++++++++++------- .../src/main/java/org/rocksdb/EnvOptions.java | 10 ++--- .../main/java/org/rocksdb/FlushOptions.java | 11 +++-- java/src/main/java/org/rocksdb/Options.java | 9 ++-- java/src/main/java/org/rocksdb/RocksDB.java | 7 +--- .../main/java/org/rocksdb/SstFileReader.java | 3 -- .../main/java/org/rocksdb/SstFileWriter.java | 3 -- .../java/org/rocksdb/WriteBufferManager.java | 10 +++-- 11 files changed, 65 insertions(+), 57 deletions(-) diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index 8274ebeea..aca72e06e 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -18,9 +18,6 @@ public class ColumnFamilyOptions extends RocksObject implements ColumnFamilyOptionsInterface, MutableColumnFamilyOptionsInterface { - static { - RocksDB.loadLibrary(); - } /** * Construct ColumnFamilyOptions. @@ -29,7 +26,7 @@ public class ColumnFamilyOptions extends RocksObject * an {@code rocksdb::ColumnFamilyOptions} in the c++ side. */ public ColumnFamilyOptions() { - super(newColumnFamilyOptions()); + super(newColumnFamilyOptionsInstance()); } /** @@ -1333,6 +1330,10 @@ private static native long getColumnFamilyOptionsFromProps( final long cfgHandle, String optString); private static native long getColumnFamilyOptionsFromProps(final String optString); + private static long newColumnFamilyOptionsInstance() { + RocksDB.loadLibrary(); + return newColumnFamilyOptions(); + } private static native long newColumnFamilyOptions(); private static native long copyColumnFamilyOptions(final long handle); private static native long newColumnFamilyOptionsFromOptions( diff --git a/java/src/main/java/org/rocksdb/ConfigOptions.java b/java/src/main/java/org/rocksdb/ConfigOptions.java index 026f8b01d..fa69b9896 100644 --- a/java/src/main/java/org/rocksdb/ConfigOptions.java +++ b/java/src/main/java/org/rocksdb/ConfigOptions.java @@ -7,15 +7,12 @@ package org.rocksdb; public class ConfigOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } /** * Construct with default Options */ public ConfigOptions() { - super(newConfigOptions()); + super(newConfigOptionsInstance()); } public ConfigOptions setDelimiter(final String delimiter) { @@ -44,6 +41,10 @@ public ConfigOptions setSanityLevel(final SanityLevel level) { @Override protected final native void disposeInternal(final long handle); + private static long newConfigOptionsInstance() { + RocksDB.loadLibrary(); + return newConfigOptions(); + } private static native long newConfigOptions(); private static native void setEnv(final long handle, final long envHandle); private static native void setDelimiter(final long handle, final String delimiter); diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/rocksdb/DBOptions.java index 62ad137ee..4d66b15f2 100644 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ b/java/src/main/java/org/rocksdb/DBOptions.java @@ 
-18,9 +18,6 @@ public class DBOptions extends RocksObject implements DBOptionsInterface, MutableDBOptionsInterface { - static { - RocksDB.loadLibrary(); - } /** * Construct DBOptions. @@ -29,7 +26,7 @@ public class DBOptions extends RocksObject * an {@code rocksdb::DBOptions} in the c++ side. */ public DBOptions() { - super(newDBOptions()); + super(newDBOptionsInstance()); numShardBits_ = DEFAULT_NUM_SHARD_BITS; env_ = Env.getDefault(); } @@ -1253,7 +1250,12 @@ private DBOptions(final long nativeHandle) { private static native long getDBOptionsFromProps(long cfgHandle, String optString); private static native long getDBOptionsFromProps(String optString); + private static long newDBOptionsInstance() { + RocksDB.loadLibrary(); + return newDBOptions(); + } private static native long newDBOptions(); + private static native long copyDBOptions(final long handle); private static native long newDBOptionsFromOptions(final long optionsHandle); @Override protected final native void disposeInternal(final long handle); diff --git a/java/src/main/java/org/rocksdb/Env.java b/java/src/main/java/org/rocksdb/Env.java index db4c6fd78..9499cf3b6 100644 --- a/java/src/main/java/org/rocksdb/Env.java +++ b/java/src/main/java/org/rocksdb/Env.java @@ -7,25 +7,13 @@ import java.util.Arrays; import java.util.List; +import java.util.concurrent.atomic.AtomicReference; /** * Base class for all Env implementations in RocksDB. */ public abstract class Env extends RocksObject { - - static { - RocksDB.loadLibrary(); - } - - private static final Env DEFAULT_ENV = new RocksEnv(getDefaultEnvInternal()); - static { - /* - * The Ownership of the Default Env belongs to C++ - * and so we disown the native handle here so that - * we cannot accidentally free it from Java. - */ - DEFAULT_ENV.disOwnNativeHandle(); - } + private static final AtomicReference SINGULAR_DEFAULT_ENV = new AtomicReference<>(null); /** *

Returns the default environment suitable for the current operating @@ -39,7 +27,30 @@ public abstract class Env extends RocksObject { * @return the default {@link org.rocksdb.RocksEnv} instance. */ public static Env getDefault() { - return DEFAULT_ENV; + RocksEnv defaultEnv = null; + RocksEnv newDefaultEnv = null; + + while ((defaultEnv = SINGULAR_DEFAULT_ENV.get()) == null) { + // construct the RocksEnv only once in this thread + if (newDefaultEnv == null) { + // load the library just in-case it isn't already loaded! + RocksDB.loadLibrary(); + + newDefaultEnv = new RocksEnv(getDefaultEnvInternal()); + + /* + * The Ownership of the Default Env belongs to C++ + * and so we disown the native handle here so that + * we cannot accidentally free it from Java. + */ + newDefaultEnv.disOwnNativeHandle(); + } + + // use CAS to gracefully handle thread pre-emption + SINGULAR_DEFAULT_ENV.compareAndSet(null, newDefaultEnv); + } + + return defaultEnv; } /** diff --git a/java/src/main/java/org/rocksdb/EnvOptions.java b/java/src/main/java/org/rocksdb/EnvOptions.java index 5cb193ac1..fd56bc49e 100644 --- a/java/src/main/java/org/rocksdb/EnvOptions.java +++ b/java/src/main/java/org/rocksdb/EnvOptions.java @@ -9,15 +9,11 @@ * Options while opening a file to read/write */ public class EnvOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } - /** * Construct with default Options */ public EnvOptions() { - super(newEnvOptions()); + super(newEnvOptionsInstance()); } /** @@ -323,6 +319,10 @@ public RateLimiter rateLimiter() { return rateLimiter; } + private static long newEnvOptionsInstance() { + RocksDB.loadLibrary(); + return newEnvOptions(); + } private static native long newEnvOptions(); private static native long newEnvOptions(final long dboptions_handle); @Override protected final native void disposeInternal(final long handle); diff --git a/java/src/main/java/org/rocksdb/FlushOptions.java b/java/src/main/java/org/rocksdb/FlushOptions.java index 0ec835089..be8c4bc94 100644 --- a/java/src/main/java/org/rocksdb/FlushOptions.java +++ b/java/src/main/java/org/rocksdb/FlushOptions.java @@ -10,15 +10,11 @@ * {@link org.rocksdb.RocksDB}. */ public class FlushOptions extends RocksObject { - static { - RocksDB.loadLibrary(); - } - /** * Construct a new instance of FlushOptions. */ public FlushOptions(){ - super(newFlushOptions()); + super(newFlushOptionsInance()); } /** @@ -77,7 +73,10 @@ public boolean allowWriteStall() { assert(isOwningHandle()); return allowWriteStall(nativeHandle_); } - + private static long newFlushOptionsInance() { + RocksDB.loadLibrary(); + return newFlushOptions(); + } private static native long newFlushOptions(); @Override protected final native void disposeInternal(final long handle); diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index d00b489ab..2d735e5c3 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -20,9 +20,6 @@ public class Options extends RocksObject MutableDBOptionsInterface, ColumnFamilyOptionsInterface, MutableColumnFamilyOptionsInterface { - static { - RocksDB.loadLibrary(); - } /** * Converts the input properties into a Options-style formatted string @@ -50,7 +47,7 @@ public static String getOptionStringFromProps(final Properties properties) { * an {@code rocksdb::Options} in the c++ side. 
*/ public Options() { - super(newOptions()); + super(newOptionsInstance()); env_ = Env.getDefault(); } @@ -2129,6 +2126,10 @@ public PrepopulateBlobCache prepopulateBlobCache() { // END options for blobs (integrated BlobDB) // + private static long newOptionsInstance() { + RocksDB.loadLibrary(); + return newOptions(); + } private static native long newOptions(); private static native long newOptions(long dbOptHandle, long cfOptHandle); private static native long copyOptions(long handle); diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index fb35208bc..fd0e0bd30 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -31,11 +31,6 @@ private enum LibraryState { private static final AtomicReference libraryLoaded = new AtomicReference<>(LibraryState.NOT_LOADED); - - static { - RocksDB.loadLibrary(); - } - private final List ownedColumnFamilyHandles = new ArrayList<>(); /** @@ -175,6 +170,7 @@ protected RocksDB(final long nativeHandle) { * @see Options#setCreateIfMissing(boolean) */ public static RocksDB open(final String path) throws RocksDBException { + RocksDB.loadLibrary(); final Options options = new Options(); options.setCreateIfMissing(true); return open(options, path); @@ -330,6 +326,7 @@ public static RocksDB open(final DBOptions options, final String path, */ public static RocksDB openReadOnly(final String path) throws RocksDBException { + RocksDB.loadLibrary(); // This allows to use the rocksjni default Options instead of // the c++ one. final Options options = new Options(); diff --git a/java/src/main/java/org/rocksdb/SstFileReader.java b/java/src/main/java/org/rocksdb/SstFileReader.java index 678c3519c..d7d5b400c 100644 --- a/java/src/main/java/org/rocksdb/SstFileReader.java +++ b/java/src/main/java/org/rocksdb/SstFileReader.java @@ -6,9 +6,6 @@ package org.rocksdb; public class SstFileReader extends RocksObject { - static { - RocksDB.loadLibrary(); - } public SstFileReader(final Options options) { super(newSstFileReader(options.nativeHandle_)); diff --git a/java/src/main/java/org/rocksdb/SstFileWriter.java b/java/src/main/java/org/rocksdb/SstFileWriter.java index 5dd0b6dd5..985dc619a 100644 --- a/java/src/main/java/org/rocksdb/SstFileWriter.java +++ b/java/src/main/java/org/rocksdb/SstFileWriter.java @@ -13,9 +13,6 @@ * sequence number = 0. */ public class SstFileWriter extends RocksObject { - static { - RocksDB.loadLibrary(); - } /** * SstFileWriter Constructor. diff --git a/java/src/main/java/org/rocksdb/WriteBufferManager.java b/java/src/main/java/org/rocksdb/WriteBufferManager.java index 3364d6eab..9c5645880 100644 --- a/java/src/main/java/org/rocksdb/WriteBufferManager.java +++ b/java/src/main/java/org/rocksdb/WriteBufferManager.java @@ -9,9 +9,6 @@ * Java wrapper over native write_buffer_manager class */ public class WriteBufferManager extends RocksObject { - static { - RocksDB.loadLibrary(); - } /** * Construct a new instance of WriteBufferManager. 
@@ -28,7 +25,7 @@ public class WriteBufferManager extends RocksObject { */ public WriteBufferManager( final long bufferSizeBytes, final Cache cache, final boolean allowStall) { - super(newWriteBufferManager(bufferSizeBytes, cache.nativeHandle_, allowStall)); + super(newWriteBufferManagerInstance(bufferSizeBytes, cache.nativeHandle_, allowStall)); this.allowStall_ = allowStall; } @@ -40,6 +37,11 @@ public boolean allowStall() { return allowStall_; } + private static long newWriteBufferManagerInstance( + final long bufferSizeBytes, final long cacheHandle, final boolean allowStall) { + RocksDB.loadLibrary(); + return newWriteBufferManager(bufferSizeBytes, cacheHandle, allowStall); + } private static native long newWriteBufferManager( final long bufferSizeBytes, final long cacheHandle, final boolean allowStall); From 98ab2d80fadd1a72e3d5cce6b87c58746167613b Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Tue, 10 Oct 2023 11:07:33 -0700 Subject: [PATCH 189/386] Add PerfContext API in Java (#11805) Summary: This PR expose RocksDB C++ API for performance measurement in Java. It's initial implementation and it doesn't support ```level_to_perf_context``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11805 Reviewed By: akankshamahajan15 Differential Revision: D50128356 Pulled By: ltamasi fbshipit-source-id: afb35980a89129a30d4a6b4cce12352c9de186b6 --- java/CMakeLists.txt | 3 + java/Makefile | 4 + java/rocksjni/jni_perf_context.cc | 1183 +++++++++++++++++ java/rocksjni/portal.h | 47 + java/rocksjni/rocksjni.cc | 32 + .../main/java/org/rocksdb/PerfContext.java | 756 +++++++++++ java/src/main/java/org/rocksdb/PerfLevel.java | 55 + java/src/main/java/org/rocksdb/RocksDB.java | 39 + .../java/org/rocksdb/PerfContextTest.java | 97 ++ .../test/java/org/rocksdb/PerfLevelTest.java | 60 + src.mk | 1 + 11 files changed, 2277 insertions(+) create mode 100644 java/rocksjni/jni_perf_context.cc create mode 100644 java/src/main/java/org/rocksdb/PerfContext.java create mode 100644 java/src/main/java/org/rocksdb/PerfLevel.java create mode 100644 java/src/test/java/org/rocksdb/PerfContextTest.java create mode 100644 java/src/test/java/org/rocksdb/PerfLevelTest.java diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index ff1f05a32..dbc40b292 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -48,6 +48,7 @@ set(JNI_NATIVE_SOURCES rocksjni/options.cc rocksjni/options_util.cc rocksjni/persistent_cache.cc + rocksjni/jni_perf_context.cc rocksjni/ratelimiterjni.cc rocksjni/remove_emptyvalue_compactionfilterjni.cc rocksjni/restorejni.cc @@ -193,6 +194,8 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/OptionString.java src/main/java/org/rocksdb/OptionsUtil.java src/main/java/org/rocksdb/PersistentCache.java + src/main/java/org/rocksdb/PerfContext.java + src/main/java/org/rocksdb/PerfLevel.java src/main/java/org/rocksdb/PlainTableConfig.java src/main/java/org/rocksdb/PrepopulateBlobCache.java src/main/java/org/rocksdb/Priority.java diff --git a/java/Makefile b/java/Makefile index 7d2695af8..847b18cc4 100644 --- a/java/Makefile +++ b/java/Makefile @@ -51,6 +51,8 @@ NATIVE_JAVA_CLASSES = \ org.rocksdb.Options\ org.rocksdb.OptionsUtil\ org.rocksdb.PersistentCache\ + org.rocksdb.PerfContext\ + org.rocksdb.PerfLevel\ org.rocksdb.PlainTableConfig\ org.rocksdb.RateLimiter\ org.rocksdb.ReadOptions\ @@ -161,6 +163,8 @@ JAVA_TESTS = \ org.rocksdb.OptimisticTransactionOptionsTest\ org.rocksdb.OptionsUtilTest\ org.rocksdb.OptionsTest\ + org.rocksdb.PerfLevelTest \ + org.rocksdb.PerfContextTest \ 
org.rocksdb.PlainTableConfigTest\ org.rocksdb.RateLimiterTest\ org.rocksdb.ReadOnlyTest\ diff --git a/java/rocksjni/jni_perf_context.cc b/java/rocksjni/jni_perf_context.cc new file mode 100644 index 000000000..e38f3fea4 --- /dev/null +++ b/java/rocksjni/jni_perf_context.cc @@ -0,0 +1,1183 @@ +#include + +#include "include/org_rocksdb_PerfContext.h" +#include "rocksdb/db.h" +#include "rocksdb/perf_context.h" + +void Java_org_rocksdb_PerfContext_reset(JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + perf_context->Reset(); +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getUserKeyComparisonCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getUserKeyComparisonCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->user_key_comparison_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockCacheHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockCacheHitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_cache_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockReadCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockReadCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_read_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockCacheIndexHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockCacheIndexHitCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_cache_index_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockCacheStandaloneHandleCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockCacheStandaloneHandleCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_cache_standalone_handle_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockCacheRealHandleCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockCacheRealHandleCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_cache_real_handle_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getIndexBlockReadCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getIndexBlockReadCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->index_block_read_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockCacheFilterHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockCacheFilterHitCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_cache_filter_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFilterBlockReadCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFilterBlockReadCount(JNIEnv*, jobject, + jlong jpc_handle) { + 
ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->filter_block_read_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCompressionDictBlockReadCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getCompressionDictBlockReadCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->compression_dict_block_read_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockReadByte + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockReadByte(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_read_byte; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockReadTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockReadTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_read_time; +} + +jlong Java_org_rocksdb_PerfContext_getBlockReadCpuTime(JNIEnv*, jobject, + jlong jpc_handler) { + // reinterpret_cast(jcf_handle); + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handler); + return perf_context->block_read_cpu_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSecondaryCacheHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSecondaryCacheHitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->secondary_cache_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCompressedSecCacheInsertRealCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertRealCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->compressed_sec_cache_insert_real_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCompressedSecCacheInsertDummyCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertDummyCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->compressed_sec_cache_insert_dummy_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCompressedSecCacheUncompressedBytes + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheUncompressedBytes( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->compressed_sec_cache_uncompressed_bytes; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCompressedSecCacheCompressedBytes + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheCompressedBytes( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->compressed_sec_cache_compressed_bytes; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockChecksumTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockChecksumTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_checksum_time; +} 
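
The JNI getters in this new file are thin reads of the thread-local `rocksdb::PerfContext` that the C++ API already exposes. For reference only (not part of this patch), a minimal C++ sketch of that underlying API; the key name is illustrative, and the counters printed correspond to the fields read by the getters above:

```cpp
#include <iostream>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

void TimedRead(ROCKSDB_NAMESPACE::DB* db) {
  using namespace ROCKSDB_NAMESPACE;

  // Enable per-operation counters and timers for this thread, then clear
  // any previously accumulated values.
  SetPerfLevel(PerfLevel::kEnableTimeExceptForMutex);
  get_perf_context()->Reset();

  std::string value;
  Status s = db->Get(ReadOptions(), "some_key", &value);
  (void)s;

  // These are the same fields surfaced to Java by jni_perf_context.cc
  // (e.g. getUserKeyComparisonCount, getBlockReadCount, getBlockReadTime).
  const PerfContext* ctx = get_perf_context();
  std::cout << "user_key_comparison_count=" << ctx->user_key_comparison_count
            << " block_read_count=" << ctx->block_read_count
            << " block_read_time=" << ctx->block_read_time
            << " block_checksum_time=" << ctx->block_checksum_time
            << std::endl;

  SetPerfLevel(PerfLevel::kDisable);
}
```
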
+ +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockDecompressTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockDecompressTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_decompress_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getReadBytes + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getReadBytes(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_read_bytes; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getMultigetReadBytes + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getMultigetReadBytes(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->multiget_read_bytes; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getIterReadBytes + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getIterReadBytes(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->iter_read_bytes; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobCacheHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobCacheHitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_cache_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobReadCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobReadCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_read_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobReadByte + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobReadByte(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_read_byte; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobReadTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobReadTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_read_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobChecksumTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobChecksumTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_checksum_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlobDecompressTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlobDecompressTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->blob_decompress_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getInternal_key_skipped_count + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalKeySkippedCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_key_skipped_count; +} + +/* + * Class: 
org_rocksdb_PerfContext + * Method: getInternalDeleteSkippedCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalDeleteSkippedCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_delete_skipped_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getInternalRecentSkippedCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalRecentSkippedCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_recent_skipped_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getInternalMergeCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalMergeCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_merge_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getInternalMergePointLookupCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalMergePointLookupCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_merge_point_lookup_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getInternalRangeDelReseekCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getInternalRangeDelReseekCount( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->internal_range_del_reseek_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSnapshotTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSnapshotTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_snapshot_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFromMemtableTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFromMemtableTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_from_memtable_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFromMemtableCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFromMemtableCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_from_memtable_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getPostProcessTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getPostProcessTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_post_process_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFromOutputFilesTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFromOutputFilesTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_from_output_files_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekOnMemtableTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableTime(JNIEnv*, jobject, + jlong jpc_handle) 
{ + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_on_memtable_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekOnMemtableCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_on_memtable_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getNextOnMemtableCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getNextOnMemtableCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->next_on_memtable_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getPrevOnMemtableCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getPrevOnMemtableCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->prev_on_memtable_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekChildSeekTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekChildSeekTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_child_seek_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekChildSeekCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekChildSeekCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_child_seek_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekMinHeapTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekMinHeapTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_min_heap_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekMaxHeapTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekMaxHeapTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_max_heap_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getSeekInternalSeekTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getSeekInternalSeekTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->seek_internal_seek_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFindNextUserEntryTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFindNextUserEntryTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->find_next_user_entry_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWriteWalTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getWriteWalTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_wal_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWriteMemtableTime + * Signature: (J)J + */ +jlong 
Java_org_rocksdb_PerfContext_getWriteMemtableTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_memtable_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWriteDelayTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getWriteDelayTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_delay_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWriteSchedulingFlushesCompactionsTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getWriteSchedulingFlushesCompactionsTime( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_scheduling_flushes_compactions_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWritePreAndPostProcessTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getWritePreAndPostProcessTime( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_pre_and_post_process_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getWriteThreadWaitNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getWriteThreadWaitNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->write_thread_wait_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getDbMutexLockNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getDbMutexLockNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->db_mutex_lock_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getDbConditionWaitNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getDbConditionWaitNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->db_condition_wait_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getMergeOperatorTimeNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getMergeOperatorTimeNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->merge_operator_time_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getReadIndexBlockNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getReadIndexBlockNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->read_index_block_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getReadFilterBlockNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getReadFilterBlockNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->read_filter_block_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getNewTableBlockIterNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getNewTableBlockIterNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + 
reinterpret_cast(jpc_handle); + return perf_context->new_table_block_iter_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getNewTableIteratorNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getNewTableIteratorNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->new_table_iterator_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBlockSeekNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBlockSeekNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->block_seek_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getFindTableNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getFindTableNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->find_table_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBloomMemtableHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBloomMemtableHitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->bloom_memtable_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBloomMemtableMissCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBloomMemtableMissCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->bloom_memtable_miss_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBloomSstHitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBloomSstHitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->bloom_sst_hit_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getBloomSstMissCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getBloomSstMissCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->bloom_sst_miss_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getKeyLockWaitTime + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getKeyLockWaitTime(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->key_lock_wait_time; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getKeyLockWaitCount + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getKeyLockWaitCount(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->key_lock_wait_count; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewSequentialFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewSequentialFileNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_sequential_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewRandomAccessFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewRandomAccessFileNanos( + JNIEnv*, 
jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_random_access_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewWritableFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewWritableFileNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_writable_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvReuseWritableFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvReuseWritableFileNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_reuse_writable_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewRandomRwFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewRandomRwFileNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_random_rw_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewDirectoryNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewDirectoryNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_directory_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvFileExistsNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvFileExistsNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_file_exists_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvGetChildrenNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_get_children_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvGetChildrenFileAttributesNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenFileAttributesNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_get_children_file_attributes_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvDeleteFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvDeleteFileNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_delete_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvCreateDirNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvCreateDirNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_create_dir_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvCreateDirIfMissingNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvCreateDirIfMissingNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return 
perf_context->env_create_dir_if_missing_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvDeleteDirNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvDeleteDirNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_delete_dir_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvGetFileSizeNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvGetFileSizeNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_get_file_size_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvGetFileModificationTimeNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvGetFileModificationTimeNanos( + JNIEnv*, jobject, jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_get_file_modification_time_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvRenameFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvRenameFileNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_rename_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvLinkFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvLinkFileNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_link_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvLockFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvLockFileNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_lock_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvUnlockFileNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvUnlockFileNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_unlock_file_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEnvNewLoggerNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEnvNewLoggerNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->env_new_logger_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getCpuNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getGetCpuNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->get_cpu_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getIterNextCpuNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getIterNextCpuNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->iter_next_cpu_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getIterPrevCpuNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getIterPrevCpuNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* 
perf_context = + reinterpret_cast(jpc_handle); + return perf_context->iter_prev_cpu_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getIterSeekCpuNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getIterSeekCpuNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->iter_seek_cpu_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getEncryptDataNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getEncryptDataNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->encrypt_data_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getDecryptDataNanos + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getDecryptDataNanos(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->decrypt_data_nanos; +} + +/* + * Class: org_rocksdb_PerfContext + * Method: getNumberAsyncSeek + * Signature: (J)J + */ +jlong Java_org_rocksdb_PerfContext_getNumberAsyncSeek(JNIEnv*, jobject, + jlong jpc_handle) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = + reinterpret_cast(jpc_handle); + return perf_context->number_async_seek; +} \ No newline at end of file diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 16120b037..aed3dc593 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -27,6 +27,7 @@ #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/filter_policy.h" +#include "rocksdb/perf_level.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/status.h" #include "rocksdb/table.h" @@ -5911,6 +5912,52 @@ class MemoryUsageTypeJni { } }; +class PerfLevelTypeJni { + public: + static jbyte toJavaPerfLevelType(const ROCKSDB_NAMESPACE::PerfLevel level) { + switch (level) { + case ROCKSDB_NAMESPACE::PerfLevel::kUninitialized: + return 0x0; + case ROCKSDB_NAMESPACE::PerfLevel::kDisable: + return 0x1; + case ROCKSDB_NAMESPACE::PerfLevel::kEnableCount: + return 0x2; + case ROCKSDB_NAMESPACE::PerfLevel::kEnableTimeExceptForMutex: + return 0x3; + case ROCKSDB_NAMESPACE::PerfLevel::kEnableTimeAndCPUTimeExceptForMutex: + return 0x4; + case ROCKSDB_NAMESPACE::PerfLevel::kEnableTime: + return 0x5; + case ROCKSDB_NAMESPACE::PerfLevel::kOutOfBounds: + return 0x6; + default: + return 0x6; + } + } + + static ROCKSDB_NAMESPACE::PerfLevel toCppPerfLevelType(const jbyte level) { + switch (level) { + case 0x0: + return ROCKSDB_NAMESPACE::PerfLevel::kUninitialized; + case 0x1: + return ROCKSDB_NAMESPACE::PerfLevel::kDisable; + case 0x2: + return ROCKSDB_NAMESPACE::PerfLevel::kEnableCount; + case 0x3: + return ROCKSDB_NAMESPACE::PerfLevel::kEnableTimeExceptForMutex; + case 0x4: + return ROCKSDB_NAMESPACE::PerfLevel:: + kEnableTimeAndCPUTimeExceptForMutex; + case 0x5: + return ROCKSDB_NAMESPACE::PerfLevel::kEnableTime; + case 0x6: + return ROCKSDB_NAMESPACE::PerfLevel::kOutOfBounds; + default: + return ROCKSDB_NAMESPACE::PerfLevel::kOutOfBounds; + } + } +}; + // The portal class for org.rocksdb.Transaction class TransactionJni : public JavaClass { public: diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index ced72e841..8823b0d31 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -22,6 +22,7 @@ #include "rocksdb/convenience.h" #include "rocksdb/db.h" #include "rocksdb/options.h" +#include 
"rocksdb/perf_context.h" #include "rocksdb/types.h" #include "rocksdb/version.h" #include "rocksjni/cplusplus_to_java_convert.h" @@ -3078,6 +3079,37 @@ jstring Java_org_rocksdb_RocksDB_getDBOptions(JNIEnv* env, jobject, return env->NewStringUTF(options_as_string.c_str()); } +/* + * Class: org_rocksdb_RocksDB + * Method: setPerfLevel + * Signature: (JB)V + */ +void Java_org_rocksdb_RocksDB_setPerfLevel(JNIEnv*, jobject, + jbyte jperf_level) { + rocksdb::SetPerfLevel( + ROCKSDB_NAMESPACE::PerfLevelTypeJni::toCppPerfLevelType(jperf_level)); +} + +/* + * Class: org_rocksdb_RocksDB + * Method: getPerfLevel + * Signature: (J)B + */ +jbyte Java_org_rocksdb_RocksDB_getPerfLevelNative(JNIEnv*, jobject) { + return ROCKSDB_NAMESPACE::PerfLevelTypeJni::toJavaPerfLevelType( + rocksdb::GetPerfLevel()); +} + +/* + * Class: org_rocksdb_RocksDB + * Method: getPerfContextNative + * Signature: ()J + */ +jlong Java_org_rocksdb_RocksDB_getPerfContextNative(JNIEnv*, jobject) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = rocksdb::get_perf_context(); + return reinterpret_cast(perf_context); +} + /* * Class: org_rocksdb_RocksDB * Method: compactFiles diff --git a/java/src/main/java/org/rocksdb/PerfContext.java b/java/src/main/java/org/rocksdb/PerfContext.java new file mode 100644 index 000000000..0adac26fa --- /dev/null +++ b/java/src/main/java/org/rocksdb/PerfContext.java @@ -0,0 +1,756 @@ +package org.rocksdb; + +public class PerfContext extends RocksObject { + protected PerfContext(final long nativeHandle) { + super(nativeHandle); + } + + public void reset() { + reset(nativeHandle_); + } + + /** + * @return total number of user key comparisons + */ + public long getUserKeyComparisonCount() { + return getUserKeyComparisonCount(nativeHandle_); + } + + /** + * @return total number of block cache hits + */ + public long getBlockCacheHitCount() { + return getBlockCacheHitCount(nativeHandle_); + } + + /** + * @return total number of block reads (with IO) + */ + public long getBlockReadCount() { + return getBlockReadCount(nativeHandle_); + } + + /** + * @return total number of bytes from block reads + */ + public long getBlockReadByte() { + return getBlockReadByte(nativeHandle_); + } + + /* + @return total nanos spent on block reads + */ + public long getBlockReadTime() { + return getBlockReadTime(nativeHandle_); + } + + /** + * @return total cpu time in nanos spent on block reads + */ + public long getBlockReadCpuTime() { + return getBlockReadCpuTime(nativeHandle_); + } + + /** + * @return total number of index block hits + */ + public long getBlockCacheIndexHitCount() { + return getBlockCacheIndexHitCount(nativeHandle_); + } + + /** + * @return total number of standalone handles lookup from secondary cache + */ + public long getBlockCacheStandaloneHandleCount() { + return getBlockCacheStandaloneHandleCount(nativeHandle_); + } + + /** + * @return total number of real handles lookup from secondary cache that are inserted into + * primary cache + */ + public long getBlockCacheRealHandleCount() { + return getBlockCacheRealHandleCount(nativeHandle_); + } + + /** + * @return total number of index block reads + */ + public long getIndexBlockReadCount() { + return getIndexBlockReadCount(nativeHandle_); + } + + /** + * @return total number of filter block hits + */ + public long getBlockCacheFilterHitCount() { + return getBlockCacheFilterHitCount(nativeHandle_); + } + + /** + * @return total number of filter block reads + */ + public long getFilterBlockReadCount() { + return getFilterBlockReadCount(nativeHandle_); + } + 
+ /** + * @return total number of compression dictionary block reads + */ + public long getCompressionDictBlockReadCount() { + return getCompressionDictBlockReadCount(nativeHandle_); + } + + /** + * @return total number of secondary cache hits + */ + public long getSecondaryCacheHitCount() { + return getSecondaryCacheHitCount(nativeHandle_); + } + + /** + * @return total number of real handles inserted into secondary cache + */ + public long getCompressedSecCacheInsertRealCount() { + return getCompressedSecCacheInsertRealCount(nativeHandle_); + } + + /** + * @return total number of dummy handles inserted into secondary cache + */ + public long getCompressedSecCacheInsertDummyCount() { + return getCompressedSecCacheInsertDummyCount(nativeHandle_); + } + + /** + * @return bytes for vals before compression in secondary cache + */ + public long getCompressedSecCacheUncompressedBytes() { + return getCompressedSecCacheUncompressedBytes(nativeHandle_); + } + + /** + * @return bytes for vals after compression in secondary cache + */ + public long getCompressedSecCacheCompressedBytes() { + return getCompressedSecCacheCompressedBytes(nativeHandle_); + } + + /** + * @return total nanos spent on block checksum + */ + public long getBlockChecksumTime() { + return getBlockChecksumTime(nativeHandle_); + } + + /** + * + * @return total nanos spent on block decompression + */ + public long getBlockDecompressTime() { + return getBlockDecompressTime(nativeHandle_); + } + + /** + * @return bytes for vals returned by Get + */ + public long getReadBytes() { + return getReadBytes(nativeHandle_); + } + + /** + * @return bytes for vals returned by MultiGet + */ + public long getMultigetReadBytes() { + return getMultigetReadBytes(nativeHandle_); + } + + /** + * @return bytes for keys/vals decoded by iterator + */ + public long getIterReadBytes() { + return getIterReadBytes(nativeHandle_); + } + + /** + * @return total number of blob cache hits + */ + public long getBlobCacheHitCount() { + return getBlobCacheHitCount(nativeHandle_); + } + + /** + * @return total number of blob reads (with IO) + */ + public long getBlobReadCount() { + return getBlobReadCount(nativeHandle_); + } + + /** + * @return total number of bytes from blob reads + */ + public long getBlobReadByte() { + return getBlobReadByte(nativeHandle_); + } + + /** + * @return total nanos spent on blob reads + */ + public long getBlobReadTime() { + return getBlobReadTime(nativeHandle_); + } + + /** + * @return total nanos spent on blob checksum + */ + public long getBlobChecksumTime() { + return getBlobChecksumTime(nativeHandle_); + } + + /** + * @return total nanos spent on blob decompression + */ + public long getBlobDecompressTime() { + return getBlobDecompressTime(nativeHandle_); + } + + /** + * total number of internal keys skipped over during iteration. + * There are several reasons for it: + * 1. when calling Next(), the iterator is in the position of the previous + * key, so that we'll need to skip it. It means this counter will always + * be incremented in Next(). + * 2. when calling Next(), we need to skip internal entries for the previous + * keys that are overwritten. + * 3. when calling Next(), Seek() or SeekToFirst(), after previous key + * before calling Next(), the seek key in Seek() or the beginning for + * SeekToFirst(), there may be one or more deleted keys before the next + * valid key that the operation should place the iterator to. We need + * to skip both of the tombstone and updates hidden by the tombstones. 
The + * tombstones are not included in this counter, while previous updates + * hidden by the tombstones will be included here. + * 4. symmetric cases for Prev() and SeekToLast() + * internal_recent_skipped_count is not included in this counter. + */ + public long getInternalKeySkippedCount() { + return getInternalKeySkippedCount(nativeHandle_); + } + + /** + * Total number of deletes and single deletes skipped over during iteration + * When calling Next(), Seek() or SeekToFirst(), after previous position + * before calling Next(), the seek key in Seek() or the beginning for + * SeekToFirst(), there may be one or more deleted keys before the next valid + * key. Every deleted key is counted once. We don't recount here if there are + * still older updates invalidated by the tombstones. + */ + public long getInternalDeleteSkippedCount() { + return getInternalDeleteSkippedCount(nativeHandle_); + } + + /** + * How many times iterators skipped over internal keys that are more recent + * than the snapshot that iterator is using. + */ + public long getInternalRecentSkippedCount() { + return getInternalRecentSkippedCount(nativeHandle_); + } + + /** + * How many merge operands were fed into the merge operator by iterators. + * Note: base values are not included in the count. + */ + public long getInternalMergeCount() { + return getInternalMergeCount(nativeHandle_); + } + + /** + * How many merge operands were fed into the merge operator by point lookups. + * Note: base values are not included in the count. + */ + public long getInternalMergePointLookupCount() { + return getInternalMergePointLookupCount(nativeHandle_); + } + + /** + * Number of times we reseeked inside a merging iterator, specifically to skip + * after or before a range of keys covered by a range deletion in a newer LSM + * component. 
+ */ + public long getInternalRangeDelReseekCount() { + return getInternalRangeDelReseekCount(nativeHandle_); + } + + /** + * @return total nanos spent on getting snapshot + */ + public long getSnapshotTime() { + return getSnapshotTime(nativeHandle_); + } + + /** + * @return total nanos spent on querying memtables + */ + public long getFromMemtableTime() { + return getFromMemtableTime(nativeHandle_); + } + + /** + * @return number of mem tables queried + */ + public long getFromMemtableCount() { + return getFromMemtableCount(nativeHandle_); + } + + /** + * @return total nanos spent after Get() finds a key + */ + public long getPostProcessTime() { + return getPostProcessTime(nativeHandle_); + } + + /** + * @return total nanos reading from output files + */ + public long getFromOutputFilesTime() { + return getFromOutputFilesTime(nativeHandle_); + } + + /** + * @return total nanos spent on seeking memtable + */ + public long getSeekOnMemtableTime() { + return getSeekOnMemtableTime(nativeHandle_); + } + + /** + * number of seeks issued on memtable + * (including SeekForPrev but not SeekToFirst and SeekToLast) + * @return number of seeks issued on memtable + */ + public long getSeekOnMemtableCount() { + return getSeekOnMemtableCount(nativeHandle_); + } + + /** + * @return number of Next()s issued on memtable + */ + public long getNextOnMemtableCount() { + return getNextOnMemtableCount(nativeHandle_); + } + + /** + * @return number of Prev()s issued on memtable + */ + public long getPrevOnMemtableCount() { + return getPrevOnMemtableCount(nativeHandle_); + } + + /** + * @return total nanos spent on seeking child iters + */ + public long getSeekChildSeekTime() { + return getSeekChildSeekTime(nativeHandle_); + } + + /** + * @return number of seek issued in child iterators + */ + public long getSeekChildSeekCount() { + return getSeekChildSeekCount(nativeHandle_); + } + + /** + * @return total nanos spent on the merge min heap + */ + public long getSeekMinHeapTime() { + return getSeekMinHeapTime(nativeHandle_); + } + + /** + * @return total nanos spent on the merge max heap + */ + public long getSeekMaxHeapTime() { + return getSeekMaxHeapTime(nativeHandle_); + } + + /** + * @return total nanos spent on seeking the internal entries + */ + public long getSeekInternalSeekTime() { + return getSeekInternalSeekTime(nativeHandle_); + } + + /** + * @return total nanos spent on iterating internal entries to find the next user entry + */ + public long getFindNextUserEntryTime() { + return getFindNextUserEntryTime(nativeHandle_); + } + + /** + * @return total nanos spent on writing to WAL + */ + public long getWriteWalTime() { + return getWriteWalTime(nativeHandle_); + } + + /** + * @return total nanos spent on writing to mem tables + */ + public long getWriteMemtableTime() { + return getWriteMemtableTime(nativeHandle_); + } + + /** + * @return total nanos spent on delaying or throttling write + */ + public long getWriteDelayTime() { + return getWriteDelayTime(nativeHandle_); + } + + /** + * @return total nanos spent on switching memtable/wal and scheduling flushes/compactions. 
+ */ + public long getWriteSchedulingFlushesCompactionsTime() { + return getWriteSchedulingFlushesCompactionsTime(nativeHandle_); + } + + /** + * @return total nanos spent on writing a record, excluding the above four things + */ + public long getWritePreAndPostProcessTime() { + return getWritePreAndPostProcessTime(nativeHandle_); + } + + /** + * @return time spent waiting for other threads of the batch group + */ + public long getWriteThreadWaitNanos() { + return getWriteThreadWaitNanos(nativeHandle_); + } + + /** + * @return time spent on acquiring DB mutex. + */ + public long getDbMutexLockNanos() { + return getDbMutexLockNanos(nativeHandle_); + } + + /** + * @return Time spent on waiting with a condition variable created with DB mutex. + */ + public long getDbConditionWaitNanos() { + return getDbConditionWaitNanos(nativeHandle_); + } + + /** + * @return Time spent on merge operator. + */ + public long getMergeOperatorTimeNanos() { + return getMergeOperatorTimeNanos(nativeHandle_); + } + + /** + * @return Time spent on reading index block from block cache or SST file + */ + public long getReadIndexBlockNanos() { + return getReadIndexBlockNanos(nativeHandle_); + } + + /** + * @return Time spent on reading filter block from block cache or SST file + */ + public long getReadFilterBlockNanos() { + return getReadFilterBlockNanos(nativeHandle_); + } + + /** + * @return Time spent on creating data block iterator + */ + public long getNewTableBlockIterNanos() { + return getNewTableBlockIterNanos(nativeHandle_); + } + + /** + * @return Time spent on creating a iterator of an SST file. + */ + public long getNewTableIteratorNanos() { + return getNewTableIteratorNanos(nativeHandle_); + } + + /** + * @return Time spent on seeking a key in data/index blocks + */ + public long getBlockSeekNanos() { + return getBlockSeekNanos(nativeHandle_); + } + /** + * @return Time spent on finding or creating a table reader + */ + public long getFindTableNanos() { + return getFindTableNanos(nativeHandle_); + } + + /** + * @return total number of mem table bloom hits + */ + public long getBloomMemtableHitCount() { + return getBloomMemtableHitCount(nativeHandle_); + } + + // total number of mem table bloom misses + public long getBloomMemtableMissCount() { + return getBloomMemtableMissCount(nativeHandle_); + } + + /** + * @return total number of SST bloom hits + */ + public long getBloomSstHitCount() { + return getBloomSstHitCount(nativeHandle_); + } + + /** + * @return total number of SST bloom misses + */ + public long getBloomSstMissCount() { + return getBloomSstMissCount(nativeHandle_); + } + + /** + * @return Time spent waiting on key locks in transaction lock manager. + */ + public long getKeyLockWaitTime() { + return getKeyLockWaitTime(nativeHandle_); + } + /** + * @return number of times acquiring a lock was blocked by another transaction. + */ + public long getKeyLockWaitCount() { + return getKeyLockWaitCount(nativeHandle_); + } + + /** + * @return Total time spent in Env filesystem operations. These are only populated when TimedEnv + * is used. 
+ */ + public long getEnvNewSequentialFileNanos() { + return getEnvNewSequentialFileNanos(nativeHandle_); + } + + public long getEnvNewRandomAccessFileNanos() { + return getEnvNewRandomAccessFileNanos(nativeHandle_); + } + + public long getEnvNewWritableFileNanos() { + return getEnvNewWritableFileNanos(nativeHandle_); + } + + public long getEnvReuseWritableFileNanos() { + return getEnvReuseWritableFileNanos(nativeHandle_); + } + + public long getEnvNewRandomRwFileNanos() { + return getEnvNewRandomRwFileNanos(nativeHandle_); + } + + public long getEnvNewDirectoryNanos() { + return getEnvNewDirectoryNanos(nativeHandle_); + } + + public long getEnvFileExistsNanos() { + return getEnvFileExistsNanos(nativeHandle_); + } + public long getEnvGetChildrenNanos() { + return getEnvGetChildrenNanos(nativeHandle_); + } + + public long getEnvGetChildrenFileAttributesNanos() { + return getEnvGetChildrenFileAttributesNanos(nativeHandle_); + } + + public long getEnvDeleteFileNanos() { + return getEnvDeleteFileNanos(nativeHandle_); + } + + public long getEnvCreateDirNanos() { + return getEnvCreateDirNanos(nativeHandle_); + } + public long getEnvCreateDirIfMissingNanos() { + return getEnvCreateDirIfMissingNanos(nativeHandle_); + } + + public long getEnvDeleteDirNanos() { + return getEnvDeleteDirNanos(nativeHandle_); + } + + public long getEnvGetFileSizeNanos() { + return getEnvGetFileSizeNanos(nativeHandle_); + } + + public long getEnvGetFileModificationTimeNanos() { + return getEnvGetFileModificationTimeNanos(nativeHandle_); + } + + public long getEnvRenameFileNanos() { + return getEnvRenameFileNanos(nativeHandle_); + } + + public long getEnvLinkFileNanos() { + return getEnvLinkFileNanos(nativeHandle_); + } + + public long getEnvLockFileNanos() { + return getEnvLockFileNanos(nativeHandle_); + } + + public long getEnvUnlockFileNanos() { + return getEnvUnlockFileNanos(nativeHandle_); + } + + public long getEnvNewLoggerNanos() { + return getEnvNewLoggerNanos(nativeHandle_); + } + + public long getGetCpuNanos() { + return getGetCpuNanos(nativeHandle_); + } + + public long getIterNextCpuNanos() { + return getIterNextCpuNanos(nativeHandle_); + } + public long getIterPrevCpuNanos() { + return getIterPrevCpuNanos(nativeHandle_); + } + + public long getIterSeekCpuNanos() { + return getIterSeekCpuNanos(nativeHandle_); + } + + /** + * @return Time spent in encrypting data. Populated when EncryptedEnv is used. + */ + public long getEncryptDataNanos() { + return getEncryptDataNanos(nativeHandle_); + } + + /** + * @return Time spent in decrypting data. Populated when EncryptedEnv is used. + */ + public long getDecryptDataNanos() { + return getDecryptDataNanos(nativeHandle_); + } + + public long getNumberAsyncSeek() { + return getNumberAsyncSeek(nativeHandle_); + } + + @Override + protected void disposeInternal(long handle) { + // Nothing to do. Perf context is valid for all the time of application is running. 
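+    // (The wrapped native object is the thread-local PerfContext owned by
+    // RocksDB itself, obtained via get_perf_context(), so there is no native
+    // memory for this Java wrapper to release.)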
+ } + + private native void reset(final long nativeHandle); + + private native long getUserKeyComparisonCount(final long handle); + private native long getBlockCacheHitCount(final long handle); + private native long getBlockReadCount(final long handle); + private native long getBlockReadByte(final long handle); + private native long getBlockReadTime(final long handle); + private native long getBlockReadCpuTime(final long handle); + private native long getBlockCacheIndexHitCount(final long handle); + private native long getBlockCacheStandaloneHandleCount(final long handle); + private native long getBlockCacheRealHandleCount(final long handle); + private native long getIndexBlockReadCount(final long handle); + private native long getBlockCacheFilterHitCount(final long handle); + private native long getFilterBlockReadCount(final long handle); + private native long getCompressionDictBlockReadCount(final long handle); + + private native long getSecondaryCacheHitCount(long handle); + private native long getCompressedSecCacheInsertRealCount(long handle); + + private native long getCompressedSecCacheInsertDummyCount(final long handle); + private native long getCompressedSecCacheUncompressedBytes(final long handle); + private native long getCompressedSecCacheCompressedBytes(final long handle); + private native long getBlockChecksumTime(final long handle); + private native long getBlockDecompressTime(final long handle); + private native long getReadBytes(final long handle); + private native long getMultigetReadBytes(final long handle); + private native long getIterReadBytes(final long handle); + private native long getBlobCacheHitCount(final long handle); + private native long getBlobReadCount(final long handle); + private native long getBlobReadByte(final long handle); + private native long getBlobReadTime(final long handle); + private native long getBlobChecksumTime(final long handle); + private native long getBlobDecompressTime(final long handle); + private native long getInternalKeySkippedCount(final long handle); + private native long getInternalDeleteSkippedCount(final long handle); + private native long getInternalRecentSkippedCount(final long handle); + private native long getInternalMergeCount(final long handle); + private native long getInternalMergePointLookupCount(final long handle); + private native long getInternalRangeDelReseekCount(final long handle); + private native long getSnapshotTime(final long handle); + private native long getFromMemtableTime(final long handle); + private native long getFromMemtableCount(final long handle); + private native long getPostProcessTime(final long handle); + private native long getFromOutputFilesTime(final long handle); + private native long getSeekOnMemtableTime(final long handle); + private native long getSeekOnMemtableCount(final long handle); + private native long getNextOnMemtableCount(final long handle); + private native long getPrevOnMemtableCount(final long handle); + private native long getSeekChildSeekTime(final long handle); + private native long getSeekChildSeekCount(final long handle); + private native long getSeekMinHeapTime(final long handle); + private native long getSeekMaxHeapTime(final long handle); + private native long getSeekInternalSeekTime(final long handle); + private native long getFindNextUserEntryTime(final long handle); + private native long getWriteWalTime(long handle); + private native long getWriteMemtableTime(long handle); + private native long getWriteDelayTime(long handle); + private native long 
getWriteSchedulingFlushesCompactionsTime(long handle); + private native long getWritePreAndPostProcessTime(long handle); + private native long getWriteThreadWaitNanos(long handle); + private native long getDbMutexLockNanos(long handle); + private native long getDbConditionWaitNanos(long handle); + private native long getMergeOperatorTimeNanos(long handle); + private native long getReadIndexBlockNanos(long handle); + private native long getReadFilterBlockNanos(long handle); + private native long getNewTableBlockIterNanos(long handle); + private native long getNewTableIteratorNanos(long handle); + private native long getBlockSeekNanos(long handle); + private native long getFindTableNanos(long handle); + private native long getBloomMemtableHitCount(long handle); + private native long getBloomMemtableMissCount(long handle); + private native long getBloomSstHitCount(long handle); + private native long getBloomSstMissCount(long handle); + private native long getKeyLockWaitTime(long handle); + private native long getKeyLockWaitCount(long handle); + private native long getEnvNewSequentialFileNanos(long handle); + private native long getEnvNewRandomAccessFileNanos(long handle); + private native long getEnvNewWritableFileNanos(long handle); + private native long getEnvReuseWritableFileNanos(long handle); + private native long getEnvNewRandomRwFileNanos(long handle); + private native long getEnvNewDirectoryNanos(long handle); + private native long getEnvFileExistsNanos(long handle); + private native long getEnvGetChildrenNanos(long handle); + private native long getEnvGetChildrenFileAttributesNanos(long handle); + private native long getEnvDeleteFileNanos(long handle); + private native long getEnvCreateDirNanos(long handle); + private native long getEnvCreateDirIfMissingNanos(long handle); + private native long getEnvDeleteDirNanos(long handle); + private native long getEnvGetFileSizeNanos(long handle); + private native long getEnvGetFileModificationTimeNanos(long handle); + private native long getEnvRenameFileNanos(long handle); + private native long getEnvLinkFileNanos(long handle); + private native long getEnvLockFileNanos(long handle); + private native long getEnvUnlockFileNanos(long handle); + private native long getEnvNewLoggerNanos(long handle); + private native long getGetCpuNanos(long nativeHandle_); + private native long getIterNextCpuNanos(long nativeHandle_); + private native long getIterPrevCpuNanos(long nativeHandle_); + private native long getIterSeekCpuNanos(long nativeHandle_); + private native long getEncryptDataNanos(long nativeHandle_); + private native long getDecryptDataNanos(long nativeHandle_); + private native long getNumberAsyncSeek(long nativeHandle_); +} diff --git a/java/src/main/java/org/rocksdb/PerfLevel.java b/java/src/main/java/org/rocksdb/PerfLevel.java new file mode 100644 index 000000000..1cb07f042 --- /dev/null +++ b/java/src/main/java/org/rocksdb/PerfLevel.java @@ -0,0 +1,55 @@ +package org.rocksdb; + +public enum PerfLevel { + /** + * Unknown setting + */ + UNINITIALIZED((byte) 0), + /** + * disable perf stats + */ + DISABLE((byte) 1), + /** + * enable only count stats + */ + ENABLE_COUNT((byte) 2), + /** + * Other than count stats, also enable time stats except for mutexes + */ + ENABLE_TIME_EXCEPT_FOR_MUTEX((byte) 3), + + /** + * Other than time, also measure CPU time counters. 
Still don't measure
+   * time (neither wall time nor CPU time) for mutexes
+   */
+  ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX((byte) 4),
+  /**
+   * enable count and time stats
+   */
+  ENABLE_TIME((byte) 5),
+
+  /**
+   * Do not use
+   * @deprecated It is here only to keep parity with the C++ API.
+   */
+  @Deprecated OUT_OF_BOUNDS((byte) 6);
+
+  private PerfLevel(byte _value) {
+    this._value = _value;
+  }
+
+  private final byte _value;
+
+  public byte getValue() {
+    return _value;
+  }
+
+  public static PerfLevel getPerfLevel(byte level) {
+    for (PerfLevel l : PerfLevel.values()) {
+      if (l.getValue() == level) {
+        return l;
+      }
+    }
+    throw new IllegalArgumentException("Unknown PerfLevel constant : " + level);
+  }
+}
diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java
index fd0e0bd30..7863790c1 100644
--- a/java/src/main/java/org/rocksdb/RocksDB.java
+++ b/java/src/main/java/org/rocksdb/RocksDB.java
@@ -3430,6 +3430,40 @@ public void setOptions(
         mutableColumnFamilyOptions.getKeys(), mutableColumnFamilyOptions.getValues());
   }
 
+  /**
+   * Set the performance level for RocksDB performance measurement.
+   * @param level the performance level to enable
+   * @throws IllegalArgumentException for UNINITIALIZED and OUT_OF_BOUNDS values,
+   * as they cannot be used as settings.
+   */
+  public void setPerfLevel(final PerfLevel level) {
+    if (level == PerfLevel.UNINITIALIZED) {
+      throw new IllegalArgumentException("Unable to set UNINITIALIZED level");
+    } else if (level == PerfLevel.OUT_OF_BOUNDS) {
+      throw new IllegalArgumentException("Unable to set OUT_OF_BOUNDS level");
+    } else {
+      setPerfLevel(level.getValue());
+    }
+  }
+
+  /**
+   * Return the current performance level measurement setting.
+   * @return the currently active performance level
+   */
+  public PerfLevel getPerfLevel() {
+    byte level = getPerfLevelNative();
+    return PerfLevel.getPerfLevel(level);
+  }
+
+  /**
+   * Return the perf context bound to the current thread.
+   * @return the thread-local PerfContext object
+   */
+  public PerfContext getPerfContext() {
+    long native_handle = getPerfContextNative();
+    return new PerfContext(native_handle);
+  }
+
   /**
    * Get the options for the column family handle
    *
@@ -4570,6 +4604,11 @@ private native void setOptions(final long handle, final long cfHandle,
   private native void setDBOptions(final long handle, final String[] keys, final String[] values)
       throws RocksDBException;
   private native String getDBOptions(final long handle);
+  private native void setPerfLevel(final byte level);
+  private native byte getPerfLevelNative();
+
+  private native long getPerfContextNative();
+
   private native String[] compactFiles(final long handle, final long compactionOptionsHandle,
       final long columnFamilyHandle,
diff --git a/java/src/test/java/org/rocksdb/PerfContextTest.java b/java/src/test/java/org/rocksdb/PerfContextTest.java
new file mode 100644
index 000000000..ae70afc0a
--- /dev/null
+++ b/java/src/test/java/org/rocksdb/PerfContextTest.java
@@ -0,0 +1,97 @@
+package org.rocksdb;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.beans.BeanInfo;
+import java.beans.IntrospectionException;
+import java.beans.Introspector;
+import java.beans.PropertyDescriptor;
+import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.*;
+import org.junit.rules.TemporaryFolder;
+
+public class PerfContextTest {
+  @ClassRule
+  public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
+      new RocksNativeLibraryResource();
+
+  @Rule public TemporaryFolder dbFolder = new TemporaryFolder();
+
+  List<ColumnFamilyDescriptor> cfDescriptors;
+  List<ColumnFamilyHandle> columnFamilyHandleList = new ArrayList<>();
+  RocksDB db;
+
+  @Before
+  public void before() throws RocksDBException {
+    cfDescriptors = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY),
+        new ColumnFamilyDescriptor("new_cf".getBytes()));
+    final DBOptions options =
+        new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true);
+
+    db = RocksDB.open(
+        options, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList);
+  }
+
+  @After
+  public void after() {
+    for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) {
+      columnFamilyHandle.close();
+    }
+    db.close();
+  }
+
+  @Test
+  public void testReset() {
+    db.setPerfLevel(PerfLevel.ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX);
+    PerfContext ctx = db.getPerfContext();
+    assertThat(ctx).isNotNull();
+    ctx.reset();
+  }
+
+  /**
+   * Call all properties to check that we don't have a problem with UnsatisfiedLinkError.
+ */ + @Test + public void testAllGetters() throws RocksDBException, IntrospectionException, + InvocationTargetException, IllegalAccessException { + db.setPerfLevel(PerfLevel.ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX); + db.put("key".getBytes(), "value".getBytes()); + db.compactRange(); + db.get("key".getBytes()); + PerfContext ctx = db.getPerfContext(); + + BeanInfo info = Introspector.getBeanInfo(ctx.getClass(), RocksObject.class); + for (PropertyDescriptor property : info.getPropertyDescriptors()) { + if (property.getReadMethod() != null) { + Object result = property.getReadMethod().invoke(ctx); + assertThat(result).isNotNull(); + assertThat(result).isInstanceOf(Long.class); + } + } + } + + @Test + public void testGetBlockReadCpuTime() throws RocksDBException { + db.setPerfLevel(PerfLevel.ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX); + db.put("key".getBytes(), "value".getBytes()); + db.compactRange(); + db.get("key".getBytes()); + PerfContext ctx = db.getPerfContext(); + assertThat(ctx).isNotNull(); + assertThat(ctx.getBlockReadCpuTime()).isGreaterThan(0); + } + + @Test + public void testGetPostProcessTime() throws RocksDBException { + db.setPerfLevel(PerfLevel.ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX); + db.put("key".getBytes(), "value".getBytes()); + db.compactRange(); + db.get("key".getBytes()); + PerfContext ctx = db.getPerfContext(); + assertThat(ctx).isNotNull(); + assertThat(ctx.getPostProcessTime()).isGreaterThan(0); + } +} diff --git a/java/src/test/java/org/rocksdb/PerfLevelTest.java b/java/src/test/java/org/rocksdb/PerfLevelTest.java new file mode 100644 index 000000000..05532e556 --- /dev/null +++ b/java/src/test/java/org/rocksdb/PerfLevelTest.java @@ -0,0 +1,60 @@ +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.rocksdb.PerfLevel.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.junit.*; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +public class PerfLevelTest { + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + List cfDescriptors; + List columnFamilyHandleList = new ArrayList<>(); + RocksDB db; + + @Before + public void before() throws RocksDBException { + cfDescriptors = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), + new ColumnFamilyDescriptor("new_cf".getBytes())); + final DBOptions options = + new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + + db = RocksDB.open( + options, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList); + } + + @After + public void after() { + for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { + columnFamilyHandle.close(); + } + db.close(); + } + @Test + public void testForInvalidValues() { + assertThatThrownBy(() -> db.setPerfLevel(UNINITIALIZED)) + .isInstanceOf(IllegalArgumentException.class); + assertThatThrownBy(() -> db.setPerfLevel(OUT_OF_BOUNDS)) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void testAllPerfLevels() { + for (PerfLevel level : new PerfLevel[] {DISABLE, ENABLE_COUNT, ENABLE_TIME_EXCEPT_FOR_MUTEX, + ENABLE_TIME_AND_CPU_TIME_EXCEPT_FOR_MUTEX, ENABLE_TIME}) { + db.setPerfLevel(level); + assertThat(db.getPerfLevel()).isEqualTo(level); + } + 
db.setPerfLevel(DISABLE); + } +} diff --git a/src.mk b/src.mk index f6927256a..a019205ae 100644 --- a/src.mk +++ b/src.mk @@ -668,6 +668,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/ingest_external_file_options.cc \ java/rocksjni/filter.cc \ java/rocksjni/iterator.cc \ + java/rocksjni/jni_perf_context.cc \ java/rocksjni/jnicallback.cc \ java/rocksjni/loggerjnicallback.cc \ java/rocksjni/lru_cache.cc \ From 5b11f5a3a294b8e4f2278f95c4236f5a2737ec03 Mon Sep 17 00:00:00 2001 From: anand76 Date: Tue, 10 Oct 2023 13:12:18 -0700 Subject: [PATCH 190/386] Add TieredCache and compressed cache capacity change to db_stress (#11935) Summary: Add `TieredCache` to the cache types tested by db_stress. Also add compressed secondary cache capacity change, and `WriteBufferManager` integration with `TieredCache` for memory charging. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11935 Test Plan: Run whitebox/blackbox crash tests locally Reviewed By: akankshamahajan15 Differential Revision: D50135365 Pulled By: anand1976 fbshipit-source-id: 7d73ed00c00a0953d86e49f35cce6bd550ba00f1 --- cache/secondary_cache_adapter.cc | 3 - db_stress_tool/db_stress_common.cc | 85 +++++++++++++++++++++++++++ db_stress_tool/db_stress_common.h | 12 +++- db_stress_tool/db_stress_driver.cc | 16 ++++- db_stress_tool/db_stress_gflags.cc | 30 +++++++--- db_stress_tool/db_stress_test_base.cc | 74 +++++++++++++++++++---- tools/db_crashtest.py | 27 ++++++++- 7 files changed, 220 insertions(+), 27 deletions(-) diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index d6b347246..b378197cc 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -507,7 +507,6 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( return s; } - assert(old_sec_capacity >= pri_cache_res_->GetTotalMemoryUsed()); size_t old_sec_reserved = old_sec_capacity - pri_cache_res_->GetTotalMemoryUsed(); // Calculate the new secondary cache reservation @@ -527,7 +526,6 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( // cache utilization (increase in capacity - increase in share of cache // reservation) // 3. Increase secondary cache capacity - assert(sec_reserved > old_sec_reserved || sec_reserved == 0); s = secondary_cache_->Deflate(sec_reserved - old_sec_reserved); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation( @@ -544,7 +542,6 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( // reservations) // 3. 
Inflate the secondary cache to give it back the reduction in its // share of cache reservations - assert(old_sec_reserved > sec_reserved || sec_reserved == 0); s = secondary_cache_->SetCapacity(sec_capacity); if (s.ok()) { s = pri_cache_res_->UpdateCacheReservation( diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc index 6a5986665..c7a8ef0a2 100644 --- a/db_stress_tool/db_stress_common.cc +++ b/db_stress_tool/db_stress_common.cc @@ -13,6 +13,7 @@ #include +#include "rocksdb/secondary_cache.h" #include "util/file_checksum_helper.h" #include "util/xxhash.h" @@ -21,6 +22,8 @@ ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr; // If non-null, injects read error at a rate specified by the // read_fault_one_in or write_fault_one_in flag std::shared_ptr fault_fs_guard; +std::shared_ptr compressed_secondary_cache; +std::shared_ptr block_cache; enum ROCKSDB_NAMESPACE::CompressionType compression_type_e = ROCKSDB_NAMESPACE::kSnappyCompression; enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e = @@ -148,6 +151,88 @@ void DbVerificationThread(void* v) { } } +void CompressedCacheSetCapacityThread(void* v) { + assert(FLAGS_compressed_secondary_cache_size > 0 || + FLAGS_compressed_secondary_cache_ratio > 0.0); + auto* thread = reinterpret_cast(v); + SharedState* shared = thread->shared; + while (true) { + { + MutexLock l(shared->GetMutex()); + if (shared->ShouldStopBgThread()) { + shared->IncBgThreadsFinished(); + if (shared->BgThreadsFinished()) { + shared->GetCondVar()->SignalAll(); + } + return; + } + } + db_stress_env->SleepForMicroseconds(FLAGS_secondary_cache_update_interval); + if (FLAGS_compressed_secondary_cache_size > 0) { + Status s = compressed_secondary_cache->SetCapacity(0); + size_t capacity; + if (s.ok()) { + s = compressed_secondary_cache->GetCapacity(capacity); + assert(capacity == 0); + } + db_stress_env->SleepForMicroseconds(10 * 1000 * 1000); + if (s.ok()) { + s = compressed_secondary_cache->SetCapacity( + FLAGS_compressed_secondary_cache_size); + } + if (s.ok()) { + s = compressed_secondary_cache->GetCapacity(capacity); + assert(capacity == FLAGS_compressed_secondary_cache_size); + } + if (!s.ok()) { + fprintf(stderr, "Compressed cache Set/GetCapacity returned error: %s\n", + s.ToString().c_str()); + } + } else if (FLAGS_compressed_secondary_cache_ratio > 0.0) { + if (thread->rand.OneIn(2)) { + size_t capacity = block_cache->GetCapacity(); + size_t adjustment; + if (FLAGS_use_write_buffer_manager && FLAGS_db_write_buffer_size > 0) { + adjustment = (capacity - FLAGS_db_write_buffer_size); + } else { + adjustment = capacity; + } + // Lower by upto 50% of usable block cache capacity + adjustment = (adjustment * thread->rand.Uniform(50)) / 100; + block_cache->SetCapacity(capacity - adjustment); + fprintf(stderr, "New cache capacity = %lu\n", + block_cache->GetCapacity()); + db_stress_env->SleepForMicroseconds(10 * 1000 * 1000); + block_cache->SetCapacity(capacity); + } else { + Status s; + double new_comp_cache_ratio = + (double)thread->rand.Uniform( + FLAGS_compressed_secondary_cache_ratio * 100) / + 100; + if (new_comp_cache_ratio == 0.0) { + new_comp_cache_ratio = 0.05; + } + fprintf(stderr, "New comp cache ratio = %f\n", new_comp_cache_ratio); + + s = UpdateTieredCache(block_cache, /*capacity*/ -1, + new_comp_cache_ratio); + if (s.ok()) { + db_stress_env->SleepForMicroseconds(10 * 1000 * 1000); + } + if (s.ok()) { + s = UpdateTieredCache(block_cache, /*capacity*/ -1, + FLAGS_compressed_secondary_cache_ratio); + } + if (!s.ok()) { 
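+          // A failed capacity/ratio adjustment is treated as non-fatal for the
+          // stress run; it is only reported here.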
+ fprintf(stderr, "UpdateTieredCache returned error: %s\n", + s.ToString().c_str()); + } + } + } + } +} + void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz) { if (!FLAGS_verbose) { return; diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 93b5f32d2..485400e05 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -108,11 +108,14 @@ DECLARE_int32(max_write_buffer_number); DECLARE_int32(min_write_buffer_number_to_merge); DECLARE_int32(max_write_buffer_number_to_maintain); DECLARE_int64(max_write_buffer_size_to_maintain); +DECLARE_bool(use_write_buffer_manager); DECLARE_double(memtable_prefix_bloom_size_ratio); DECLARE_bool(memtable_whole_key_filtering); DECLARE_int32(open_files); -DECLARE_int64(compressed_cache_size); -DECLARE_int32(compressed_cache_numshardbits); +DECLARE_uint64(compressed_secondary_cache_size); +DECLARE_int32(compressed_secondary_cache_numshardbits); +DECLARE_int32(secondary_cache_update_interval); +DECLARE_double(compressed_secondary_cache_ratio); DECLARE_int32(compaction_style); DECLARE_int32(compaction_pri); DECLARE_int32(num_levels); @@ -358,6 +361,9 @@ constexpr int kValueMaxLen = 100; extern ROCKSDB_NAMESPACE::Env* db_stress_env; extern ROCKSDB_NAMESPACE::Env* db_stress_listener_env; extern std::shared_ptr fault_fs_guard; +extern std::shared_ptr + compressed_secondary_cache; +extern std::shared_ptr block_cache; extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e; extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e; @@ -650,6 +656,8 @@ extern void PoolSizeChangeThread(void* v); extern void DbVerificationThread(void* v); +extern void CompressedCacheSetCapacityThread(void* v); + extern void TimestampedSnapshotsThread(void* v); extern void PrintKeyValue(int cf, uint64_t key, const char* value, size_t sz); diff --git a/db_stress_tool/db_stress_driver.cc b/db_stress_tool/db_stress_driver.cc index 2ab0b0d71..92730beca 100644 --- a/db_stress_tool/db_stress_driver.cc +++ b/db_stress_tool/db_stress_driver.cc @@ -121,6 +121,11 @@ bool RunStressTestImpl(SharedState* shared) { shared->IncBgThreads(); } + if (FLAGS_compressed_secondary_cache_size > 0 || + FLAGS_compressed_secondary_cache_ratio > 0.0) { + shared->IncBgThreads(); + } + std::vector threads(n); for (uint32_t i = 0; i < n; i++) { threads[i] = new ThreadState(i, shared); @@ -138,6 +143,13 @@ bool RunStressTestImpl(SharedState* shared) { &continuous_verification_thread); } + ThreadState compressed_cache_set_capacity_thread(0, shared); + if (FLAGS_compressed_secondary_cache_size > 0 || + FLAGS_compressed_secondary_cache_ratio > 0.0) { + db_stress_env->StartThread(CompressedCacheSetCapacityThread, + &compressed_cache_set_capacity_thread); + } + // Each thread goes through the following states: // initializing -> wait for others to init -> read/populate/depopulate // wait for others to operate -> verify -> done @@ -230,7 +242,9 @@ bool RunStressTestImpl(SharedState* shared) { } if (FLAGS_compaction_thread_pool_adjust_interval > 0 || - FLAGS_continuous_verification_interval > 0) { + FLAGS_continuous_verification_interval > 0 || + FLAGS_compressed_secondary_cache_size > 0 || + FLAGS_compressed_secondary_cache_ratio > 0.0) { MutexLock l(shared->GetMutex()); shared->SetShouldStopBgThread(); while (!shared->BgThreadsFinished()) { diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index cdea77c19..cd1c978b8 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ 
b/db_stress_tool/db_stress_gflags.cc @@ -136,6 +136,9 @@ DEFINE_uint64(db_write_buffer_size, ROCKSDB_NAMESPACE::Options().db_write_buffer_size, "Number of bytes to buffer in all memtables before compacting"); +DEFINE_bool(use_write_buffer_manager, false, + "Charge WriteBufferManager memory to the block cache"); + DEFINE_int32( write_buffer_size, static_cast(ROCKSDB_NAMESPACE::Options().write_buffer_size), @@ -198,15 +201,23 @@ DEFINE_int32(open_files, ROCKSDB_NAMESPACE::Options().max_open_files, "Maximum number of files to keep open at the same time " "(use default if == 0)"); -DEFINE_int64(compressed_cache_size, 0, - "Number of bytes to use as a cache of compressed data." - " 0 means use default settings."); +DEFINE_uint64(compressed_secondary_cache_size, 0, + "Number of bytes to use as a cache of compressed data." + " 0 means use default settings."); -DEFINE_int32( - compressed_cache_numshardbits, -1, - "Number of shards for the compressed block cache is 2 ** " - "compressed_cache_numshardbits. Negative value means default settings. " - "This is applied only if compressed_cache_size is greater than 0."); +DEFINE_int32(compressed_secondary_cache_numshardbits, -1, + "Number of shards for the compressed secondary cache is 2 ** " + "compressed_secondary_cache_numshardbits. " + "Negative value means default settings. This is applied only " + "if compressed_secondary_cache_size is greater than 0."); + +DEFINE_double(compressed_secondary_cache_ratio, 0.0, + "Fraction of block cache memory budget to use for compressed " + "secondary cache"); + +DEFINE_int32(secondary_cache_update_interval, 30 * 1000 * 1000, + "Interval between modification of secondary cache parameters, in " + "microseconds"); DEFINE_int32(compaction_style, ROCKSDB_NAMESPACE::Options().compaction_style, ""); @@ -1023,6 +1034,9 @@ DEFINE_string(secondary_cache_uri, "", DEFINE_int32(secondary_cache_fault_one_in, 0, "On non-zero, enables fault injection in secondary cache inserts" " and lookups"); +DEFINE_double(tiered_cache_percent_compressed, 0.0, + "Percentage of total block cache budget to allocate to the " + "compressed cache"); DEFINE_int32(open_write_fault_one_in, 0, "On non-zero, enables fault injection on file writes " "during DB reopen."); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 5b843eb5d..02933e00c 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -112,6 +112,11 @@ std::shared_ptr StressTest::NewCache(size_t capacity, std::shared_ptr secondary_cache; if (!FLAGS_secondary_cache_uri.empty()) { + assert(!strstr(FLAGS_secondary_cache_uri.c_str(), + "compressed_secondary_cache") || + (FLAGS_compressed_secondary_cache_size == 0 && + FLAGS_compressed_secondary_cache_ratio == 0.0 && + !StartsWith(FLAGS_cache_type, "tiered_"))); Status s = SecondaryCache::CreateFromString( config_options, FLAGS_secondary_cache_uri, &secondary_cache); if (secondary_cache == nullptr) { @@ -125,36 +130,81 @@ std::shared_ptr StressTest::NewCache(size_t capacity, secondary_cache, static_cast(FLAGS_seed), FLAGS_secondary_cache_fault_one_in); } + } else if (FLAGS_compressed_secondary_cache_size > 0) { + if (StartsWith(FLAGS_cache_type, "tiered_")) { + fprintf(stderr, + "Cannot specify both compressed_secondary_cache_size and %s\n", + FLAGS_cache_type.c_str()); + exit(1); + } + CompressedSecondaryCacheOptions opts; + opts.capacity = FLAGS_compressed_secondary_cache_size; + secondary_cache = NewCompressedSecondaryCache(opts); + if (secondary_cache == 
nullptr) { + fprintf(stderr, "Failed to allocate compressed secondary cache\n"); + exit(1); + } + compressed_secondary_cache = secondary_cache; } - if (FLAGS_cache_type == "clock_cache") { + std::string cache_type = FLAGS_cache_type; + size_t cache_size = FLAGS_cache_size; + bool tiered = false; + if (StartsWith(cache_type, "tiered_")) { + tiered = true; + cache_type.erase(0, strlen("tiered_")); + } + if (FLAGS_use_write_buffer_manager) { + cache_size += FLAGS_db_write_buffer_size; + } + if (cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); - } else if (EndsWith(FLAGS_cache_type, "hyper_clock_cache")) { + } else if (EndsWith(cache_type, "hyper_clock_cache")) { size_t estimated_entry_charge; - if (FLAGS_cache_type == "fixed_hyper_clock_cache" || - FLAGS_cache_type == "hyper_clock_cache") { + if (cache_type == "fixed_hyper_clock_cache" || + cache_type == "hyper_clock_cache") { estimated_entry_charge = FLAGS_block_size; - } else if (FLAGS_cache_type == "auto_hyper_clock_cache") { + } else if (cache_type == "auto_hyper_clock_cache") { estimated_entry_charge = 0; } else { fprintf(stderr, "Cache type not supported."); exit(1); } - HyperClockCacheOptions opts(FLAGS_cache_size, estimated_entry_charge, + HyperClockCacheOptions opts(cache_size, estimated_entry_charge, num_shard_bits); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); - return opts.MakeSharedCache(); - } else if (FLAGS_cache_type == "lru_cache") { + if (tiered) { + TieredCacheOptions tiered_opts; + tiered_opts.cache_opts = &opts; + tiered_opts.cache_type = PrimaryCacheType::kCacheTypeHCC; + tiered_opts.total_capacity = cache_size; + tiered_opts.compressed_secondary_ratio = 0.5; + block_cache = NewTieredCache(tiered_opts); + } else { + opts.secondary_cache = std::move(secondary_cache); + block_cache = opts.MakeSharedCache(); + } + } else if (EndsWith(cache_type, "lru_cache")) { LRUCacheOptions opts; opts.capacity = capacity; opts.num_shard_bits = num_shard_bits; - opts.secondary_cache = std::move(secondary_cache); - return NewLRUCache(opts); + if (tiered) { + TieredCacheOptions tiered_opts; + tiered_opts.cache_opts = &opts; + tiered_opts.cache_type = PrimaryCacheType::kCacheTypeLRU; + tiered_opts.total_capacity = cache_size; + tiered_opts.compressed_secondary_ratio = 0.5; + block_cache = NewTieredCache(tiered_opts); + } else { + opts.secondary_cache = std::move(secondary_cache); + block_cache = NewLRUCache(opts); + } } else { fprintf(stderr, "Cache type not supported."); exit(1); } + return block_cache; } std::vector StressTest::GetBlobCompressionTags() { @@ -3153,6 +3203,10 @@ void InitializeOptionsFromFlags( FLAGS_max_write_buffer_size_to_maintain; options.memtable_prefix_bloom_size_ratio = FLAGS_memtable_prefix_bloom_size_ratio; + if (FLAGS_use_write_buffer_manager) { + options.write_buffer_manager.reset( + new WriteBufferManager(FLAGS_db_write_buffer_size, block_cache)); + } options.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering; options.disable_auto_compactions = FLAGS_disable_auto_compactions; options.max_background_compactions = FLAGS_max_background_compactions; diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 7b2ce96a1..15d974ab8 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -43,7 +43,7 @@ [random.randint(0, 19), random.lognormvariate(2.3, 1.3)] ), "cache_index_and_filter_blocks": lambda: random.randint(0, 1), - "cache_size": 8388608, + "cache_size": lambda: random.choice([8388608, 33554432]), 
"charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]), "charge_filter_construction": lambda: random.choice([0, 1]), "charge_table_reader": lambda: random.choice([0, 1]), @@ -126,7 +126,9 @@ "mock_direct_io": False, "cache_type": lambda: random.choice( ["lru_cache", "fixed_hyper_clock_cache", "auto_hyper_clock_cache", - "auto_hyper_clock_cache"] + "auto_hyper_clock_cache", "tiered_lru_cache", + "tiered_fixed_hyper_clock_cache", "tiered_auto_hyper_clock_cache", + "tiered_auto_hyper_clock_cache"] ), "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), @@ -163,6 +165,7 @@ "db_write_buffer_size": lambda: random.choice( [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024] ), + "use_write_buffer_manager": lambda: random.randint(0,1), "avoid_unnecessary_blocking_io": random.randint(0, 1), "write_dbid_to_manifest": random.randint(0, 1), "avoid_flush_during_recovery": lambda: random.choice( @@ -191,6 +194,7 @@ ), "user_timestamp_size": 0, "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]), + "compressed_secondary_cache_size": lambda: random.choice([8388608, 16777216]), "prepopulate_block_cache": lambda: random.choice([0, 1]), "memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]), "memtable_whole_key_filtering": lambda: random.randint(0, 1), @@ -202,7 +206,8 @@ "secondary_cache_uri": lambda: random.choice( [ "", - "compressed_secondary_cache://capacity=8388608", + "", + "", "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true", ] ), @@ -681,6 +686,22 @@ def finalize_and_sanitize(src_params): if dest_params["write_fault_one_in"] > 0: # background work may be disabled while DB is resuming after some error dest_params["max_write_buffer_number"] = max(dest_params["max_write_buffer_number"], 10) + if dest_params["secondary_cache_uri"].find("compressed_secondary_cache") >= 0: + dest_params["compressed_secondary_cache_size"] = 0 + dest_params["compressed_secondary_cache_ratio"] = 0.0 + if dest_params["cache_type"].find("tiered_") >= 0: + if dest_params["compressed_secondary_cache_size"] > 0: + dest_params["compressed_secondary_cache_ratio"] = \ + float(dest_params["compressed_secondary_cache_size"]/ \ + (dest_params["cache_size"] + dest_params["compressed_secondary_cache_size"])) + dest_params["compressed_secondary_cache_size"] = 0 + else: + dest_params["compressed_secondary_cache_ratio"] = 0.0 + dest_params["cache_type"] = dest_params["cache_type"].replace("tiered_", "") + if dest_params["use_write_buffer_manager"]: + if (dest_params["cache_size"] <= 0 + or dest_params["db_write_buffer_size"] <= 0): + dest_params["use_write_buffer_manager"] = 0 return dest_params From 4bd5aa4f5541d22ae52ff1ee2f6c9b782cf5ccdc Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 11 Oct 2023 09:42:48 -0700 Subject: [PATCH 191/386] Fix two `ErrorHandler` race conditions (#11939) Summary: 1. Prevent a double join on a `port::Thread` 2. Ensure `recovery_in_prog_` and `bg_error_` are both set under same lock hold. This is useful for writers who see a non-OK `bg_error_` and are deciding whether to stall based on whether the error will be auto-recovered. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11939 Reviewed By: cbi42 Differential Revision: D50155484 Pulled By: ajkr fbshipit-source-id: fbc1f85c50e7eaee27ee0e376aee688d8a06c93b --- db/error_handler.cc | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/db/error_handler.cc b/db/error_handler.cc index 018183ba6..c7dd4750f 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -638,16 +638,22 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( ROCKS_LOG_INFO( db_options_.info_log, "ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n"); + // Needs to be set in the same lock hold as setting BG error, otherwise + // intervening writes could see a BG error without a recovery and bail out. + recovery_in_prog_ = true; + if (recovery_thread_) { + // Ensure only one thread can execute the join(). + std::unique_ptr old_recovery_thread( + std::move(recovery_thread_)); // In this case, if recovery_in_prog_ is false, current thread should // wait the previous recover thread to finish and create a new thread // to recover from the bg error. db_mutex_->Unlock(); - recovery_thread_->join(); + old_recovery_thread->join(); db_mutex_->Lock(); } - recovery_in_prog_ = true; TEST_SYNC_POINT("StartRecoverFromRetryableBGIOError::in_progress"); recovery_thread_.reset( new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); @@ -790,12 +796,15 @@ void ErrorHandler::EndAutoRecovery() { if (!end_recovery_) { end_recovery_ = true; } - cv_.SignalAll(); - db_mutex_->Unlock(); if (recovery_thread_) { - recovery_thread_->join(); + // Ensure only one thread can execute the join(). + std::unique_ptr old_recovery_thread( + std::move(recovery_thread_)); + db_mutex_->Unlock(); + cv_.SignalAll(); + old_recovery_thread->join(); + db_mutex_->Lock(); } - db_mutex_->Lock(); return; } From 20b4f1356edd14370bb30b15682fe66f1476bd87 Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 11 Oct 2023 11:28:00 -0700 Subject: [PATCH 192/386] Enable write fault injection in db_stress (#11924) Summary: This PR depends on https://github.com/facebook/rocksdb/issues/11879 . Enable write fault injection for the basic whitebox, blackbox, and cf_consistency modes. For other test modes like multiops_txn, best_efforts_recovery etc., leave it disabled for now until we can do more testing. 
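Because these faults are injected deliberately, the stress test paths in the diff below stay quiet only for intentionally injected IOErrors and keep reporting anything else as a real failure. A compact way to read that condition, sketched as a standalone helper (IsInjectedIOError is an illustrative name rather than an actual db_stress function, and the null check on getState() is an extra precaution added here):

```
#include <cstring>

#include "rocksdb/status.h"

// Returns true only for an IOError whose detail message carries the fault
// injection marker; every other failure should still be treated as a bug.
inline bool IsInjectedIOError(const ROCKSDB_NAMESPACE::Status& s) {
  return s.IsIOError() && s.getState() != nullptr &&
         std::strstr(s.getState(), "injected") != nullptr;
}
```

With such a helper, the backup and checkpoint error paths below reduce to reporting only when `!s.ok() && !IsInjectedIOError(s)`.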
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11924 Reviewed By: ajkr Differential Revision: D50178252 Pulled By: anand1976 fbshipit-source-id: 5794f81c14cded1eb28762b2de818dfff1c1a34c --- db_stress_tool/db_stress_test_base.cc | 30 +++++++++++++++------------ tools/db_crashtest.py | 6 +----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 02933e00c..62ddead7b 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1882,7 +1882,7 @@ Status StressTest::TestBackupRestore( from = "Destroy restore dir"; } } - if (!s.ok()) { + if (!s.ok() && (!s.IsIOError() || !std::strstr(s.getState(), "injected"))) { fprintf(stderr, "Failure in %s with: %s\n", from.c_str(), s.ToString().c_str()); } @@ -1958,17 +1958,19 @@ Status StressTest::TestCheckpoint(ThreadState* thread, if (s.ok()) { s = checkpoint->CreateCheckpoint(checkpoint_dir); if (!s.ok()) { - fprintf(stderr, "Fail to create checkpoint to %s\n", - checkpoint_dir.c_str()); - std::vector files; - Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files); - if (my_s.ok()) { - for (const auto& f : files) { - fprintf(stderr, " %s\n", f.c_str()); + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + fprintf(stderr, "Fail to create checkpoint to %s\n", + checkpoint_dir.c_str()); + std::vector files; + Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files); + if (my_s.ok()) { + for (const auto& f : files) { + fprintf(stderr, " %s\n", f.c_str()); + } + } else { + fprintf(stderr, "Fail to get files under the directory to %s\n", + my_s.ToString().c_str()); } - } else { - fprintf(stderr, "Fail to get files under the directory to %s\n", - my_s.ToString().c_str()); } } } @@ -2044,8 +2046,10 @@ Status StressTest::TestCheckpoint(ThreadState* thread, } if (!s.ok()) { - fprintf(stderr, "A checkpoint operation failed with: %s\n", - s.ToString().c_str()); + if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + fprintf(stderr, "A checkpoint operation failed with: %s\n", + s.ToString().c_str()); + } } else { DestroyDB(checkpoint_dir, tmp_opts); } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 15d974ab8..8952aed1f 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -182,7 +182,7 @@ "max_key_len": 3, "key_len_percent_dist": "1,30,69", "read_fault_one_in": lambda: random.choice([0, 32, 1000]), - "write_fault_one_in": 0, + "write_fault_one_in": lambda: random.choice([0, 128, 1000]), "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]), "open_write_fault_one_in": lambda: random.choice([0, 0, 16]), "open_read_fault_one_in": lambda: random.choice([0, 0, 32]), @@ -380,10 +380,6 @@ def is_direct_io_supported(dbname): # use small value for write_buffer_size so that RocksDB triggers flush # more frequently "write_buffer_size": 1024 * 1024, - # Small write buffer size with more frequent flush has a higher chance - # of hitting write error. DB may be stopped if memtable fills up during - # auto resume. - "write_fault_one_in": 0, "enable_pipelined_write": lambda: random.randint(0, 1), # Snapshots are used heavily in this test mode, while they are incompatible # with compaction filter. 
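The `*_fault_one_in` options touched above (write_fault_one_in, read_fault_one_in, open_write_fault_one_in, and so on) follow RocksDB's usual one-in-N convention: 0 disables injection, and N > 0 makes each candidate operation fail with probability roughly 1/N. Below is a minimal sketch of that gating logic that assumes nothing about the real FaultInjectionTestFS internals; OneInFaultGate is an illustrative name, and the stress test itself uses its own seeded random utility rather than <random>.

```
#include <cstdint>
#include <random>

// One-in-N fault gate: 0 disables injection entirely, N > 0 injects on
// roughly one out of every N calls.
class OneInFaultGate {
 public:
  explicit OneInFaultGate(uint32_t one_in, uint64_t seed = 0xdecafbad)
      : one_in_(one_in), rng_(seed) {}

  bool ShouldInject() {
    if (one_in_ == 0) {
      return false;  // Injection disabled.
    }
    std::uniform_int_distribution<uint32_t> dist(0, one_in_ - 1);
    return dist(rng_) == 0;  // True with probability ~1/one_in_.
  }

 private:
  uint32_t one_in_;
  std::mt19937_64 rng_;
};
```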
From 5e2906c288bf67307b3d51b755b300e24b349944 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 11 Oct 2023 12:56:39 -0700 Subject: [PATCH 193/386] Add missing copyright headers to files added in PR 11805 (#11942) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11942 Reviewed By: akankshamahajan15 Differential Revision: D50188986 fbshipit-source-id: 56a8e72eac085470824e33f2126d2a6ec3880400 --- java/rocksjni/jni_perf_context.cc | 7 ++++++- java/src/main/java/org/rocksdb/PerfContext.java | 5 +++++ java/src/main/java/org/rocksdb/PerfLevel.java | 5 +++++ java/src/test/java/org/rocksdb/PerfContextTest.java | 5 +++++ java/src/test/java/org/rocksdb/PerfLevelTest.java | 5 +++++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/jni_perf_context.cc b/java/rocksjni/jni_perf_context.cc index e38f3fea4..e0124fdaa 100644 --- a/java/rocksjni/jni_perf_context.cc +++ b/java/rocksjni/jni_perf_context.cc @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + #include #include "include/org_rocksdb_PerfContext.h" @@ -1180,4 +1185,4 @@ jlong Java_org_rocksdb_PerfContext_getNumberAsyncSeek(JNIEnv*, jobject, ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); return perf_context->number_async_seek; -} \ No newline at end of file +} diff --git a/java/src/main/java/org/rocksdb/PerfContext.java b/java/src/main/java/org/rocksdb/PerfContext.java index 0adac26fa..3934e4115 100644 --- a/java/src/main/java/org/rocksdb/PerfContext.java +++ b/java/src/main/java/org/rocksdb/PerfContext.java @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + package org.rocksdb; public class PerfContext extends RocksObject { diff --git a/java/src/main/java/org/rocksdb/PerfLevel.java b/java/src/main/java/org/rocksdb/PerfLevel.java index 1cb07f042..a5b452640 100644 --- a/java/src/main/java/org/rocksdb/PerfLevel.java +++ b/java/src/main/java/org/rocksdb/PerfLevel.java @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + package org.rocksdb; public enum PerfLevel { diff --git a/java/src/test/java/org/rocksdb/PerfContextTest.java b/java/src/test/java/org/rocksdb/PerfContextTest.java index ae70afc0a..3145b59e4 100644 --- a/java/src/test/java/org/rocksdb/PerfContextTest.java +++ b/java/src/test/java/org/rocksdb/PerfContextTest.java @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ package org.rocksdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/PerfLevelTest.java b/java/src/test/java/org/rocksdb/PerfLevelTest.java index 05532e556..bb766cbd4 100644 --- a/java/src/test/java/org/rocksdb/PerfLevelTest.java +++ b/java/src/test/java/org/rocksdb/PerfLevelTest.java @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + package org.rocksdb; import static org.assertj.core.api.Assertions.assertThat; From d367b34cc921f4ac2aa358c324a8a8f1f47589f0 Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 11 Oct 2023 13:28:10 -0700 Subject: [PATCH 194/386] Fix TSAN crash test false positive (#11941) Summary: Fix the TSAN false positive caused by reading a bool flag without synchronization. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11941 Test Plan: Run tsan crash test locally Reviewed By: akankshamahajan15 Differential Revision: D50181799 Pulled By: anand1976 fbshipit-source-id: 889e7237e9f3c9452a9df94a0d949db5fe13bb57 --- cache/compressed_secondary_cache.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index 32b30f0ed..b29670b77 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -33,9 +33,13 @@ std::unique_ptr CompressedSecondaryCache::Lookup( Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase, bool& kept_in_sec_cache) { assert(helper); + // This is a minor optimization. Its ok to skip it in TSAN in order to + // avoid a false positive. +#ifndef __SANITIZE_THREAD__ if (disable_cache_) { return nullptr; } +#endif std::unique_ptr handle; kept_in_sec_cache = false; From d2daa10afcb3f19e2351e19b0295f498b78b6019 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Wed, 11 Oct 2023 14:26:10 -0700 Subject: [PATCH 195/386] Fix crash_test_with_best_efforts_recovery (#11938) Summary: Thanks ltamasi and ajkr for initial investigations on the test failure. Per the investigations, the following scenario is likely causing the test to fail. 1. Recovery is needed (could be any reason during crash test) 2. Trying to recover from the latest manifest fails (likely due to read error injection) 3. DB opens with recovery from the next manifest which is different from step 2. 4. Expected state is based on the manifest we tried and failed in step 2. 5. Two manifests used in step 2 and 3 are confirmed to have difference in LSM trees (Thanks ltamasi again for the finding). ``` 2023/10/05-11:24:18.942189 56341 [db/version_set.cc:6079] Trying to recover from manifest: /dev/shm/rocksdb_test/rocksdb_crashtest_blackbox/MANIFEST-007184 ... 2023/10/05-11:24:18.978007 56341 [db/version_set.cc:6079] Trying to recover from manifest: /dev/shm/rocksdb_test/rocksdb_crashtest_blackbox/MANIFEST-007180 ``` ``` [ltamasi@devbig1024.prn1 /tmp/x]$ ldb manifest_dump --hex --path=MANIFEST-007184_renamed_ > 2 [ltamasi@devbig1024.prn1 /tmp/x]$ ldb manifest_dump --hex --path=MANIFEST-007180_renamed_ > 1 [ltamasi@devbig1024.prn1 /tmp/x]$ diff 1 2 --- 1 2023-10-09 10:29:16.966215207 -0700 +++ 2 2023-10-09 10:29:11.984241645 -0700 @@ -13,7 +13,7 @@ 7174:3950254[1875617 .. 2203952]['000000000003415B000000000000012B000000000000007D' seq:1906214, type:1 .. 
'000000000003CA59000000000000012B000000000000005C' seq:2039838, type:1] 7175:88060[2074748 .. 2203892]['000000000003CA6300000000000000CF78787878787878' seq:2167539, type:2 .. '000000000003D08F000000000000012B0000000000000130' seq:2112478, type:0] --- level 6 --- version# 1 --- - 7057:3132633[0 .. 2046144]['0000000000000009000000000000000978' seq:0, type:1 .. '0000000000005F8B000000000000012B00000000000002AC' seq:0, type:1] + 7219:2135565[0 .. 2046144]['0000000000000009000000000000000978' seq:0, type:1 .. '0000000000005F8B000000000000012B00000000000002AC' seq:0, type:1] 7061:827724[0 .. 2046131]['0000000000005F95000000000000000778787878787878' seq:0, type:1 .. '000000000000784F000000000000012B0000000000000113' seq:0, type:1] 6763:1352[0 .. 0]['000000000000784F000000000000012B0000000000000129' seq:0, type:1 .. '000000000000784F000000000000012B0000000000000129' seq:0, type:1] 7173:4812291[0 .. 2203957]['000000000000784F000000000000012B0000000000000138' seq:0, type:1 .. '0000000000020FAE787878787878' seq:0, type:1] @@ -77,4 +77,4 @@ --- level 61 --- version# 1 --- --- level 62 --- version# 1 --- --- level 63 --- version# 1 --- -next_file_number 7182 last_sequence 2203963 prev_log_number 0 max_column_family 0 min_log_number_to_keep 7015 +next_file_number 7221 last_sequence 2203963 prev_log_number 0 max_column_family 0 min_log_number_to_keep 7015 ``` We have two options to fix this. Either skip verification against expected state or disable read injection when BE recovery is enabled. I chose to skip verification against expected state per discussion. (See comments in this PR) Please note that some linter changes were included in this PR. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11938 Test Plan: ``` TEST_TMPDIR=/dev/shm/rocksdb make crash_test_with_best_efforts_recovery ``` Reviewed By: ltamasi Differential Revision: D50136341 Pulled By: jaykorean fbshipit-source-id: ac7434d592aebc148bfc3a4fcaa34936f136b95c --- db_stress_tool/db_stress_tool.cc | 6 +++--- tools/db_crashtest.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 9c57dafd7..3c50cdb88 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -240,10 +240,10 @@ int db_stress_tool(int argc, char** argv) { FLAGS_secondaries_base = default_secondaries_path; } - if (FLAGS_best_efforts_recovery && !FLAGS_skip_verifydb && - !FLAGS_disable_wal) { + if (FLAGS_best_efforts_recovery && + !(FLAGS_skip_verifydb && FLAGS_disable_wal)) { fprintf(stderr, - "With best-efforts recovery, either skip_verifydb or disable_wal " + "With best-efforts recovery, skip_verifydb and disable_wal " "should be set to true.\n"); exit(1); } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 8952aed1f..5c37b7b95 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -160,12 +160,12 @@ "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]), "bytes_per_sync": lambda: random.choice([0, 262144]), "wal_bytes_per_sync": lambda: random.choice([0, 524288]), - "compaction_readahead_size" : lambda : random.choice( + "compaction_readahead_size": lambda: random.choice( [0, 0, 1024 * 1024]), "db_write_buffer_size": lambda: random.choice( [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024] ), - "use_write_buffer_manager": lambda: random.randint(0,1), + "use_write_buffer_manager": lambda: random.randint(0, 1), "avoid_unnecessary_blocking_io": random.randint(0, 1), 
"write_dbid_to_manifest": random.randint(0, 1), "avoid_flush_during_recovery": lambda: random.choice( @@ -221,7 +221,9 @@ "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]), "memtable_max_range_deletions": lambda: random.choice([0] * 6 + [100, 1000]), # 0 (disable) is the default and more commonly used value. - "bottommost_file_compaction_delay": lambda: random.choice([0, 0, 0, 600, 3600, 86400]), + "bottommost_file_compaction_delay": lambda: random.choice( + [0, 0, 0, 600, 3600, 86400] + ), "auto_readahead_size" : lambda: random.choice([0, 1]), } @@ -423,6 +425,8 @@ def is_direct_io_supported(dbname): "atomic_flush": 0, "disable_wal": 1, "column_families": 1, + "skip_verifydb": 1, + "verify_db_one_in": 0 } blob_params = { @@ -664,6 +668,8 @@ def finalize_and_sanitize(src_params): dest_params["enable_compaction_filter"] = 0 dest_params["sync"] = 0 dest_params["write_fault_one_in"] = 0 + dest_params["skip_verifydb"] = 1 + dest_params["verify_db_one_in"] = 0 # Remove the following once write-prepared/write-unprepared with/without # unordered write supports timestamped snapshots if dest_params.get("create_timestamped_snapshot_one_in", 0) > 0: @@ -774,7 +780,7 @@ def gen_cmd(params, unknown_params): "stress_cmd", "test_tiered_storage", "cleanup_cmd", - "skip_tmpdir_check" + "skip_tmpdir_check", } and v is not None ] From b2fe14817e8e3e9dce1d1483a5422711a07a56be Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Thu, 12 Oct 2023 09:39:01 -0700 Subject: [PATCH 196/386] java API - load block based table config (#10826) Summary: Closes https://github.com/facebook/rocksdb/issues/5297 The BlockBasedTableConfig (or more generally, the TableFormatConfig) of ColumnFamilyOptions, isn't being constructed when column family options are loaded. This happens in `OptionsUtil` which implements the loading. In `OptionsUtil` we add the method `private native static TableFormatConfig readTableFormatConfig(final long nativeHandle_)` which defers to a JNI method which creates a `TableFormatConfig` (specifically a `BlockBasedTableConfig`) for the supplied `ColumnFamilyOptions`, by copying the table format attached to the C++ column family options. A new Java constructor for `BlockBasedTableConfig` is implemented which is called from C++ with the parameters retrieved from the table format, and then returned to the calling `readTableFormatConfig`. At the Java side in `OptionsUtil`, the new `TableFormatConfig` is added as the `tableFormatConfig_` field of the `ColumnFamilyOptions`. To support this, the new class `BlockBasedTableOptionsJni` and associated support methods are added to 'portal.h'. `BloomFilter.java` has a constructor and field added so that the filter in use can be read back and inspected. `FilterPolicyType.java` implements an enum (shadowed in C++) to support transfer of filter policy information back to Java from being read at the C++ side. Tests written to cover the block based table config, and cleaned up and generalised a bit as some of the methods on OptionsUtil weren't tested; and these had their own unique JNI method variants which in turn were never exercised in test. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/10826 Reviewed By: ajkr Differential Revision: D50136247 Pulled By: jowlyzhang fbshipit-source-id: 39387448147abc574e99f43979d89b0900e5f81d --- java/CMakeLists.txt | 1 + java/rocksjni/options_util.cc | 48 +++ java/rocksjni/portal.h | 139 +++++++- .../org/rocksdb/BlockBasedTableConfig.java | 52 ++- .../main/java/org/rocksdb/BloomFilter.java | 32 +- .../java/org/rocksdb/ColumnFamilyOptions.java | 5 +- .../java/org/rocksdb/FilterPolicyType.java | 49 +++ java/src/main/java/org/rocksdb/Options.java | 1 - .../main/java/org/rocksdb/OptionsUtil.java | 12 + .../java/org/rocksdb/OptionsUtilTest.java | 335 +++++++++++++++--- .../block_based/block_based_table_factory.cc | 4 +- 11 files changed, 623 insertions(+), 55 deletions(-) create mode 100644 java/src/main/java/org/rocksdb/FilterPolicyType.java diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index dbc40b292..4126ebe29 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -152,6 +152,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/Experimental.java src/main/java/org/rocksdb/ExternalFileIngestionInfo.java src/main/java/org/rocksdb/Filter.java + src/main/java/org/rocksdb/FilterPolicyType.java src/main/java/org/rocksdb/FileOperationInfo.java src/main/java/org/rocksdb/FlushJobInfo.java src/main/java/org/rocksdb/FlushReason.java diff --git a/java/rocksjni/options_util.cc b/java/rocksjni/options_util.cc index c3d7fcef6..5ebdbba92 100644 --- a/java/rocksjni/options_util.cc +++ b/java/rocksjni/options_util.cc @@ -137,3 +137,51 @@ jstring Java_org_rocksdb_OptionsUtil_getLatestOptionsFileName( return env->NewStringUTF(options_file_name.c_str()); } } + +/* + * Class: org_rocksdb_OptionsUtil + * Method: readTableFormatConfig + * Signature: (J)Lorg/rocksdb/TableFormatConfig; + */ +jobject Java_org_rocksdb_OptionsUtil_readTableFormatConfig(JNIEnv* env, jclass, + jlong jcf_options) { + if (jcf_options == 0) { + env->ThrowNew( + ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::getJClass(env), + "Null column family options handle supplied to " + "readNewTableFormatConfig"); + return nullptr; + } + + auto* cf_options = + reinterpret_cast(jcf_options); + auto* table_factory = cf_options->table_factory.get(); + if (table_factory == nullptr) { + env->ThrowNew( + ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::getJClass(env), + "Column family options supplied to readNewTableFormatConfig has no " + "table options"); + return nullptr; + } + + if (strcmp(ROCKSDB_NAMESPACE::TableFactory::kBlockBasedTableName(), + table_factory->Name()) == 0) { + auto* table_factory_options = + table_factory->GetOptions(); + if (table_factory_options == nullptr) { + ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( + env, std::string("Null table format options supplied to " + "readNewTableFormatConfig() ") + + table_factory->Name()); + return nullptr; + } + return ROCKSDB_NAMESPACE::BlockBasedTableOptionsJni::construct( + env, table_factory_options); + } else { + ROCKSDB_NAMESPACE::IllegalArgumentExceptionJni::ThrowNew( + env, std::string("readNewTableFormatConfig() is not implemented for " + "this table format: ") + + table_factory->Name()); + return nullptr; + } +} diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index aed3dc593..eba181203 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -208,6 +208,18 @@ class IllegalArgumentExceptionJni return JavaException::ThrowNew(env, s.ToString()); } + + /** + * Create and throw a Java IllegalArgumentException with the provided 
message + * + * @param env A pointer to the Java environment + * @param msg The message for the exception + * + * @return true if an exception was thrown, false otherwise + */ + static bool ThrowNew(JNIEnv* env, const std::string& msg) { + return JavaException::ThrowNew(env, msg); + } }; // The portal class for org.rocksdb.Status.Code @@ -3562,13 +3574,20 @@ class IteratorJni } }; -// The portal class for org.rocksdb.Filter -class FilterJni +// The portal class for org.rocksdb.FilterPolicy + +enum FilterPolicyTypeJni { + kUnknownFilterPolicy = 0x00, + kBloomFilterPolicy = 0x01, + kRibbonFilterPolicy = 0x02, +}; +class FilterPolicyJni : public RocksDBNativeClass< - std::shared_ptr*, FilterJni> { + std::shared_ptr*, FilterPolicyJni> { + private: public: /** - * Get the Java Class org.rocksdb.Filter + * Get the Java Class org.rocksdb.FilterPolicy * * @param env A pointer to the Java environment * @@ -3577,7 +3596,19 @@ class FilterJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Filter"); + return RocksDBNativeClass::getJClass(env, "org/rocksdb/FilterPolicy"); + } + + static jbyte toJavaIndexType(const FilterPolicyTypeJni& filter_policy_type) { + return static_cast(filter_policy_type); + } + + static FilterPolicyTypeJni getFilterPolicyType( + const std::string& policy_class_name) { + if (policy_class_name == "rocksdb.BuiltinBloomFilter") { + return kBloomFilterPolicy; + } + return kUnknownFilterPolicy; } }; @@ -6720,7 +6751,7 @@ class ChecksumTypeJni { return ROCKSDB_NAMESPACE::ChecksumType::kXXH3; default: // undefined/default - return ROCKSDB_NAMESPACE::ChecksumType::kCRC32c; + return ROCKSDB_NAMESPACE::ChecksumType::kXXH3; } } }; @@ -8797,5 +8828,101 @@ class CompactRangeOptionsTimestampJni : public JavaClass { } }; +// The portal class for org.rocksdb.BlockBasedTableOptions +class BlockBasedTableOptionsJni + : public RocksDBNativeClass { + public: + /** + * Get the Java Class org.rocksdb.BlockBasedTableConfig + * + * @param env A pointer to the Java environment + * + * @return The Java Class or nullptr if one of the + * ClassFormatError, ClassCircularityError, NoClassDefFoundError, + * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown + */ + static jclass getJClass(JNIEnv* env) { + return RocksDBNativeClass::getJClass(env, + "org/rocksdb/BlockBasedTableConfig"); + } + + /** + * Create a new Java org.rocksdb.BlockBasedTableConfig object with the + * properties as the provided C++ ROCKSDB_NAMESPACE::BlockBasedTableOptions + * object + * + * @param env A pointer to the Java environment + * @param cfoptions A pointer to ROCKSDB_NAMESPACE::ColumnFamilyOptions object + * + * @return A reference to a Java org.rocksdb.ColumnFamilyOptions object, or + * nullptr if an an exception occurs + */ + static jobject construct( + JNIEnv* env, const BlockBasedTableOptions* table_factory_options) { + jclass jclazz = getJClass(env); + if (jclazz == nullptr) { + // exception occurred accessing class + return nullptr; + } + + jmethodID method_id_init = + env->GetMethodID(jclazz, "", "(ZZZZBBDBZJIIIJZZZZZIIZZBBJD)V"); + if (method_id_init == nullptr) { + // exception thrown: NoSuchMethodException or OutOfMemoryError + return nullptr; + } + + FilterPolicyTypeJni filter_policy_type = + FilterPolicyTypeJni::kUnknownFilterPolicy; + jlong filter_policy_handle = 0L; + jdouble filter_policy_config_value = 0.0; + if (table_factory_options->filter_policy) { + auto filter_policy = 
table_factory_options->filter_policy.get(); + filter_policy_type = FilterPolicyJni::getFilterPolicyType( + filter_policy->CompatibilityName()); + if (FilterPolicyTypeJni::kUnknownFilterPolicy != filter_policy_type) { + filter_policy_handle = GET_CPLUSPLUS_POINTER(filter_policy); + } + } + + jobject jcfd = env->NewObject( + jclazz, method_id_init, + table_factory_options->cache_index_and_filter_blocks, + table_factory_options->cache_index_and_filter_blocks_with_high_priority, + table_factory_options->pin_l0_filter_and_index_blocks_in_cache, + table_factory_options->pin_top_level_index_and_filter, + IndexTypeJni::toJavaIndexType(table_factory_options->index_type), + DataBlockIndexTypeJni::toJavaDataBlockIndexType( + table_factory_options->data_block_index_type), + table_factory_options->data_block_hash_table_util_ratio, + ChecksumTypeJni::toJavaChecksumType(table_factory_options->checksum), + table_factory_options->no_block_cache, + static_cast(table_factory_options->block_size), + table_factory_options->block_size_deviation, + table_factory_options->block_restart_interval, + table_factory_options->index_block_restart_interval, + static_cast(table_factory_options->metadata_block_size), + table_factory_options->partition_filters, + table_factory_options->optimize_filters_for_memory, + table_factory_options->use_delta_encoding, + table_factory_options->whole_key_filtering, + table_factory_options->verify_compression, + table_factory_options->read_amp_bytes_per_bit, + table_factory_options->format_version, + table_factory_options->enable_index_compression, + table_factory_options->block_align, + IndexShorteningModeJni::toJavaIndexShorteningMode( + table_factory_options->index_shortening), + FilterPolicyJni::toJavaIndexType(filter_policy_type), + filter_policy_handle, filter_policy_config_value); + if (env->ExceptionCheck()) { + return nullptr; + } + + return jcfd; + } +}; + } // namespace ROCKSDB_NAMESPACE #endif // JAVA_ROCKSJNI_PORTAL_H_ diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java index 70dee3dd9..ea9a766ad 100644 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java @@ -5,7 +5,7 @@ package org.rocksdb; /** - * The config for plain table sst format. + * The config for block based table sst format. *

* BlockBasedTable is a RocksDB's default SST file format. */ @@ -21,7 +21,7 @@ public BlockBasedTableConfig() { indexType = IndexType.kBinarySearch; dataBlockIndexType = DataBlockIndexType.kDataBlockBinarySearch; dataBlockHashTableUtilRatio = 0.75; - checksumType = ChecksumType.kCRC32c; + checksumType = ChecksumType.kXXH3; noBlockCache = false; blockCache = null; persistentCache = null; @@ -47,6 +47,54 @@ public BlockBasedTableConfig() { blockCacheNumShardBits = 0; } + /** + * Constructor for use by C++ via JNI + */ + private BlockBasedTableConfig(final boolean cacheIndexAndFilterBlocks, + final boolean cacheIndexAndFilterBlocksWithHighPriority, + final boolean pinL0FilterAndIndexBlocksInCache, final boolean pinTopLevelIndexAndFilter, + final byte indexType, final byte dataBlockIndexType, final double dataBlockHashTableUtilRatio, + final byte checksumType, final boolean noBlockCache, final long blockSize, + final int blockSizeDeviation, final int blockRestartInterval, + final int indexBlockRestartInterval, final long metadataBlockSize, + final boolean partitionFilters, final boolean optimizeFiltersForMemory, + final boolean useDeltaEncoding, final boolean wholeKeyFiltering, + final boolean verifyCompression, final int readAmpBytesPerBit, final int formatVersion, + final boolean enableIndexCompression, final boolean blockAlign, final byte indexShortening, + final byte filterPolicyType, final long filterPolicyHandle, + final double filterPolicyConfigValue) { + this.cacheIndexAndFilterBlocks = cacheIndexAndFilterBlocks; + this.cacheIndexAndFilterBlocksWithHighPriority = cacheIndexAndFilterBlocksWithHighPriority; + this.pinL0FilterAndIndexBlocksInCache = pinL0FilterAndIndexBlocksInCache; + this.pinTopLevelIndexAndFilter = pinTopLevelIndexAndFilter; + this.indexType = IndexType.values()[indexType]; + this.dataBlockIndexType = DataBlockIndexType.values()[dataBlockIndexType]; + this.dataBlockHashTableUtilRatio = dataBlockHashTableUtilRatio; + this.checksumType = ChecksumType.values()[checksumType]; + this.noBlockCache = noBlockCache; + this.blockSize = blockSize; + this.blockSizeDeviation = blockSizeDeviation; + this.blockRestartInterval = blockRestartInterval; + this.indexBlockRestartInterval = indexBlockRestartInterval; + this.metadataBlockSize = metadataBlockSize; + this.partitionFilters = partitionFilters; + this.optimizeFiltersForMemory = optimizeFiltersForMemory; + this.useDeltaEncoding = useDeltaEncoding; + this.wholeKeyFiltering = wholeKeyFiltering; + this.verifyCompression = verifyCompression; + this.readAmpBytesPerBit = readAmpBytesPerBit; + this.formatVersion = formatVersion; + this.enableIndexCompression = enableIndexCompression; + this.blockAlign = blockAlign; + this.indexShortening = IndexShorteningMode.values()[indexShortening]; + Filter filterPolicy = FilterPolicyType.values()[filterPolicyType].createFilter( + filterPolicyHandle, filterPolicyConfigValue); + if (filterPolicy != null) { + filterPolicy.disOwnNativeHandle(); + this.setFilterPolicy(filterPolicy); + } + } + /** * Indicating if we'd put index/filter blocks to the block cache. 
* If not specified, each "table reader" object will pre-load index/filter diff --git a/java/src/main/java/org/rocksdb/BloomFilter.java b/java/src/main/java/org/rocksdb/BloomFilter.java index 0b4e93229..0d1a5ad3b 100644 --- a/java/src/main/java/org/rocksdb/BloomFilter.java +++ b/java/src/main/java/org/rocksdb/BloomFilter.java @@ -5,6 +5,8 @@ package org.rocksdb; +import java.util.Objects; + /** * Bloom filter policy that uses a bloom filter with approximately * the specified number of bits per key. @@ -33,6 +35,9 @@ public BloomFilter() { this(DEFAULT_BITS_PER_KEY); } + // record this for comparison of filters. + private final double bitsPerKey; + /** * BloomFilter constructor * @@ -47,7 +52,17 @@ public BloomFilter() { * @param bitsPerKey number of bits to use */ public BloomFilter(final double bitsPerKey) { - super(createNewBloomFilter(bitsPerKey)); + this(createNewBloomFilter(bitsPerKey), bitsPerKey); + } + + /** + * + * @param nativeHandle handle to existing bloom filter at RocksDB C++ side + * @param bitsPerKey number of bits to use - recorded for comparison + */ + BloomFilter(final long nativeHandle, final double bitsPerKey) { + super(nativeHandle); + this.bitsPerKey = bitsPerKey; } /** @@ -69,5 +84,20 @@ public BloomFilter(final double bitsPerKey, final boolean IGNORED_useBlockBasedM this(bitsPerKey); } + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + BloomFilter that = (BloomFilter) o; + return bitsPerKey == that.bitsPerKey; + } + + @Override + public int hashCode() { + return Objects.hash(bitsPerKey); + } + private static native long createNewBloomFilter(final double bitsKeyKey); } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index aca72e06e..65a2dc5d6 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -599,6 +599,10 @@ public ColumnFamilyOptions setTableFormatConfig( return this; } + void setFetchedTableFormatConfig(final TableFormatConfig tableFormatConfig) { + this.tableFormatConfig_ = tableFormatConfig; + } + @Override public String tableFactoryName() { assert(isOwningHandle()); @@ -1512,7 +1516,6 @@ private static native void setCompactionThreadLimiter( final long nativeHandle_, final long compactionThreadLimiterHandle); private native void setMemtableMaxRangeDeletions(final long handle, final int count); private native int memtableMaxRangeDeletions(final long handle); - private native void setEnableBlobFiles(final long nativeHandle_, final boolean enableBlobFiles); private native boolean enableBlobFiles(final long nativeHandle_); private native void setMinBlobSize(final long nativeHandle_, final long minBlobSize); diff --git a/java/src/main/java/org/rocksdb/FilterPolicyType.java b/java/src/main/java/org/rocksdb/FilterPolicyType.java new file mode 100644 index 000000000..6a693ee40 --- /dev/null +++ b/java/src/main/java/org/rocksdb/FilterPolicyType.java @@ -0,0 +1,49 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * IndexType used in conjunction with BlockBasedTable. 
+ */ +public enum FilterPolicyType { + kUnknownFilterPolicy((byte) 0), + + /** + * This is a user-facing policy that automatically choose between + * LegacyBloom and FastLocalBloom based on context at build time, + * including compatibility with format_version. + */ + kBloomFilterPolicy((byte) 1), + + /** + * This is a user-facing policy that chooses between Standard128Ribbon + * and FastLocalBloom based on context at build time (LSM level and other + * factors in extreme cases). + */ + kRibbonFilterPolicy((byte) 2); + + public Filter createFilter(final long handle, final double param) { + if (this == kBloomFilterPolicy) { + return new BloomFilter(handle, param); + } + return null; + } + + /** + * Returns the byte value of the enumerations value + * + * @return byte representation + */ + public byte getValue() { + return value_; + } + + FilterPolicyType(byte value) { + value_ = value; + } + + private final byte value_; +} diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 2d735e5c3..7ba0f8b93 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -2540,7 +2540,6 @@ private static native void setMaxBgErrorResumeCount( private static native void setBgerrorResumeRetryInterval( final long handle, final long bgerrorResumeRetryInterval); private static native long bgerrorResumeRetryInterval(final long handle); - private native void setEnableBlobFiles(final long nativeHandle_, final boolean enableBlobFiles); private native boolean enableBlobFiles(final long nativeHandle_); private native void setMinBlobSize(final long nativeHandle_, final long minBlobSize); diff --git a/java/src/main/java/org/rocksdb/OptionsUtil.java b/java/src/main/java/org/rocksdb/OptionsUtil.java index 612023d8e..e242cae9f 100644 --- a/java/src/main/java/org/rocksdb/OptionsUtil.java +++ b/java/src/main/java/org/rocksdb/OptionsUtil.java @@ -47,6 +47,7 @@ public static void loadLatestOptions(final ConfigOptions configOptions, final St final DBOptions dbOptions, final List cfDescs) throws RocksDBException { loadLatestOptions(configOptions.nativeHandle_, dbPath, dbOptions.nativeHandle_, cfDescs); + loadTableFormatConfig(cfDescs); } /** @@ -68,6 +69,7 @@ public static void loadOptionsFromFile(final ConfigOptions configOptions, final List cfDescs) throws RocksDBException { loadOptionsFromFile( configOptions.nativeHandle_, optionsFileName, dbOptions.nativeHandle_, cfDescs); + loadTableFormatConfig(cfDescs); } /** @@ -85,6 +87,14 @@ public static String getLatestOptionsFileName(final String dbPath, final Env env return getLatestOptionsFileName(dbPath, env.nativeHandle_); } + private static void loadTableFormatConfig(final List cfDescs) { + for (final ColumnFamilyDescriptor columnFamilyDescriptor : cfDescs) { + final ColumnFamilyOptions columnFamilyOptions = columnFamilyDescriptor.getOptions(); + columnFamilyOptions.setFetchedTableFormatConfig( + readTableFormatConfig(columnFamilyOptions.nativeHandle_)); + } + } + /** * Private constructor. * This class has only static methods and shouldn't be instantiated. 
@@ -98,4 +108,6 @@ private static native void loadOptionsFromFile(long cfgHandle, String optionsFil long dbOptionsHandle, List cfDescs) throws RocksDBException; private static native String getLatestOptionsFileName(String dbPath, long envHandle) throws RocksDBException; + + private native static TableFormatConfig readTableFormatConfig(final long nativeHandle_); } diff --git a/java/src/test/java/org/rocksdb/OptionsUtilTest.java b/java/src/test/java/org/rocksdb/OptionsUtilTest.java index c2975eadc..23949ac06 100644 --- a/java/src/test/java/org/rocksdb/OptionsUtilTest.java +++ b/java/src/test/java/org/rocksdb/OptionsUtilTest.java @@ -20,16 +20,146 @@ public class OptionsUtilTest { @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); - enum TestAPI { LOAD_LATEST_OPTIONS, LOAD_OPTIONS_FROM_FILE } - @Test public void loadLatestOptions() throws RocksDBException { - verifyOptions(TestAPI.LOAD_LATEST_OPTIONS); + verifyOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + OptionsUtil.loadLatestOptions(configOptions, dbPath, dbOptions, cfDescs); + return cfDescs; + } + } + }); } @Test public void loadOptionsFromFile() throws RocksDBException { - verifyOptions(TestAPI.LOAD_OPTIONS_FROM_FILE); + verifyOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + final String path = + dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); + OptionsUtil.loadOptionsFromFile(configOptions, path, dbOptions, cfDescs); + return cfDescs; + } + } + }); + } + + @Test + public void loadLatestTableFormatOptions() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + OptionsUtil.loadLatestOptions(configOptions, dbPath, dbOptions, cfDescs); + return cfDescs; + } + } + }); + } + + @Test + public void loadLatestTableFormatOptions2() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + OptionsUtil.loadLatestOptions(configOptions, dbPath, dbOptions, cfDescs); + return cfDescs; + } + } + }); + } + + @Test + public void loadLatestTableFormatOptions3() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + final List cfDescs = new ArrayList<>(); + OptionsUtil.loadLatestOptions(new ConfigOptions(), dbPath, dbOptions, cfDescs); + return cfDescs; + 
} + }); + } + + @Test + public void loadTableFormatOptionsFromFile() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + final String path = + dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); + OptionsUtil.loadOptionsFromFile(configOptions, path, dbOptions, cfDescs); + return cfDescs; + } + } + }); + } + + @Test + public void loadTableFormatOptionsFromFile2() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + try (final ConfigOptions configOptions = new ConfigOptions() + .setIgnoreUnknownOptions(false) + .setInputStringsEscaped(true) + .setEnv(Env.getDefault())) { + final List cfDescs = new ArrayList<>(); + final String path = + dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); + OptionsUtil.loadOptionsFromFile(configOptions, path, dbOptions, cfDescs); + return cfDescs; + } + } + }); + } + + @Test + public void loadTableFormatOptionsFromFile3() throws RocksDBException { + verifyTableFormatOptions(new LoaderUnderTest() { + @Override + List loadOptions(final String dbPath, final DBOptions dbOptions) + throws RocksDBException { + final List cfDescs = new ArrayList<>(); + final String path = + dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); + OptionsUtil.loadOptionsFromFile(new ConfigOptions(), path, dbOptions, cfDescs); + return cfDescs; + } + }); } @Test @@ -46,7 +176,12 @@ public void getLatestOptionsFileName() throws RocksDBException { // System.out.println("latest options fileName: " + fName); } - private void verifyOptions(final TestAPI apiType) throws RocksDBException { + static abstract class LoaderUnderTest { + abstract List loadOptions(final String path, final DBOptions dbOptions) + throws RocksDBException; + } + + private void verifyOptions(final LoaderUnderTest loaderUnderTest) throws RocksDBException { final String dbPath = dbFolder.getRoot().getAbsolutePath(); final Options options = new Options() .setCreateIfMissing(true) @@ -76,18 +211,113 @@ private void verifyOptions(final TestAPI apiType) throws RocksDBException { } // Read the options back and verify - final DBOptions dbOptions = new DBOptions(); - final ConfigOptions configOptions = - new ConfigOptions().setIgnoreUnknownOptions(false).setInputStringsEscaped(true).setEnv( - Env.getDefault()); - final List cfDescs = new ArrayList<>(); - String path = dbPath; - if (apiType == TestAPI.LOAD_LATEST_OPTIONS) { - OptionsUtil.loadLatestOptions(configOptions, path, dbOptions, cfDescs); - } else if (apiType == TestAPI.LOAD_OPTIONS_FROM_FILE) { - path = dbPath + "/" + OptionsUtil.getLatestOptionsFileName(dbPath, Env.getDefault()); - OptionsUtil.loadOptionsFromFile(configOptions, path, dbOptions, cfDescs); + try (DBOptions dbOptions = new DBOptions()) { + final List cfDescs = loaderUnderTest.loadOptions(dbPath, dbOptions); + + assertThat(dbOptions.createIfMissing()).isEqualTo(options.createIfMissing()); + assertThat(dbOptions.paranoidChecks()).isEqualTo(options.paranoidChecks()); + assertThat(dbOptions.maxOpenFiles()).isEqualTo(options.maxOpenFiles()); + 
assertThat(dbOptions.delayedWriteRate()).isEqualTo(options.delayedWriteRate()); + + assertThat(cfDescs.size()).isEqualTo(2); + assertThat(cfDescs.get(0)).isNotNull(); + assertThat(cfDescs.get(1)).isNotNull(); + assertThat(cfDescs.get(0).getName()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); + assertThat(cfDescs.get(1).getName()).isEqualTo(secondCFName); + + final ColumnFamilyOptions defaultCFOpts = cfDescs.get(0).getOptions(); + assertThat(defaultCFOpts.writeBufferSize()).isEqualTo(baseDefaultCFOpts.writeBufferSize()); + assertThat(defaultCFOpts.maxWriteBufferNumber()) + .isEqualTo(baseDefaultCFOpts.maxWriteBufferNumber()); + assertThat(defaultCFOpts.maxBytesForLevelBase()) + .isEqualTo(baseDefaultCFOpts.maxBytesForLevelBase()); + assertThat(defaultCFOpts.level0FileNumCompactionTrigger()) + .isEqualTo(baseDefaultCFOpts.level0FileNumCompactionTrigger()); + assertThat(defaultCFOpts.level0SlowdownWritesTrigger()) + .isEqualTo(baseDefaultCFOpts.level0SlowdownWritesTrigger()); + assertThat(defaultCFOpts.bottommostCompressionType()) + .isEqualTo(baseDefaultCFOpts.bottommostCompressionType()); + + final ColumnFamilyOptions secondCFOpts = cfDescs.get(1).getOptions(); + assertThat(secondCFOpts.writeBufferSize()).isEqualTo(baseSecondCFOpts.writeBufferSize()); + assertThat(secondCFOpts.maxWriteBufferNumber()) + .isEqualTo(baseSecondCFOpts.maxWriteBufferNumber()); + assertThat(secondCFOpts.maxBytesForLevelBase()) + .isEqualTo(baseSecondCFOpts.maxBytesForLevelBase()); + assertThat(secondCFOpts.level0FileNumCompactionTrigger()) + .isEqualTo(baseSecondCFOpts.level0FileNumCompactionTrigger()); + assertThat(secondCFOpts.level0SlowdownWritesTrigger()) + .isEqualTo(baseSecondCFOpts.level0SlowdownWritesTrigger()); + assertThat(secondCFOpts.bottommostCompressionType()) + .isEqualTo(baseSecondCFOpts.bottommostCompressionType()); } + } + + private void verifyTableFormatOptions(final LoaderUnderTest loaderUnderTest) + throws RocksDBException { + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + final Options options = new Options() + .setCreateIfMissing(true) + .setParanoidChecks(false) + .setMaxOpenFiles(478) + .setDelayedWriteRate(1234567L); + final ColumnFamilyOptions defaultCFOptions = new ColumnFamilyOptions(); + defaultCFOptions.setTableFormatConfig(new BlockBasedTableConfig()); + final byte[] altCFName = "alt_cf".getBytes(); + final ColumnFamilyOptions altCFOptions = + new ColumnFamilyOptions() + .setWriteBufferSize(70 * 1024) + .setMaxWriteBufferNumber(7) + .setMaxBytesForLevelBase(53 * 1024 * 1024) + .setLevel0FileNumCompactionTrigger(3) + .setLevel0SlowdownWritesTrigger(51) + .setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION); + + final BlockBasedTableConfig altCFTableConfig = new BlockBasedTableConfig(); + altCFTableConfig.setCacheIndexAndFilterBlocks(true); + altCFTableConfig.setCacheIndexAndFilterBlocksWithHighPriority(false); + altCFTableConfig.setPinL0FilterAndIndexBlocksInCache(true); + altCFTableConfig.setPinTopLevelIndexAndFilter(false); + altCFTableConfig.setIndexType(IndexType.kTwoLevelIndexSearch); + altCFTableConfig.setDataBlockIndexType(DataBlockIndexType.kDataBlockBinaryAndHash); + altCFTableConfig.setDataBlockHashTableUtilRatio(0.65); + altCFTableConfig.setChecksumType(ChecksumType.kxxHash64); + altCFTableConfig.setNoBlockCache(true); + altCFTableConfig.setBlockSize(35 * 1024); + altCFTableConfig.setBlockSizeDeviation(20); + altCFTableConfig.setBlockRestartInterval(12); + altCFTableConfig.setIndexBlockRestartInterval(6); + altCFTableConfig.setMetadataBlockSize(12 * 
1024); + altCFTableConfig.setPartitionFilters(true); + altCFTableConfig.setOptimizeFiltersForMemory(true); + altCFTableConfig.setUseDeltaEncoding(false); + altCFTableConfig.setFilterPolicy(new BloomFilter(7.5)); + altCFTableConfig.setWholeKeyFiltering(false); + altCFTableConfig.setVerifyCompression(true); + altCFTableConfig.setReadAmpBytesPerBit(2); + altCFTableConfig.setFormatVersion(8); + altCFTableConfig.setEnableIndexCompression(false); + altCFTableConfig.setBlockAlign(true); + altCFTableConfig.setIndexShortening(IndexShorteningMode.kShortenSeparatorsAndSuccessor); + altCFTableConfig.setBlockCacheSize(3 * 1024 * 1024); + // Note cache objects are not set here, as they are not read back when reading config. + + altCFOptions.setTableFormatConfig(altCFTableConfig); + + // Create a database with a new column family + try (final RocksDB db = RocksDB.open(options, dbPath)) { + assertThat(db).isNotNull(); + + // create column family + try (final ColumnFamilyHandle columnFamilyHandle = + db.createColumnFamily(new ColumnFamilyDescriptor(altCFName, altCFOptions))) { + assert (columnFamilyHandle != null); + } + } + + // Read the options back and verify + final DBOptions dbOptions = new DBOptions(); + final List cfDescs = loaderUnderTest.loadOptions(dbPath, dbOptions); assertThat(dbOptions.createIfMissing()).isEqualTo(options.createIfMissing()); assertThat(dbOptions.paranoidChecks()).isEqualTo(options.paranoidChecks()); @@ -98,32 +328,51 @@ private void verifyOptions(final TestAPI apiType) throws RocksDBException { assertThat(cfDescs.get(0)).isNotNull(); assertThat(cfDescs.get(1)).isNotNull(); assertThat(cfDescs.get(0).getName()).isEqualTo(RocksDB.DEFAULT_COLUMN_FAMILY); - assertThat(cfDescs.get(1).getName()).isEqualTo(secondCFName); - - final ColumnFamilyOptions defaultCFOpts = cfDescs.get(0).getOptions(); - assertThat(defaultCFOpts.writeBufferSize()).isEqualTo(baseDefaultCFOpts.writeBufferSize()); - assertThat(defaultCFOpts.maxWriteBufferNumber()) - .isEqualTo(baseDefaultCFOpts.maxWriteBufferNumber()); - assertThat(defaultCFOpts.maxBytesForLevelBase()) - .isEqualTo(baseDefaultCFOpts.maxBytesForLevelBase()); - assertThat(defaultCFOpts.level0FileNumCompactionTrigger()) - .isEqualTo(baseDefaultCFOpts.level0FileNumCompactionTrigger()); - assertThat(defaultCFOpts.level0SlowdownWritesTrigger()) - .isEqualTo(baseDefaultCFOpts.level0SlowdownWritesTrigger()); - assertThat(defaultCFOpts.bottommostCompressionType()) - .isEqualTo(baseDefaultCFOpts.bottommostCompressionType()); - - final ColumnFamilyOptions secondCFOpts = cfDescs.get(1).getOptions(); - assertThat(secondCFOpts.writeBufferSize()).isEqualTo(baseSecondCFOpts.writeBufferSize()); - assertThat(secondCFOpts.maxWriteBufferNumber()) - .isEqualTo(baseSecondCFOpts.maxWriteBufferNumber()); - assertThat(secondCFOpts.maxBytesForLevelBase()) - .isEqualTo(baseSecondCFOpts.maxBytesForLevelBase()); - assertThat(secondCFOpts.level0FileNumCompactionTrigger()) - .isEqualTo(baseSecondCFOpts.level0FileNumCompactionTrigger()); - assertThat(secondCFOpts.level0SlowdownWritesTrigger()) - .isEqualTo(baseSecondCFOpts.level0SlowdownWritesTrigger()); - assertThat(secondCFOpts.bottommostCompressionType()) - .isEqualTo(baseSecondCFOpts.bottommostCompressionType()); + assertThat(cfDescs.get(1).getName()).isEqualTo(altCFName); + + verifyBlockBasedTableConfig( + cfDescs.get(0).getOptions().tableFormatConfig(), new BlockBasedTableConfig()); + verifyBlockBasedTableConfig(cfDescs.get(1).getOptions().tableFormatConfig(), altCFTableConfig); + } + + private void 
verifyBlockBasedTableConfig( + final TableFormatConfig actualTableConfig, final BlockBasedTableConfig expected) { + assertThat(actualTableConfig).isNotNull(); + assertThat(actualTableConfig).isInstanceOf(BlockBasedTableConfig.class); + final BlockBasedTableConfig actual = (BlockBasedTableConfig) actualTableConfig; + assertThat(actual.cacheIndexAndFilterBlocks()).isEqualTo(expected.cacheIndexAndFilterBlocks()); + assertThat(actual.cacheIndexAndFilterBlocksWithHighPriority()) + .isEqualTo(expected.cacheIndexAndFilterBlocksWithHighPriority()); + assertThat(actual.pinL0FilterAndIndexBlocksInCache()) + .isEqualTo(expected.pinL0FilterAndIndexBlocksInCache()); + assertThat(actual.indexType()).isEqualTo(expected.indexType()); + assertThat(actual.dataBlockIndexType()).isEqualTo(expected.dataBlockIndexType()); + assertThat(actual.dataBlockHashTableUtilRatio()) + .isEqualTo(expected.dataBlockHashTableUtilRatio()); + assertThat(actual.checksumType()).isEqualTo(expected.checksumType()); + assertThat(actual.noBlockCache()).isEqualTo(expected.noBlockCache()); + assertThat(actual.blockSize()).isEqualTo(expected.blockSize()); + assertThat(actual.blockSizeDeviation()).isEqualTo(expected.blockSizeDeviation()); + assertThat(actual.blockRestartInterval()).isEqualTo(expected.blockRestartInterval()); + assertThat(actual.indexBlockRestartInterval()).isEqualTo(expected.indexBlockRestartInterval()); + assertThat(actual.metadataBlockSize()).isEqualTo(expected.metadataBlockSize()); + assertThat(actual.partitionFilters()).isEqualTo(expected.partitionFilters()); + assertThat(actual.optimizeFiltersForMemory()).isEqualTo(expected.optimizeFiltersForMemory()); + assertThat(actual.wholeKeyFiltering()).isEqualTo(expected.wholeKeyFiltering()); + assertThat(actual.verifyCompression()).isEqualTo(expected.verifyCompression()); + assertThat(actual.readAmpBytesPerBit()).isEqualTo(expected.readAmpBytesPerBit()); + assertThat(actual.formatVersion()).isEqualTo(expected.formatVersion()); + assertThat(actual.enableIndexCompression()).isEqualTo(expected.enableIndexCompression()); + assertThat(actual.blockAlign()).isEqualTo(expected.blockAlign()); + assertThat(actual.indexShortening()).isEqualTo(expected.indexShortening()); + if (expected.filterPolicy() == null) { + assertThat(actual.filterPolicy()).isNull(); + } else { + assertThat(expected.filterPolicy().equals(actual.filterPolicy())); + } + + // not currently persisted - always true when read from options + // this test will fail, and need repaired, if and when "useDeltaEncoding" is persisted. 
+ assertThat(actual.useDeltaEncoding()).isEqualTo(true); } } diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 4ef5da419..25299ecab 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -225,7 +225,6 @@ static std::unordered_map block_based_table_type_info = { /* currently not supported @@ -310,6 +309,9 @@ static std::unordered_map {offsetof(struct BlockBasedTableOptions, optimize_filters_for_memory), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + // TODO "use_delta_encoding" has not been persisted - + // this may have been an omission, but changing this now might be a + // breaker {"filter_policy", OptionTypeInfo::AsCustomSharedPtr( offsetof(struct BlockBasedTableOptions, filter_policy), From d010b02e86104ec5f11d8aff2bb2977b88d1620c Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 12 Oct 2023 10:05:23 -0700 Subject: [PATCH 197/386] Fix race in options taking effect (#11929) Summary: In follow-up to https://github.com/facebook/rocksdb/issues/11922, fix a race in functions like CreateColumnFamily and SetDBOptions where the DB reports one option setting but a different one is left in effect. To fix, we can add an extra mutex around these rare operations. We don't want to hold the DB mutex during I/O or other slow things because of the many purposes it serves, but a mutex more limited to these cases should be fine. I believe this would fix a write-write race in https://github.com/facebook/rocksdb/issues/10079 but not the read-write race. Intended follow-up to this: * Should be able to remove write thread synchronization from DBImpl::WriteOptionsFile Pull Request resolved: https://github.com/facebook/rocksdb/pull/11929 Test Plan: Added two mini-stress style regression tests that fail with >1% probability before this change: DBOptionsTest::SetStatsDumpPeriodSecRace ColumnFamilyTest::CreateAndDropPeriodicRace I haven't reproduced such an inconsistency between in-memory options and on disk latest options, but this change at least improves safety and adds a test anyway: DBOptionsTest::SetStatsDumpPeriodSecRace Reviewed By: ajkr Differential Revision: D50024506 Pulled By: pdillinger fbshipit-source-id: 1e99a9ed4d96fdcf3ac5061ec6b3cee78aecdda4 --- db/column_family_test.cc | 52 +++++++++++++++++++- db/db_impl/db_impl.cc | 23 +++++++-- db/db_impl/db_impl.h | 14 +++++- db/db_impl/db_impl_debug.cc | 1 + db/db_impl/db_impl_open.cc | 9 ++-- db/db_options_test.cc | 50 +++++++++++++++++++ monitoring/instrumented_mutex.h | 2 +- port/port_example.h | 2 +- port/port_posix.cc | 2 +- port/port_posix.h | 8 +-- port/win/port_win.h | 4 +- unreleased_history/bug_fixes/options_race.md | 1 + util/distributed_mutex.h | 2 +- 13 files changed, 148 insertions(+), 22 deletions(-) create mode 100644 unreleased_history/bug_fixes/options_race.md diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 6fa4373c2..25bc0b36f 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -2473,7 +2473,10 @@ void DropSingleColumnFamily(ColumnFamilyTest* cf_test, int cf_id, } } // anonymous namespace -TEST_P(ColumnFamilyTest, CreateAndDropRace) { +// This test attempts to set up a race condition in a way that is no longer +// possible, causing the test to hang. If DBImpl::options_mutex_ is removed +// in the future, this test might become relevant again. 
+TEST_P(ColumnFamilyTest, DISABLED_CreateAndDropRace) { const int kCfCount = 5; std::vector cf_opts; std::vector comparators; @@ -2535,6 +2538,53 @@ TEST_P(ColumnFamilyTest, CreateAndDropRace) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); } +TEST_P(ColumnFamilyTest, CreateAndDropPeriodicRace) { + // This is a mini-stress test looking for inconsistency between the set of + // CFs in the DB, particularly whether any use preserve_internal_time_seconds, + // and whether that is accurately reflected in the periodic task setup. + constexpr size_t kNumThreads = 12; + std::vector threads; + bool last_cf_on = Random::GetTLSInstance()->OneIn(2); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::RegisterRecordSeqnoTimeWorker:BeforePeriodicTaskType", + [&](void* /*arg*/) { std::this_thread::yield(); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_EQ(column_family_options_.preserve_internal_time_seconds, 0U); + ColumnFamilyOptions other_opts = column_family_options_; + ColumnFamilyOptions last_opts = column_family_options_; + (last_cf_on ? last_opts : other_opts).preserve_internal_time_seconds = + 1000000; + Open(); + + for (size_t i = 0; i < kNumThreads; i++) { + threads.emplace_back([this, &other_opts, i]() { + ColumnFamilyHandle* cfh; + ASSERT_OK(db_->CreateColumnFamily(other_opts, std::to_string(i), &cfh)); + ASSERT_OK(db_->DropColumnFamily(cfh)); + ASSERT_OK(db_->DestroyColumnFamilyHandle(cfh)); + }); + } + + ColumnFamilyHandle* last_cfh; + ASSERT_OK(db_->CreateColumnFamily(last_opts, "last", &last_cfh)); + + for (auto& t : threads) { + t.join(); + } + + bool task_enabled = dbfull()->TEST_GetPeriodicTaskScheduler().TEST_HasTask( + PeriodicTaskType::kRecordSeqnoTime); + ASSERT_EQ(last_cf_on, task_enabled); + + ASSERT_OK(db_->DropColumnFamily(last_cfh)); + ASSERT_OK(db_->DestroyColumnFamilyHandle(last_cfh)); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); +} + TEST_P(ColumnFamilyTest, WriteStallSingleColumnFamily) { const uint64_t kBaseRate = 800000u; db_options_.delayed_write_rate = kBaseRate; diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 0b8d21790..140d334ce 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -817,6 +817,10 @@ Status DBImpl::StartPeriodicTaskScheduler() { } Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { + if (!from_db_open) { + options_mutex_.AssertHeld(); + } + uint64_t min_preserve_seconds = std::numeric_limits::max(); uint64_t max_preserve_seconds = std::numeric_limits::min(); bool mapping_was_empty = false; @@ -840,11 +844,6 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { } mapping_was_empty = seqno_to_time_mapping_.Empty(); } - // FIXME: because we released the db mutex, there's a race here where - // if e.g. I create or drop two column families in parallel, I might end up - // with the periodic task scheduler in the wrong state. We don't want to - // just keep holding the mutex, however, because of global timer and mutex - // in PeriodicTaskScheduler. 
uint64_t seqno_time_cadence = 0; if (min_preserve_seconds != std::numeric_limits::max()) { @@ -855,6 +854,9 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { SeqnoToTimeMapping::kMaxSeqnoTimePairsPerCF; } + TEST_SYNC_POINT_CALLBACK( + "DBImpl::RegisterRecordSeqnoTimeWorker:BeforePeriodicTaskType", nullptr); + Status s; if (seqno_time_cadence == 0) { s = periodic_task_scheduler_.Unregister(PeriodicTaskType::kRecordSeqnoTime); @@ -918,6 +920,7 @@ Status DBImpl::CancelPeriodicTaskScheduler() { // esitmate the total size of stats_history_ size_t DBImpl::EstimateInMemoryStatsHistorySize() const { + stats_history_mutex_.AssertHeld(); size_t size_total = sizeof(std::map>); if (stats_history_.size() == 0) return size_total; @@ -1208,6 +1211,7 @@ Status DBImpl::SetOptions( return Status::InvalidArgument("empty input"); } + InstrumentedMutexLock ol(&options_mutex_); MutableCFOptions new_options; Status s; Status persist_options_status; @@ -1266,6 +1270,7 @@ Status DBImpl::SetDBOptions( return Status::InvalidArgument("empty input"); } + InstrumentedMutexLock ol(&options_mutex_); MutableDBOptions new_options; Status s; Status persist_options_status = Status::OK(); @@ -3362,6 +3367,7 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, ColumnFamilyHandle** handle) { assert(handle != nullptr); + InstrumentedMutexLock ol(&options_mutex_); Status s = CreateColumnFamilyImpl(cf_options, column_family, handle); if (s.ok()) { s.UpdateIfOk(WrapUpCreateColumnFamilies({&cf_options})); @@ -3374,6 +3380,7 @@ Status DBImpl::CreateColumnFamilies( const std::vector& column_family_names, std::vector* handles) { assert(handles != nullptr); + InstrumentedMutexLock ol(&options_mutex_); handles->clear(); size_t num_cf = column_family_names.size(); Status s; @@ -3397,6 +3404,7 @@ Status DBImpl::CreateColumnFamilies( const std::vector& column_families, std::vector* handles) { assert(handles != nullptr); + InstrumentedMutexLock ol(&options_mutex_); handles->clear(); size_t num_cf = column_families.size(); Status s; @@ -3423,6 +3431,7 @@ Status DBImpl::CreateColumnFamilies( Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, const std::string& column_family_name, ColumnFamilyHandle** handle) { + options_mutex_.AssertHeld(); // TODO: plumb Env::IOActivity const ReadOptions read_options; Status s; @@ -3514,6 +3523,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { assert(column_family != nullptr); + InstrumentedMutexLock ol(&options_mutex_); Status s = DropColumnFamilyImpl(column_family); if (s.ok()) { s = WriteOptionsFile(true /*need_mutex_lock*/, @@ -3524,6 +3534,7 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { Status DBImpl::DropColumnFamilies( const std::vector& column_families) { + InstrumentedMutexLock ol(&options_mutex_); Status s; bool success_once = false; for (auto* handle : column_families) { @@ -5164,6 +5175,8 @@ Status DestroyDB(const std::string& dbname, const Options& options, Status DBImpl::WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread) { + options_mutex_.AssertHeld(); + WriteThread::Writer w; if (need_mutex_lock) { mutex_.Lock(); diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 5e7e87bb7..73f4d2e5d 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2379,9 +2379,19 @@ class DBImpl : public DB { // Lock over the persistent DB state. 
Non-nullptr iff successfully acquired. FileLock* db_lock_; - // In addition to mutex_, log_write_mutex_ protected writes to stats_history_ + // Guards changes to DB and CF options to ensure consistency between + // * In-memory options objects + // * Settings in effect + // * Options file contents + // while allowing the DB mutex to be released during slow operations like + // persisting options file or modifying global periodic task timer. + // Always acquired *before* DB mutex when this one is applicable. + InstrumentedMutex options_mutex_; + + // Guards reads and writes to in-memory stats_history_. InstrumentedMutex stats_history_mutex_; - // In addition to mutex_, log_write_mutex_ protected writes to logs_ and + + // In addition to mutex_, log_write_mutex_ protects writes to logs_ and // logfile_number_. With two_write_queues it also protects alive_log_files_, // and log_empty_. Refer to the definition of each variable below for more // details. diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index 670bc7887..8be960c7b 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -311,6 +311,7 @@ SeqnoToTimeMapping DBImpl::TEST_GetSeqnoToTimeMapping() const { size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const { + InstrumentedMutexLock l(&const_cast(this)->stats_history_mutex_); return EstimateInMemoryStatsHistorySize(); } } // namespace ROCKSDB_NAMESPACE diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index d48f66ae5..89a9f33e1 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -871,7 +871,8 @@ Status DBImpl::PersistentStatsProcessFormatVersion() { if (s.ok()) { ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); - s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); + s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName, + &handle); } if (s.ok()) { persist_stats_cf_handle_ = static_cast(handle); @@ -924,7 +925,7 @@ Status DBImpl::InitPersistStatsColumnFamily() { ColumnFamilyHandle* handle = nullptr; ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); - s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); + s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName, &handle); persist_stats_cf_handle_ = static_cast(handle); mutex_.Lock(); } @@ -1988,6 +1989,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath(); RecoveryContext recovery_ctx; + impl->options_mutex_.Lock(); impl->mutex_.Lock(); // Handles create_if_missing, error_if_exists @@ -2124,7 +2126,6 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, // The WriteOptionsFile() will release and lock the mutex internally. 
persist_options_status = impl->WriteOptionsFile( false /*need_mutex_lock*/, false /*need_enter_write_thread*/); - *dbptr = impl; impl->opened_successfully_ = true; impl->DeleteObsoleteFiles(); @@ -2245,10 +2246,10 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, if (s.ok()) { s = impl->StartPeriodicTaskScheduler(); } - if (s.ok()) { s = impl->RegisterRecordSeqnoTimeWorker(/*from_db_open=*/true); } + impl->options_mutex_.Unlock(); if (!s.ok()) { for (auto* h : *handles) { delete h; diff --git a/db/db_options_test.cc b/db/db_options_test.cc index e709dcaaa..7e77ac55e 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -19,6 +19,7 @@ #include "rocksdb/convenience.h" #include "rocksdb/rate_limiter.h" #include "rocksdb/stats_history.h" +#include "rocksdb/utilities/options_util.h" #include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" @@ -741,6 +742,55 @@ TEST_F(DBOptionsTest, SetStatsDumpPeriodSec) { Close(); } +TEST_F(DBOptionsTest, SetStatsDumpPeriodSecRace) { + // This is a mini-stress test looking for inconsistency between the reported + // state of the option and the behavior in effect for the DB, after the last + // modification to that option (indefinite inconsistency). + std::vector threads; + for (int i = 0; i < 12; i++) { + threads.emplace_back([this, i]() { + ASSERT_OK(dbfull()->SetDBOptions( + {{"stats_dump_period_sec", i % 2 ? "100" : "0"}})); + }); + } + + for (auto& t : threads) { + t.join(); + } + + bool stats_dump_set = dbfull()->GetDBOptions().stats_dump_period_sec > 0; + bool task_enabled = dbfull()->TEST_GetPeriodicTaskScheduler().TEST_HasTask( + PeriodicTaskType::kDumpStats); + + ASSERT_EQ(stats_dump_set, task_enabled); +} + +TEST_F(DBOptionsTest, SetOptionsAndFileRace) { + // This is a mini-stress test looking for inconsistency between the reported + // state of the option and what is persisted in the options file, after the + // last modification to that option (indefinite inconsistency). + std::vector threads; + for (int i = 0; i < 12; i++) { + threads.emplace_back([this, i]() { + ASSERT_OK(dbfull()->SetOptions({{"ttl", std::to_string(i * 100)}})); + }); + } + + for (auto& t : threads) { + t.join(); + } + + auto setting_in_mem = dbfull()->GetOptions().ttl; + + std::vector cf_descs; + DBOptions db_options; + ConfigOptions cfg; + cfg.env = env_; + ASSERT_OK(LoadLatestOptions(cfg, dbname_, &db_options, &cf_descs, nullptr)); + ASSERT_EQ(cf_descs.size(), 1); + ASSERT_EQ(setting_in_mem, cf_descs[0].options.ttl); +} + TEST_F(DBOptionsTest, SetOptionsStatsPersistPeriodSec) { Options options; options.create_if_missing = true; diff --git a/monitoring/instrumented_mutex.h b/monitoring/instrumented_mutex.h index b97d2502e..33e242759 100644 --- a/monitoring/instrumented_mutex.h +++ b/monitoring/instrumented_mutex.h @@ -46,7 +46,7 @@ class InstrumentedMutex { void Unlock() { mutex_.Unlock(); } - void AssertHeld() { mutex_.AssertHeld(); } + void AssertHeld() const { mutex_.AssertHeld(); } private: void LockInternal(); diff --git a/port/port_example.h b/port/port_example.h index 794149a69..2a19ffee0 100644 --- a/port/port_example.h +++ b/port/port_example.h @@ -43,7 +43,7 @@ class Mutex { // Optionally crash if this thread does not hold this mutex. // The implementation must be fast, especially if NDEBUG is // defined. The implementation is allowed to skip all checks. 
- void AssertHeld(); + void AssertHeld() const; }; class CondVar { diff --git a/port/port_posix.cc b/port/port_posix.cc index 3872293b8..749ad5d60 100644 --- a/port/port_posix.cc +++ b/port/port_posix.cc @@ -100,7 +100,7 @@ bool Mutex::TryLock() { return ret; } -void Mutex::AssertHeld() { +void Mutex::AssertHeld() const { #ifndef NDEBUG assert(locked_); #endif diff --git a/port/port_posix.h b/port/port_posix.h index e49818604..95641c0c5 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -109,9 +109,9 @@ class Mutex { bool TryLock(); - // this will assert if the mutex is not locked - // it does NOT verify that mutex is held by a calling thread - void AssertHeld(); + // This will fail assertion if the mutex is not locked. + // It does NOT verify that mutex is held by a calling thread. + void AssertHeld() const; // Also implement std Lockable inline void lock() { Lock(); } @@ -139,7 +139,7 @@ class RWMutex { void WriteLock(); void ReadUnlock(); void WriteUnlock(); - void AssertHeld() {} + void AssertHeld() const {} private: pthread_rwlock_t mu_; // the underlying platform mutex diff --git a/port/win/port_win.h b/port/win/port_win.h index 621f05370..4aa10d005 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -117,7 +117,7 @@ class Mutex { // this will assert if the mutex is not locked // it does NOT verify that mutex is held by a calling thread - void AssertHeld() { + void AssertHeld() const { #ifndef NDEBUG assert(locked_); #endif @@ -159,7 +159,7 @@ class RWMutex { void WriteUnlock() { ReleaseSRWLockExclusive(&srwLock_); } // Empty as in POSIX - void AssertHeld() {} + void AssertHeld() const {} private: SRWLOCK srwLock_; diff --git a/unreleased_history/bug_fixes/options_race.md b/unreleased_history/bug_fixes/options_race.md new file mode 100644 index 000000000..42b89b8ea --- /dev/null +++ b/unreleased_history/bug_fixes/options_race.md @@ -0,0 +1 @@ +* Fixed a rare race bug involving a concurrent combination of Create/DropColumnFamily and/or Set(DB)Options that could lead to inconsistency between (a) the DB's reported options state, (b) the DB options in effect, and (c) the latest persisted OPTIONS file. diff --git a/util/distributed_mutex.h b/util/distributed_mutex.h index 1734269cc..e3450d753 100644 --- a/util/distributed_mutex.h +++ b/util/distributed_mutex.h @@ -28,7 +28,7 @@ class DMutex : public folly::DistributedMutex { explicit DMutex(bool IGNORED_adaptive = false) { (void)IGNORED_adaptive; } // currently no-op - void AssertHeld() {} + void AssertHeld() const {} }; using DMutexLock = std::lock_guard; From 90e160733ef27321c2a10b64225702da0dd548db Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 12 Oct 2023 15:09:40 -0700 Subject: [PATCH 198/386] Fix runtime error in UpdateTieredCache due to integer underflow (#11949) Summary: With the introduction of the `UpdateTieredCache` API, its possible to dynamically change the compressed secondary cache ratio of the total cache capacity. In order to optimize performance, we avoid using a mutex when inserting/releasing placeholder entries, which can result in some inaccuracy in the accounting during the dynamic update. This inaccuracy was causing a runtime error due to an integer underflow in `UpdateCacheReservationRatio`, causing ubsan crash tests to fail. This PR fixes it by explicitly checking for the underflow. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11949 Test Plan: 1. Added a unit test that fails without the fix 2. 
Run ubsan_crash Reviewed By: akankshamahajan15 Differential Revision: D50240217 Pulled By: anand1976 fbshipit-source-id: d2f7b79da54eec8b61aec2cc1f2943da5d5847ac --- cache/compressed_secondary_cache_test.cc | 21 ++++++++ cache/secondary_cache_adapter.cc | 50 +++++++++++++++---- cache/secondary_cache_adapter.h | 2 +- .../sec_cache_reservation_underflow.md | 1 + 4 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 unreleased_history/bug_fixes/sec_cache_reservation_underflow.md diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index d82be1073..71702b29f 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -1312,6 +1312,27 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0)); } +TEST_P(CompressedSecCacheTestWithTiered, + DynamicUpdateWithReservationUnderflow) { + std::shared_ptr tiered_cache = GetTieredCache(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"CacheWithSecondaryAdapter::Release:ChargeSecCache1", + "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"}, + {"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End", + "CacheWithSecondaryAdapter::Release:ChargeSecCache2"}}); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + port::Thread reserve_release_thread([&]() { + EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(50), Status::OK()); + EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(0), Status::OK()); + }); + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.01)); + reserve_release_thread.join(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.3)); +} + INSTANTIATE_TEST_CASE_P( CompressedSecCacheTests, CompressedSecCacheTestWithTiered, ::testing::Values( diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index b378197cc..70571f5b2 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -5,8 +5,11 @@ #include "cache/secondary_cache_adapter.h" +#include + #include "cache/tiered_secondary_cache.h" #include "monitoring/perf_context_imp.h" +#include "test_util/sync_point.h" #include "util/cast_util.h" namespace ROCKSDB_NAMESPACE { @@ -100,7 +103,8 @@ CacheWithSecondaryAdapter::CacheWithSecondaryAdapter( // secondary cache is freed from the reservation. 
s = pri_cache_res_->UpdateCacheReservation(sec_capacity); assert(s.ok()); - sec_cache_res_ratio_ = (double)sec_capacity / target_->GetCapacity(); + sec_cache_res_ratio_.store((double)sec_capacity / target_->GetCapacity(), + std::memory_order_relaxed); } } @@ -233,7 +237,8 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, CompressionType type) { Status s = target_->Insert(key, value, helper, charge, handle, priority); if (s.ok() && value == nullptr && distribute_cache_res_) { - size_t sec_charge = static_cast(charge * (sec_cache_res_ratio_)); + size_t sec_charge = static_cast( + charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); s = secondary_cache_->Deflate(sec_charge); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false); @@ -282,7 +287,10 @@ bool CacheWithSecondaryAdapter::Release(Handle* handle, ObjectPtr v = target_->Value(handle); if (v == nullptr && distribute_cache_res_) { size_t charge = target_->GetCharge(handle); - size_t sec_charge = static_cast(charge * (sec_cache_res_ratio_)); + size_t sec_charge = static_cast( + charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); + TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache1"); + TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache2"); Status s = secondary_cache_->Inflate(sec_charge); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/true); @@ -433,7 +441,9 @@ const char* CacheWithSecondaryAdapter::Name() const { // where the new capacity < total cache reservations. void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { size_t sec_capacity = static_cast( - capacity * (distribute_cache_res_ ? sec_cache_res_ratio_ : 0.0)); + capacity * (distribute_cache_res_ + ? sec_cache_res_ratio_.load(std::memory_order_relaxed) + : 0.0)); size_t old_sec_capacity = 0; if (distribute_cache_res_) { @@ -493,7 +503,8 @@ void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { // in the future. Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( double compressed_secondary_ratio) { - if (!distribute_cache_res_ || sec_cache_res_ratio_ == 0.0) { + if (!distribute_cache_res_ || + sec_cache_res_ratio_.load(std::memory_order_relaxed) == 0.0) { return Status::NotSupported(); } @@ -507,13 +518,33 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( return s; } - size_t old_sec_reserved = - old_sec_capacity - pri_cache_res_->GetTotalMemoryUsed(); + TEST_SYNC_POINT( + "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"); + + // There's a possible race condition here. Since the read of pri_cache_res_ + // memory used (secondary cache usage charged to primary cache), and the + // change to sec_cache_res_ratio_ are not guarded by a mutex, its possible + // that an Insert/Release in another thread might decrease/increase the + // pri_cache_res_ reservation by the wrong amount. This should not be a + // problem because updating the sec/pri ratio is a rare operation, and + // the worst that can happen is we may over/under charge the secondary + // cache usage by a little bit. But we do need to protect against + // underflow of old_sec_reserved. + // TODO: Make the accounting more accurate by tracking the total memory + // reservation on the primary cache. This will also allow us to remove + // the restriction of not being able to change the sec/pri ratio from + // 0.0 to higher. 
+ size_t sec_charge_to_pri = pri_cache_res_->GetTotalMemoryUsed(); + size_t old_sec_reserved = (old_sec_capacity > sec_charge_to_pri) + ? (old_sec_capacity - sec_charge_to_pri) + : 0; // Calculate the new secondary cache reservation size_t sec_reserved = static_cast( old_sec_reserved * - (double)(compressed_secondary_ratio / sec_cache_res_ratio_)); - sec_cache_res_ratio_ = compressed_secondary_ratio; + (double)(compressed_secondary_ratio / + sec_cache_res_ratio_.load(std::memory_order_relaxed))); + sec_cache_res_ratio_.store(compressed_secondary_ratio, + std::memory_order_relaxed); if (sec_capacity > old_sec_capacity) { // We're increasing the ratio, thus ending up with a larger secondary // cache and a smaller usable primary cache capacity. Similar to @@ -553,6 +584,7 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( } } + TEST_SYNC_POINT("CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End"); #ifndef NDEBUG // As mentioned in the function comments, we may accumulate some erros when // the ratio is changed. We set a flag here which disables some assertions diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 34d52a665..0d5f2d6ea 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -80,7 +80,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { std::shared_ptr pri_cache_res_; // Fraction of a cache memory reservation to be assigned to the secondary // cache - double sec_cache_res_ratio_; + std::atomic sec_cache_res_ratio_; port::Mutex mutex_; #ifndef NDEBUG bool ratio_changed_ = false; diff --git a/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md b/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md new file mode 100644 index 000000000..571100a3e --- /dev/null +++ b/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md @@ -0,0 +1 @@ +Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio. From 648fe25bc02900266658a89987ed66fe092b7f5e Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Thu, 12 Oct 2023 15:26:10 -0700 Subject: [PATCH 199/386] Always clear files marked for compaction in `ComputeCompactionScore()` (#11946) Summary: We were seeing the following stress test failures: ```LevelCompactionBuilder::PickFileToCompact(const rocksdb::autovector >&, bool): Assertion `!level_file.second->being_compacted' failed``` This can happen when we are picking a file to be compacted from some files marked for compaction, but that file is already being_compacted. We prevent this by always calling `ComputeCompactionScore()` after we pick a compaction and mark some files as being_compacted. However, if SetOptions() is called to disable marking certain files to be compacted, say `enable_blob_garbage_collection`, we currently just skip the relevant logic in `ComputeCompactionScore()` without clearing the existing files already marked for compaction. This PR fixes this issue by already clearing these files. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11946 Test Plan: existing tests. 
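To make the shape of the fix concrete, here is a minimal, self-contained sketch (not the actual RocksDB code; the class and member names are simplified stand-ins) of the pattern the patch applies in `ComputeCompactionScore()` and its helpers: the marked-files list is cleared unconditionally, and the enabled/disabled check becomes an early return inside the helper, so disabling a feature via SetOptions() also discards stale markings.

```
// Sketch only: illustrates "always clear, then early-return when disabled".
#include <cstdint>
#include <iostream>
#include <vector>

struct FileMeta {
  uint64_t number;
  uint64_t creation_time;
};

class StorageInfoSketch {
 public:
  void AddFile(const FileMeta& f) { files_.push_back(f); }

  // Mirrors the refactored helpers: called unconditionally by the caller.
  void ComputeExpiredTtlFiles(uint64_t ttl, uint64_t now) {
    expired_ttl_files_.clear();  // always clear, even when ttl == 0
    if (ttl == 0) {
      return;  // feature disabled: nothing stays marked from before
    }
    for (const auto& f : files_) {
      if (f.creation_time + ttl < now) {
        expired_ttl_files_.push_back(f.number);
      }
    }
  }

  const std::vector<uint64_t>& expired() const { return expired_ttl_files_; }

 private:
  std::vector<FileMeta> files_;
  std::vector<uint64_t> expired_ttl_files_;
};

int main() {
  StorageInfoSketch info;
  info.AddFile({/*number=*/1, /*creation_time=*/100});
  info.ComputeExpiredTtlFiles(/*ttl=*/50, /*now=*/500);
  std::cout << "marked: " << info.expired().size() << "\n";  // 1
  // Simulate SetOptions() turning TTL off: the stale marking is dropped.
  info.ComputeExpiredTtlFiles(/*ttl=*/0, /*now=*/500);
  std::cout << "marked: " << info.expired().size() << "\n";  // 0
  return 0;
}
```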
Reviewed By: akankshamahajan15 Differential Revision: D50232608 Pulled By: cbi42 fbshipit-source-id: 11e4fb5e9d48b0f946ad33b18f7c005f0161f496 --- db/db_impl/db_impl.cc | 3 ++- db/version_set.cc | 59 ++++++++++++++++++++++-------------------- db/version_set.h | 8 +++--- db/version_set_test.cc | 33 +++++++++++++++-------- 4 files changed, 60 insertions(+), 43 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 140d334ce..3ae70b209 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -4114,7 +4114,8 @@ void DBImpl::ReleaseSnapshot(const Snapshot* s) { CfdList cf_scheduled; for (auto* cfd : *versions_->GetColumnFamilySet()) { if (!cfd->ioptions()->allow_ingest_behind) { - cfd->current()->storage_info()->UpdateOldestSnapshot(oldest_snapshot); + cfd->current()->storage_info()->UpdateOldestSnapshot( + oldest_snapshot, /*allow_ingest_behind=*/false); if (!cfd->current() ->storage_info() ->BottommostFilesMarkedForCompaction() diff --git a/db/version_set.cc b/db/version_set.cc index 41e90e13d..95df9fb8d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3582,26 +3582,16 @@ void VersionStorageInfo::ComputeCompactionScore( } } ComputeFilesMarkedForCompaction(max_output_level); - if (!immutable_options.allow_ingest_behind) { - ComputeBottommostFilesMarkedForCompaction(); - } - if (mutable_cf_options.ttl > 0 && - compaction_style_ == kCompactionStyleLevel) { - ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl); - } - if (mutable_cf_options.periodic_compaction_seconds > 0) { - ComputeFilesMarkedForPeriodicCompaction( - immutable_options, mutable_cf_options.periodic_compaction_seconds, - max_output_level); - } - - if (mutable_cf_options.enable_blob_garbage_collection && - mutable_cf_options.blob_garbage_collection_age_cutoff > 0.0 && - mutable_cf_options.blob_garbage_collection_force_threshold < 1.0) { - ComputeFilesMarkedForForcedBlobGC( - mutable_cf_options.blob_garbage_collection_age_cutoff, - mutable_cf_options.blob_garbage_collection_force_threshold); - } + ComputeBottommostFilesMarkedForCompaction( + immutable_options.allow_ingest_behind); + ComputeExpiredTtlFiles(immutable_options, mutable_cf_options.ttl); + ComputeFilesMarkedForPeriodicCompaction( + immutable_options, mutable_cf_options.periodic_compaction_seconds, + max_output_level); + ComputeFilesMarkedForForcedBlobGC( + mutable_cf_options.blob_garbage_collection_age_cutoff, + mutable_cf_options.blob_garbage_collection_force_threshold, + mutable_cf_options.enable_blob_garbage_collection); EstimateCompactionBytesNeeded(mutable_cf_options); } @@ -3631,9 +3621,10 @@ void VersionStorageInfo::ComputeFilesMarkedForCompaction(int last_level) { void VersionStorageInfo::ComputeExpiredTtlFiles( const ImmutableOptions& ioptions, const uint64_t ttl) { - assert(ttl > 0); - expired_ttl_files_.clear(); + if (ttl == 0 || compaction_style_ != CompactionStyle::kCompactionStyleLevel) { + return; + } int64_t _current_time; auto status = ioptions.clock->GetCurrentTime(&_current_time); @@ -3658,9 +3649,10 @@ void VersionStorageInfo::ComputeExpiredTtlFiles( void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( const ImmutableOptions& ioptions, const uint64_t periodic_compaction_seconds, int last_level) { - assert(periodic_compaction_seconds > 0); - files_marked_for_periodic_compaction_.clear(); + if (periodic_compaction_seconds == 0) { + return; + } int64_t temp_current_time; auto status = ioptions.clock->GetCurrentTime(&temp_current_time); @@ -3714,8 +3706,14 @@ void 
VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( void VersionStorageInfo::ComputeFilesMarkedForForcedBlobGC( double blob_garbage_collection_age_cutoff, - double blob_garbage_collection_force_threshold) { + double blob_garbage_collection_force_threshold, + bool enable_blob_garbage_collection) { files_marked_for_forced_blob_gc_.clear(); + if (!(enable_blob_garbage_collection && + blob_garbage_collection_age_cutoff > 0.0 && + blob_garbage_collection_force_threshold < 1.0)) { + return; + } if (blob_files_.empty()) { return; @@ -4172,17 +4170,22 @@ void VersionStorageInfo::GenerateFileLocationIndex() { } } -void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum) { +void VersionStorageInfo::UpdateOldestSnapshot(SequenceNumber seqnum, + bool allow_ingest_behind) { assert(seqnum >= oldest_snapshot_seqnum_); oldest_snapshot_seqnum_ = seqnum; if (oldest_snapshot_seqnum_ > bottommost_files_mark_threshold_) { - ComputeBottommostFilesMarkedForCompaction(); + ComputeBottommostFilesMarkedForCompaction(allow_ingest_behind); } } -void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction() { +void VersionStorageInfo::ComputeBottommostFilesMarkedForCompaction( + bool allow_ingest_behind) { bottommost_files_marked_for_compaction_.clear(); bottommost_files_mark_threshold_ = kMaxSequenceNumber; + if (allow_ingest_behind) { + return; + } // If a file's creation time is larger than creation_time_ub, // it is too new to be marked for compaction. int64_t creation_time_ub = 0; diff --git a/db/version_set.h b/db/version_set.h index 6774cfcd1..bfc63e351 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -228,7 +228,7 @@ class VersionStorageInfo { // eligible for compaction. // // REQUIRES: DB mutex held - void ComputeBottommostFilesMarkedForCompaction(); + void ComputeBottommostFilesMarkedForCompaction(bool allow_ingest_behind); // This computes files_marked_for_forced_blob_gc_ and is called by // ComputeCompactionScore() @@ -236,14 +236,16 @@ class VersionStorageInfo { // REQUIRES: DB mutex held void ComputeFilesMarkedForForcedBlobGC( double blob_garbage_collection_age_cutoff, - double blob_garbage_collection_force_threshold); + double blob_garbage_collection_force_threshold, + bool enable_blob_garbage_collection); bool level0_non_overlapping() const { return level0_non_overlapping_; } // Updates the oldest snapshot and related internal state, like the bottommost // files marked for compaction. 
// REQUIRES: DB mutex held - void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum); + void UpdateOldestSnapshot(SequenceNumber oldest_snapshot_seqnum, + bool allow_ingest_behind); int MaxInputLevel() const; int MaxOutputLevel(bool allow_ingest_behind) const; diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 59b946151..2526e752f 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -584,7 +584,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCEmpty) { constexpr double age_cutoff = 0.5; constexpr double force_threshold = 0.75; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -668,7 +669,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { { constexpr double age_cutoff = 0.1; constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -679,7 +681,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { { constexpr double age_cutoff = 0.5; constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -690,7 +693,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { { constexpr double age_cutoff = 1.0; constexpr double force_threshold = 0.6; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -701,7 +705,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCSingleBatch) { { constexpr double age_cutoff = 1.0; constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); ASSERT_EQ(ssts_to_be_compacted.size(), 1); @@ -815,7 +820,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.1; constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -826,7 +832,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.25; constexpr double force_threshold = 0.0; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -837,7 +844,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.5; constexpr double force_threshold = 0.6; - 
vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -848,7 +856,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.5; constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); ASSERT_EQ(ssts_to_be_compacted.size(), 2); @@ -877,7 +886,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.75; constexpr double force_threshold = 0.6; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); ASSERT_TRUE(vstorage_.FilesMarkedForForcedBlobGC().empty()); } @@ -888,7 +898,8 @@ TEST_F(VersionStorageInfoTest, ForcedBlobGCMultipleBatches) { { constexpr double age_cutoff = 0.75; constexpr double force_threshold = 0.5; - vstorage_.ComputeFilesMarkedForForcedBlobGC(age_cutoff, force_threshold); + vstorage_.ComputeFilesMarkedForForcedBlobGC( + age_cutoff, force_threshold, /*enable_blob_garbage_collection=*/true); auto ssts_to_be_compacted = vstorage_.FilesMarkedForForcedBlobGC(); ASSERT_EQ(ssts_to_be_compacted.size(), 2); From 261e9be7b3d26f22084655cee76f32c304c6e2e9 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Thu, 12 Oct 2023 16:22:49 -0700 Subject: [PATCH 200/386] Resolve BaseDeltaIterator's value in UpdateCurrent (#11947) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11947 The patch is a small refactoring of `BaseDeltaIterator`: instead of determining the iterator's value during the `value()` call, it is resolved up front in `UpdateCurrent()`. This has multiple benefits: the value is now computed only once even if `value()` is called multiple times for the same iterator position (note that with the previous code, merges for example would get performed multiple times in this case), it makes it possible to remove the `mutable` modifiers from the `status_` and `merge_result_` members, and it also serves as groundwork for adding wide-column support to `WriteBatchWithIndex`. 
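A minimal, self-contained illustration (not RocksDB code; the class and member names are invented) of the pattern described above: the expensive resolution runs once when the iterator is repositioned, so the accessor becomes a trivial const getter that can safely be called any number of times.

```
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

class CachingIterator {
 public:
  explicit CachingIterator(std::vector<std::string> entries)
      : entries_(std::move(entries)) {}

  void SeekToFirst() { pos_ = 0; UpdateCurrent(); }
  void Next() { ++pos_; UpdateCurrent(); }
  bool Valid() const { return pos_ < entries_.size(); }

  // Cheap and repeatable: no work happens here, mirroring value() now simply
  // returning the cached value_ member.
  const std::string& value() const { return value_; }

 private:
  // Stand-in for the merge resolution: runs exactly once per position,
  // analogous to SetValueFromBase()/SetValueFromDelta() being invoked from
  // UpdateCurrent().
  void UpdateCurrent() {
    value_.clear();
    if (!Valid()) {
      return;
    }
    value_ = "resolved(" + entries_[pos_] + ")";
  }

  std::vector<std::string> entries_;
  size_t pos_ = 0;
  std::string value_;
};

int main() {
  CachingIterator it({"a", "b"});
  for (it.SeekToFirst(); it.Valid(); it.Next()) {
    // Calling value() twice does not repeat the resolution work.
    std::cout << it.value() << " " << it.value() << "\n";
  }
  return 0;
}
```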
Reviewed By: jaykorean Differential Revision: D50236117 fbshipit-source-id: ae3d05863f811e9bac4c09edc49eca5f37e072a5 --- .../write_batch_with_index_internal.cc | 94 ++++++++++++------- .../write_batch_with_index_internal.h | 10 +- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 751d14adb..11feb5a7e 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -149,36 +149,6 @@ Slice BaseDeltaIterator::key() const { : delta_iterator_->Entry().key; } -Slice BaseDeltaIterator::value() const { - if (current_at_base_) { - return base_iterator_->value(); - } else { - WriteEntry delta_entry = delta_iterator_->Entry(); - if (merge_context_.GetNumOperands() == 0) { - return delta_entry.value; - } else if (delta_entry.type == kDeleteRecord || - delta_entry.type == kSingleDeleteRecord) { - status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family_, delta_entry.key, merge_context_, &merge_result_); - } else if (delta_entry.type == kPutRecord) { - status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family_, delta_entry.key, delta_entry.value, merge_context_, - &merge_result_); - } else if (delta_entry.type == kMergeRecord) { - if (equal_keys_) { - status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family_, delta_entry.key, base_iterator_->value(), - merge_context_, &merge_result_); - } else { - status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family_, delta_entry.key, merge_context_, &merge_result_); - } - } - - return merge_result_; - } -} - Slice BaseDeltaIterator::timestamp() const { return current_at_base_ ? 
base_iterator_->timestamp() : Slice(); } @@ -279,10 +249,64 @@ void BaseDeltaIterator::AdvanceBase() { bool BaseDeltaIterator::BaseValid() const { return base_iterator_->Valid(); } bool BaseDeltaIterator::DeltaValid() const { return delta_iterator_->Valid(); } + +void BaseDeltaIterator::ResetValue() { value_.clear(); } + +void BaseDeltaIterator::SetValueFromBase() { + assert(current_at_base_); + assert(BaseValid()); + assert(value_.empty()); + + value_ = base_iterator_->value(); +} + +void BaseDeltaIterator::SetValueFromDelta() { + assert(!current_at_base_); + assert(DeltaValid()); + assert(value_.empty()); + + WriteEntry delta_entry = delta_iterator_->Entry(); + + if (merge_context_.GetNumOperands() == 0) { + value_ = delta_entry.value; + + return; + } + + if (delta_entry.type == kDeleteRecord || + delta_entry.type == kSingleDeleteRecord) { + status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family_, delta_entry.key, merge_context_, &merge_result_); + } else if (delta_entry.type == kPutRecord) { + status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family_, delta_entry.key, delta_entry.value, merge_context_, + &merge_result_); + } else if (delta_entry.type == kMergeRecord) { + if (equal_keys_) { + status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( + column_family_, delta_entry.key, base_iterator_->value(), + merge_context_, &merge_result_); + } else { + status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family_, delta_entry.key, merge_context_, &merge_result_); + } + } else { + status_ = Status::NotSupported("Unsupported entry type for merge"); + } + + if (!status_.ok()) { + return; + } + + value_ = merge_result_; +} + void BaseDeltaIterator::UpdateCurrent() { // Suppress false positive clang analyzer warnings. #ifndef __clang_analyzer__ status_ = Status::OK(); + ResetValue(); + while (true) { auto delta_result = WBWIIteratorImpl::kNotFound; WriteEntry delta_entry; @@ -321,11 +345,13 @@ void BaseDeltaIterator::UpdateCurrent() { AdvanceDelta(); } else { current_at_base_ = false; + SetValueFromDelta(); return; } } else if (!DeltaValid()) { // Delta has finished. current_at_base_ = true; + SetValueFromBase(); return; } else { int compare = @@ -339,6 +365,7 @@ void BaseDeltaIterator::UpdateCurrent() { if (delta_result != WBWIIteratorImpl::kDeleted || merge_context_.GetNumOperands() > 0) { current_at_base_ = false; + SetValueFromDelta(); return; } // Delta is less advanced and is delete. 
@@ -348,6 +375,7 @@ void BaseDeltaIterator::UpdateCurrent() { } } else { current_at_base_ = true; + SetValueFromBase(); return; } } @@ -458,10 +486,10 @@ WBWIIteratorImpl::Result WBWIIteratorImpl::FindLatestUpdate( } Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, - WriteType* type, Slice* Key, + WriteType* type, Slice* key, Slice* value, Slice* blob, Slice* xid) const { - if (type == nullptr || Key == nullptr || value == nullptr || + if (type == nullptr || key == nullptr || value == nullptr || blob == nullptr || xid == nullptr) { return Status::InvalidArgument("Output parameters cannot be null"); } @@ -477,7 +505,7 @@ Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, Slice input = Slice(rep_.data() + data_offset, rep_.size() - data_offset); char tag; uint32_t column_family; - Status s = ReadRecordFromWriteBatch(&input, &tag, &column_family, Key, value, + Status s = ReadRecordFromWriteBatch(&input, &tag, &column_family, key, value, blob, xid); if (!s.ok()) { return s; diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index 35d550767..d8bab54ed 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -47,7 +47,7 @@ class BaseDeltaIterator : public Iterator { void Next() override; void Prev() override; Slice key() const override; - Slice value() const override; + Slice value() const override { return value_; } Slice timestamp() const override; Status status() const override; void Invalidate(Status s); @@ -59,19 +59,23 @@ class BaseDeltaIterator : public Iterator { void AdvanceBase(); bool BaseValid() const; bool DeltaValid() const; + void ResetValue(); + void SetValueFromBase(); + void SetValueFromDelta(); void UpdateCurrent(); bool forward_; bool current_at_base_; bool equal_keys_; - mutable Status status_; + Status status_; ColumnFamilyHandle* column_family_; std::unique_ptr base_iterator_; std::unique_ptr delta_iterator_; const Comparator* comparator_; // not owned const Slice* iterate_upper_bound_; MergeContext merge_context_; - mutable std::string merge_result_; + std::string merge_result_; + Slice value_; }; // Key used by skip list, as the binary searchable index of WriteBatchWithIndex. 
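For context on why the up-front resolution matters at the API level, here is a hypothetical usage sketch (not part of the patch; the database path and keys are invented) where `value()` is called more than once at a single position of an iterator created with `NewIteratorWithBase()`. With the previous code, the Merge entry below would be re-resolved on every `value()` call; after this change it is resolved once per position, inside `UpdateCurrent()`.

```
#include <iostream>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/merge_operators.h"
#include "rocksdb/options.h"
#include "rocksdb/utilities/write_batch_with_index.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // A merge operator is needed so Merge entries in the batch can be resolved
  // against the base value read from the DB.
  options.merge_operator =
      rocksdb::MergeOperators::CreateStringAppendOperator();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/wbwi_value_demo", &db);
  if (!s.ok()) {
    return 1;
  }
  s = db->Put(rocksdb::WriteOptions(), "key", "base");
  if (!s.ok()) {
    return 1;
  }

  rocksdb::WriteBatchWithIndex wbwi;
  s = wbwi.Merge("key", "delta");  // unflushed update layered over the DB
  if (!s.ok()) {
    return 1;
  }

  std::unique_ptr<rocksdb::Iterator> it(wbwi.NewIteratorWithBase(
      db->DefaultColumnFamily(), db->NewIterator(rocksdb::ReadOptions())));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // Both calls return the merged result ("base,delta" with the string-append
    // operator); the merge itself now runs once per position.
    std::cout << it->key().ToString() << " -> " << it->value().ToString()
              << " (repeated call: " << it->value().ToString() << ")\n";
  }
  it.reset();  // release the iterator (and its base) before closing the DB

  delete db;
  return 0;
}
```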
From 6e3429b8a6a53d5e477074057b5f27218063b5f2 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Thu, 12 Oct 2023 16:55:25 -0700 Subject: [PATCH 201/386] Fix data race in accessing `recovery_in_prog_` (#11950) Summary: We saw the following TSAN stress test failure: ``` WARNING: ThreadSanitizer: data race (pid=17523) Write of size 1 at 0x7b8c000008b9 by thread T4243 (mutexes: write M0): #0 rocksdb::ErrorHandler::RecoverFromRetryableBGIOError() fbcode/internal_repo_rocksdb/repo/db/error_handler.cc:742 (db_stress+0x95f954) (BuildId: 35795dfb86ddc9c4f20ddf08a491f24d) https://github.com/facebook/rocksdb/issues/1 std::thread::_State_impl>>::_M_run() fbcode/third-party-buck/platform010/build/libgcc/include/c++/trunk/bits/invoke.h:74 (db_stress+0x95fc2b) (BuildId: 35795dfb86ddc9c4f20ddf08a491f24d) https://github.com/facebook/rocksdb/issues/2 execute_native_thread_routine /home/engshare/third-party2/libgcc/11.x/src/gcc-11.x/x86_64-facebook-linux/libstdc++-v3/src/c++11/../../../.././libstdc++-v3/src/c++11/thread.cc:82:18 (libstdc++.so.6+0xdf4e4) (BuildId: 452d1cdae868baeeb2fdf1ab140f1c219bf50c6e) Previous read of size 1 at 0x7b8c000008b9 by thread T22: #0 rocksdb::DBImpl::SyncClosedLogs(rocksdb::JobContext*, rocksdb::VersionEdit*) fbcode/internal_repo_rocksdb/repo/db/error_handler.h:76 (db_stress+0x84f69c) (BuildId: 35795dfb86ddc9c4f20ddf08a491f24d) ``` This is due to a data race in accessing `recovery_in_prog_`. This PR fixes it by accessing `recovery_in_prog_` under db mutex before calling `SyncClosedLogs()`. I think the original PR https://github.com/facebook/rocksdb/pull/10489 intended to clear the error if it's a recovery flush. So ideally we can also just check flush reason. I plan to keep a safer change in this PR and make that change in the future if needed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11950 Test Plan: check future TSAN stress test results. Reviewed By: anand1976 Differential Revision: D50242255 Pulled By: cbi42 fbshipit-source-id: 0d487948ef9546b038a34460f3bb037f6e5bfc58 --- db/db_impl/db_impl.h | 3 ++- db/db_impl/db_impl_compaction_flush.cc | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 73f4d2e5d..19e82bc9c 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1864,7 +1864,8 @@ class DBImpl : public DB { void ReleaseFileNumberFromPendingOutputs( std::unique_ptr::iterator>& v); - IOStatus SyncClosedLogs(JobContext* job_context, VersionEdit* synced_wals); + IOStatus SyncClosedLogs(JobContext* job_context, VersionEdit* synced_wals, + bool error_recovery_in_prog); // Flush the in-memory write buffer to storage. Switches to a new // log-file/memtable and writes a new descriptor iff successful. 
Then diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 91b85de87..bdf4f3894 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -113,7 +113,8 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT( } IOStatus DBImpl::SyncClosedLogs(JobContext* job_context, - VersionEdit* synced_wals) { + VersionEdit* synced_wals, + bool error_recovery_in_prog) { TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start"); InstrumentedMutexLock l(&log_write_mutex_); autovector logs_to_sync; @@ -139,7 +140,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context, ROCKS_LOG_INFO(immutable_db_options_.info_log, "[JOB %d] Syncing log #%" PRIu64, job_context->job_id, log->get_log_number()); - if (error_handler_.IsRecoveryInProgress()) { + if (error_recovery_in_prog) { log->file()->reset_seen_error(); } io_s = log->file()->Sync(immutable_db_options_.use_fsync); @@ -148,7 +149,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context, } if (immutable_db_options_.recycle_log_file_num > 0) { - if (error_handler_.IsRecoveryInProgress()) { + if (error_recovery_in_prog) { log->file()->reset_seen_error(); } io_s = log->Close(); @@ -262,8 +263,10 @@ Status DBImpl::FlushMemTableToOutputFile( // SyncClosedLogs() may unlock and re-lock the log_write_mutex multiple // times. VersionEdit synced_wals; + bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress(); mutex_.Unlock(); - log_io_s = SyncClosedLogs(job_context, &synced_wals); + log_io_s = + SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog); mutex_.Lock(); if (log_io_s.ok() && synced_wals.IsWalAddition()) { const ReadOptions read_options(Env::IOActivity::kFlush); @@ -547,8 +550,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( // TODO (yanqin) investigate whether we should sync the closed logs for // single column family case. VersionEdit synced_wals; + bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress(); mutex_.Unlock(); - log_io_s = SyncClosedLogs(job_context, &synced_wals); + log_io_s = + SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog); mutex_.Lock(); if (log_io_s.ok() && synced_wals.IsWalAddition()) { const ReadOptions read_options(Env::IOActivity::kFlush); From dc576af0fde98ed38d544c380d0ae39a0bc8b347 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 13 Oct 2023 09:52:33 -0700 Subject: [PATCH 202/386] AutoHCC - fix a rare loop condition in Lookup (#11948) Summary: Saw this in stress test: ``` db_stress: cache/clock_cache.cc:3152:[...] Assertion `i < 0x2000' failed. ``` The problem is related to Lookups on a chain currently involved in a Grow operation. To avoid Lookup waiting on Grow, Lookup is able to walk a chain whose first part is already migrated and tail is not yet migrated, so is mixed with entries with a different destination home (according to `home_shift`) than what we're looking for. This is fine until we save one of these entries as a safe point in the chain to backtrack to (`read_ref_on_chain`) in case of concurrent modification and end up backtracking to it. In that case, we can get stuck on the wrong destination chain and keep trying to backtrack to an entry that is supposed to be on the correct chain but is not (anymore). For some reason I haven't quite worked out, I believe it's usually able to recover after some 1000+ looop iterations, so reproducibility depends on the threshold at which we consider a Lookup loop to be too many iterations for a plausibly valid Lookup. 
Detecting and working around this case is relatively simple. We can (and must) keep going on the chain but ensure we don't save it as a safe entry to backtrack to. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11948 Test Plan: The problem could be reproduced in a few minutes with this (debug build): ``` $ while ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 -cache_size=80000000 -threads=32 -populate_cache=0 -ops_per_thread=10000 -degenerate_hash_bits=6 -num_shard_bits=0; do :; done ``` At least with a lower threshold on suspiciously high number of iterations. I've lowered the thresholds quite a bit and no longer able to reproduce a failure. Reviewed By: jowlyzhang Differential Revision: D50236574 Pulled By: pdillinger fbshipit-source-id: 2cb54a4e02bb51d5933eea41fcd489ab9d34aa96 --- cache/clock_cache.cc | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index e8bce7a5b..b70d225bb 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -2099,8 +2099,8 @@ void AutoHyperClockTable::StartInsert(InsertState& state) { // and a larger limit is used to break cycles should they occur in production. #define CHECK_TOO_MANY_ITERATIONS(i) \ { \ - assert(i < 0x2000); \ - if (UNLIKELY(i >= 0x8000)) { \ + assert(i < 512); \ + if (UNLIKELY(i >= 4096)) { \ std::terminate(); \ } \ } @@ -3228,7 +3228,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // Follow the next and check for full key match, home match, or neither h = &arr[GetNextFromNextWithShift(next_with_shift)]; bool full_match_or_unknown = false; - if (MatchAndRef(&hashed_key, *h, home_shift, home, + if (MatchAndRef(&hashed_key, *h, shift, effective_home, &full_match_or_unknown)) { // Got a read ref on next (h). // @@ -3253,13 +3253,13 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // ownership of one for eviction. In rare cases, we might // double-clock-update some entries (ok as long as it's rare). - // With new usable read ref, can release old one if applicable - if (read_ref_on_chain) { - // Pretend we never took the reference. - Unref(*read_ref_on_chain); - } if (full_match_or_unknown) { // Full match. + // Release old read ref on chain if applicable + if (read_ref_on_chain) { + // Pretend we never took the reference. + Unref(*read_ref_on_chain); + } // Update the hit bit if (eviction_callback_) { h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift, @@ -3267,8 +3267,26 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( } // All done. return h; + } else if (UNLIKELY(shift != home_shift) && + home != BottomNBits(h->hashed_key[1], home_shift)) { + // This chain is in a Grow operation and we've landed on an entry + // that belongs to the wrong destination chain. We can keep going, but + // there's a chance we'll need to backtrack back *before* this entry, + // if the Grow finishes before this Lookup. We cannot save this entry + // for backtracking because it might soon or already be on the wrong + // chain. + // NOTE: if we simply backtrack rather than continuing, we would + // be in a wait loop (not allowed in Lookup!) until the other thread + // finishes its Grow. + Unref(*h); } else { - // Correct home location, so we are on the right chain + // Correct home location, so we are on the right chain. + // With new usable read ref, can release old one (if applicable). + if (read_ref_on_chain) { + // Pretend we never took the reference. 
+ Unref(*read_ref_on_chain); + } + // And keep the new one. read_ref_on_chain = h; } } else { From c9d8e6a5bf5fb67fd343738083e4cdb42854e2b0 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 13 Oct 2023 15:58:03 -0700 Subject: [PATCH 203/386] AttributeGroups - MultiGetEntity Implementation (#11925) Summary: Introducing the notion of AttributeGroup by adding the `MultiGetEntity()` API retrieving `PinnableAttributeGroups`. An "attribute group" refers to a logical grouping of wide-column entities within RocksDB. These attribute groups are implemented using column families. Users can store WideColumns in different CFs for various reasons (e.g. similar access patterns, same types, etc.). This new API `MultiGetEntity()` takes keys and `PinnableAttributeGroups` per key. `PinnableAttributeGroups` is just a list of `PinnableAttributeGroup`s in which we have `ColumnFamilyHandle*`, `Status`, and `PinnableWideColumns`. Let's say a user stored "hot" wide columns in column family "hot_data_cf" and "cold" wide columns in column family "cold_data_cf" and all other columns in "common_cf". Prior to this PR, if the user wants to query for two keys, "key_1" and "key_2" and but only interested in "common_cf" and "hot_data_cf" for "key_1", and "common_cf" and "cold_data_cf" for "key_2", the user would have to construct input like `keys = ["key_1", "key_1", "key_2", "key_2"]`, `column_families = ["common_cf", "hot_data_cf", "common_cf", "cold_data_cf"]` and get the flat list of `PinnableWideColumns` to find the corresponding combo. With the new `MultiGetEntity()` introduced in this PR, users can now query only `["common_cf", "hot_data_cf"]` for `"key_1"`, and only `["common_cf", "cold_data_cf"]` for `"key_2"`. The user will get `PinnableAttributeGroups` for each key, and `PinnableAttributeGroups` gives a list of `PinnableAttributeGroup`s where the user can find column family and corresponding `PinnableWideColumns` and the `Status`. 
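To make the "key_1"/"key_2" example above concrete, here is a hedged sketch of how the new API could be called. It is not taken from the patch; `db`, `common_cf`, `hot_data_cf`, and `cold_data_cf` are assumed to be an open DB and its column family handles.

```
// Sketch only: error handling omitted.
#include <utility>

#include "rocksdb/db.h"
#include "rocksdb/wide_columns.h"

void QueryAttributeGroups(rocksdb::DB* db,
                          rocksdb::ColumnFamilyHandle* common_cf,
                          rocksdb::ColumnFamilyHandle* hot_data_cf,
                          rocksdb::ColumnFamilyHandle* cold_data_cf) {
  rocksdb::Slice keys[2] = {"key_1", "key_2"};

  // Request (common_cf, hot_data_cf) for key_1 ...
  rocksdb::PinnableAttributeGroups key1_groups;
  key1_groups.emplace_back(common_cf);
  key1_groups.emplace_back(hot_data_cf);

  // ... and (common_cf, cold_data_cf) for key_2.
  rocksdb::PinnableAttributeGroups key2_groups;
  key2_groups.emplace_back(common_cf);
  key2_groups.emplace_back(cold_data_cf);

  rocksdb::PinnableAttributeGroups results[2] = {std::move(key1_groups),
                                                 std::move(key2_groups)};
  db->MultiGetEntity(rocksdb::ReadOptions(), /*num_keys=*/2, keys, results);

  // Each PinnableAttributeGroup carries its column family handle, a Status,
  // and the pinned wide columns for that (key, column family) combination.
  for (const auto& group : results[0]) {
    if (group.status().ok()) {
      const rocksdb::WideColumns& columns = group.columns();
      (void)columns;
    }
  }
}
```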
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11925 Test Plan: - `DBWideBasicTest::MultiCFMultiGetEntityAsPinnableAttributeGroups` added will enable this new API in the `db_stress` in a separate PR Reviewed By: ltamasi Differential Revision: D50017414 Pulled By: jaykorean fbshipit-source-id: 643611d1273c574bc81b94c6f5aeea24b40c4586 --- db/db_impl/db_impl.cc | 53 +++++++- db/db_impl/db_impl.h | 3 + db/wide/db_wide_basic_test.cc | 128 ++++++++++++++++++ include/rocksdb/db.h | 28 ++++ include/rocksdb/wide_columns.h | 40 ++++++ .../new_features/attribute_group_multiget.md | 1 + 6 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/new_features/attribute_group_multiget.md diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 3ae70b209..f24653015 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -13,7 +13,6 @@ #include #endif -#include #include #include #include @@ -3342,6 +3341,58 @@ void DBImpl::MultiGetEntity(const ReadOptions& _read_options, statuses, sorted_input); } +void DBImpl::MultiGetEntity(const ReadOptions& _read_options, size_t num_keys, + const Slice* keys, + PinnableAttributeGroups* results) { + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kMultiGetEntity) { + Status s = Status::InvalidArgument( + "Can only call MultiGetEntity with ReadOptions::io_activity` is " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGetEntity`"); + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < results[i].size(); ++j) { + results[i][j].SetStatus(s); + } + } + return; + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kMultiGetEntity; + } + + std::vector column_families; + std::vector all_keys; + size_t total_count = 0; + + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < results[i].size(); ++j) { + // Adding the same key slice for different CFs + all_keys.emplace_back(keys[i]); + column_families.emplace_back(results[i][j].column_family()); + ++total_count; + } + } + std::vector statuses(total_count); + std::vector columns(total_count); + MultiGetCommon(read_options, total_count, column_families.data(), + all_keys.data(), + /* values */ nullptr, columns.data(), + /* timestamps */ nullptr, statuses.data(), + /* sorted_input */ false); + + // Set results + size_t index = 0; + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < results[i].size(); ++j) { + results[i][j].Reset(); + results[i][j].SetStatus(std::move(statuses[index])); + results[i][j].SetColumns(std::move(columns[index])); + ++index; + } + } +} + Status DBImpl::WrapUpCreateColumnFamilies( const std::vector& cf_options) { // NOTE: this function is skipped for create_missing_column_families and diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 19e82bc9c..4c595076c 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -311,6 +311,9 @@ class DBImpl : public DB { ColumnFamilyHandle** column_families, const Slice* keys, PinnableWideColumns* results, Status* statuses, bool sorted_input) override; + void MultiGetEntity(const ReadOptions& options, size_t num_keys, + const Slice* keys, + PinnableAttributeGroups* results) override; virtual Status CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 
2067b6c19..03edf26b6 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -270,6 +270,134 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntity) { ASSERT_EQ(results[1].columns(), second_columns); } +TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { + Options options = GetDefaultOptions(); + CreateAndReopenWithCF({"hot_cf", "cold_cf"}, options); + + constexpr int DEFAULT_CF_HANDLE_INDEX = 0; + constexpr int HOT_CF_HANDLE_INDEX = 1; + constexpr int COLD_CF_HANDLE_INDEX = 2; + + constexpr char first_key[] = "first"; + WideColumns first_default_columns{ + {"default_cf_col_1_name", "first_key_default_cf_col_1_value"}, + {"default_cf_col_2_name", "first_key_default_cf_col_2_value"}}; + WideColumns first_hot_columns{ + {"hot_cf_col_1_name", "first_key_hot_cf_col_1_value"}, + {"hot_cf_col_2_name", "first_key_hot_cf_col_2_value"}}; + WideColumns first_cold_columns{ + {"cold_cf_col_1_name", "first_key_cold_cf_col_1_value"}}; + constexpr char second_key[] = "second"; + WideColumns second_hot_columns{ + {"hot_cf_col_1_name", "second_key_hot_cf_col_1_value"}}; + WideColumns second_cold_columns{ + {"cold_cf_col_1_name", "second_key_cold_cf_col_1_value"}}; + + // TODO - update this to use the multi-attribute-group PutEntity when ready + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[DEFAULT_CF_HANDLE_INDEX], + first_key, first_default_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[HOT_CF_HANDLE_INDEX], + first_key, first_hot_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[COLD_CF_HANDLE_INDEX], + first_key, first_cold_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[HOT_CF_HANDLE_INDEX], + second_key, second_hot_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[COLD_CF_HANDLE_INDEX], + second_key, second_cold_columns)); + + constexpr size_t num_keys = 2; + std::array keys = {first_key, second_key}; + std::vector all_cfs = handles_; + std::vector default_and_hot_cfs{ + {handles_[DEFAULT_CF_HANDLE_INDEX], handles_[HOT_CF_HANDLE_INDEX]}}; + std::vector hot_and_cold_cfs{ + {handles_[HOT_CF_HANDLE_INDEX], handles_[COLD_CF_HANDLE_INDEX]}}; + auto create_result = + [](const std::vector& column_families) + -> PinnableAttributeGroups { + PinnableAttributeGroups result; + for (size_t i = 0; i < column_families.size(); ++i) { + result.emplace_back(column_families[i]); + } + return result; + }; + { + // Check for invalid argument + ReadOptions read_options; + read_options.io_activity = Env::IOActivity::kGetEntity; + std::vector results; + for (size_t i = 0; i < num_keys; ++i) { + results.emplace_back(create_result(all_cfs)); + } + db_->MultiGetEntity(read_options, num_keys, keys.data(), results.data()); + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < all_cfs.size(); ++j) { + ASSERT_NOK(results[i][j].status()); + ASSERT_TRUE(results[i][j].status().IsInvalidArgument()); + } + } + } + { + // Case 1. 
Get first key from default cf and hot_cf and second key from + // hot_cf and cold_cf + std::vector results; + PinnableAttributeGroups first_key_result = + create_result(default_and_hot_cfs); + PinnableAttributeGroups second_key_result = create_result(hot_and_cold_cfs); + results.emplace_back(std::move(first_key_result)); + results.emplace_back(std::move(second_key_result)); + + db_->MultiGetEntity(ReadOptions(), num_keys, keys.data(), results.data()); + ASSERT_EQ(2, results.size()); + // We expect to get values for all keys and CFs + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < 2; ++j) { + ASSERT_OK(results[i][j].status()); + } + } + // verify values for first key (default cf and hot cf) + ASSERT_EQ(2, results[0].size()); + ASSERT_EQ(first_default_columns, results[0][0].columns()); + ASSERT_EQ(first_hot_columns, results[0][1].columns()); + + // verify values for second key (hot cf and cold cf) + ASSERT_EQ(2, results[1].size()); + ASSERT_EQ(second_hot_columns, results[1][0].columns()); + ASSERT_EQ(second_cold_columns, results[1][1].columns()); + } + { + // Case 2. Get first key and second key from all cfs. For the second key, we + // don't expect to get columns from default cf. + std::vector results; + PinnableAttributeGroups first_key_result = create_result(all_cfs); + PinnableAttributeGroups second_key_result = create_result(all_cfs); + results.emplace_back(std::move(first_key_result)); + results.emplace_back(std::move(second_key_result)); + + db_->MultiGetEntity(ReadOptions(), num_keys, keys.data(), results.data()); + // verify first key + for (size_t i = 0; i < all_cfs.size(); ++i) { + ASSERT_OK(results[0][i].status()); + } + ASSERT_EQ(3, results[0].size()); + ASSERT_EQ(first_default_columns, results[0][0].columns()); + ASSERT_EQ(first_hot_columns, results[0][1].columns()); + ASSERT_EQ(first_cold_columns, results[0][2].columns()); + + // verify second key + // key does not exist in default cf + ASSERT_NOK(results[1][0].status()); + ASSERT_TRUE(results[1][0].status().IsNotFound()); + ASSERT_TRUE(results[1][0].columns().empty()); + + // key exists in hot_cf and cold_cf + ASSERT_OK(results[1][1].status()); + ASSERT_EQ(second_hot_columns, results[1][1].columns()); + ASSERT_OK(results[1][2].status()); + ASSERT_EQ(second_cold_columns, results[1][2].columns()); + } +} + TEST_F(DBWideBasicTest, MergePlainKeyValue) { Options options = GetDefaultOptions(); options.create_if_missing = true; diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 781e3f277..a536a76c1 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -862,6 +862,34 @@ class DB { } } + // Batched MultiGet-like API that returns attribute groups. + // An "attribute group" refers to a logical grouping of wide-column entities + // within RocksDB. These attribute groups are implemented using column + // families. Attribute group allows users to group wide-columns based on + // various criteria, such as similar access patterns or data types + // + // The input is a list of keys and PinnableAttributeGroups. For any given + // keys[i] (where 0 <= i < num_keys), results[i] will contain result for the + // ith key. Each result will be returned as PinnableAttributeGroups. + // PinnableAttributeGroups is a vector of PinnableAttributeGroup. Each + // PinnableAttributeGroup will contain a ColumnFamilyHandle pointer, Status + // and PinnableWideColumns. + // + // Note that it is the caller's responsibility to ensure that + // "keys" and "results" have the same "num_keys" number of objects. 
Also + // PinnableAttributeGroup needs to have ColumnFamilyHandle pointer set + // properly to get the corresponding wide columns from the column family. + virtual void MultiGetEntity(const ReadOptions& /* options */, size_t num_keys, + const Slice* /* keys */, + PinnableAttributeGroups* results) { + for (size_t i = 0; i < num_keys; ++i) { + for (size_t j = 0; j < results[i].size(); ++j) { + results[i][j].SetStatus( + Status::NotSupported("MultiGetEntity not supported")); + } + } + } + // If the key definitely does not exist in the database, then this method // returns false, else true. If the caller wants to obtain value when the key // is found in memory, a bool for 'value_found' must be passed. 'value_found' diff --git a/include/rocksdb/wide_columns.h b/include/rocksdb/wide_columns.h index 1da66b73c..c4353134a 100644 --- a/include/rocksdb/wide_columns.h +++ b/include/rocksdb/wide_columns.h @@ -16,6 +16,8 @@ namespace ROCKSDB_NAMESPACE { +class ColumnFamilyHandle; + // Class representing a wide column, which is defined as a pair of column name // and column value. class WideColumn { @@ -218,4 +220,42 @@ inline bool operator!=(const PinnableWideColumns& lhs, return !(lhs == rhs); } +// Class representing attribute group. Attribute group is a logical grouping of +// wide-column entities by leveraging Column Families. Wide-columns returned +// from the query are pinnable. +class PinnableAttributeGroup { + public: + ColumnFamilyHandle* column_family() const { return column_family_; } + const Status& status() const { return status_; } + const WideColumns& columns() const { return columns_.columns(); } + + explicit PinnableAttributeGroup(ColumnFamilyHandle* column_family) + : column_family_(column_family), status_(Status::OK()) {} + + void SetStatus(const Status& status); + void SetColumns(PinnableWideColumns&& columns); + + void Reset(); + + private: + ColumnFamilyHandle* column_family_; + Status status_; + PinnableWideColumns columns_; +}; + +inline void PinnableAttributeGroup::SetStatus(const Status& status) { + status_ = status; +} +inline void PinnableAttributeGroup::SetColumns(PinnableWideColumns&& columns) { + columns_ = std::move(columns); +} + +inline void PinnableAttributeGroup::Reset() { + SetStatus(Status::OK()); + columns_.Reset(); +} + +// A collection of Attribute Groups. +using PinnableAttributeGroups = std::vector; + } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/new_features/attribute_group_multiget.md b/unreleased_history/new_features/attribute_group_multiget.md new file mode 100644 index 000000000..4a7ae818f --- /dev/null +++ b/unreleased_history/new_features/attribute_group_multiget.md @@ -0,0 +1 @@ +Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. More APIs to support AttributeGroup will come soon, including GetEntity, PutEntity, and others. From 50b0879d5052bc332c20e9c9d2f92283e5b93e77 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sat, 14 Oct 2023 10:08:03 -0700 Subject: [PATCH 204/386] Do not fail stress test when file ingestion return injected error (#11956) Summary: Currently, if file ingestion hit injected error, stress test is considered failed since it prints a message to stderr containing the keyword "error" and db_crashtest.py looks for it in stderr. This PR fixes it by print injected error to stdout. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11956 Test Plan: Check future stress test runs. Reviewed By: akankshamahajan15 Differential Revision: D50293537 Pulled By: cbi42 fbshipit-source-id: e74915b1b3c6876a61ab6933c4529780362ec02b --- db_stress_tool/no_batched_ops_stress.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 42cc8f302..302903939 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1563,9 +1563,11 @@ class NonBatchedOpsStressTest : public StressTest { {sst_filename}, IngestExternalFileOptions()); } if (!s.ok()) { - fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); if (!s.IsIOError() || !std::strstr(s.getState(), "injected")) { + fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str()); thread->shared->SafeTerminate(); + } else { + fprintf(stdout, "file ingestion error: %s\n", s.ToString().c_str()); } } else { for (size_t i = 0; i < pending_expected_values.size(); ++i) { From f3aef8cad7ba550743ce8b0755c767917a92fd5f Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sat, 14 Oct 2023 12:00:31 -0700 Subject: [PATCH 205/386] Add write operation to tracer only after successful callback (#11954) Summary: We saw optimistic transaction stress test failures like the following: ``` Verification failed for column family 0 key 000000000001E9AF000000000000012B00000000000000B5 (12535491): value_from_db: 010000000504070609080B0A0D0C0F0E111013121514171619181B1A1D1C1F1E212023222524272629282B2A2D2C2F2E313033323534373639383B3A3D3C3F3E, value_from_expected: , msg: Iterator verification: Unexpected value found``` ``` With ajkr's repro (see test plan), I found that we record duplicated writes to tracer when an optimistic transaction conflict checking fails. This PR fixes it by checking callback status before record a write operation to tracer. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11954 Test Plan: this reproduces the failure consistently ``` #!/bin/bash db=/dev/shm/rocksdb_crashtest_blackbox exp=/dev/shm/rocksdb_crashtest_expected rm -rf $db $exp && mkdir -p $exp && while ./db_stress \ --atomic_flush=1 \ --clear_column_family_one_in=0 \ --db=$db \ --db_write_buffer_size=2097152 \ --delpercent=0 \ --delrangepercent=0 \ --destroy_db_initially=0 \ --disable_wal=1 \ --expected_values_dir=$exp \ --iterpercent=0 \ --max_bytes_for_level_base=2097152 \ --max_key=250000 \ --memtable_prefix_bloom_size_ratio=0.5 \ --memtable_whole_key_filtering=1 \ --occ_lock_bucket_count=100 \ --occ_validation_policy=0 \ --ops_per_thread=10 \ --prefixpercent=0 \ --readpercent=0 \ --reopen=0 \ --target_file_size_base=524288 \ --test_batches_snapshots=0 \ --use_optimistic_txn=1 \ --use_txn=1 \ --value_size_mult=32 \ --write_buffer_size=524288 \ --writepercent=100 ; do : ; done ``` Reviewed By: akankshamahajan15 Differential Revision: D50284976 Pulled By: cbi42 fbshipit-source-id: 793e3cee186c8b4f406b29166efd8d9028695206 --- db/db_impl/db_impl_write.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 505a37883..b3af38d21 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -403,17 +403,6 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, IOStatus io_s; Status pre_release_cb_status; if (status.ok()) { - // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock - // grabs but does not seem thread-safe. - if (tracer_) { - InstrumentedMutexLock lock(&trace_mutex_); - if (tracer_ && tracer_->IsWriteOrderPreserved()) { - for (auto* writer : write_group) { - // TODO: maybe handle the tracing status? - tracer_->Write(writer->batch).PermitUncheckedError(); - } - } - } // Rules for when we can update the memtable concurrently // 1. supported by memtable // 2. Puts are not okay if inplace_update_support @@ -446,6 +435,20 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, } } } + // TODO: this use of operator bool on `tracer_` can avoid unnecessary lock + // grabs but does not seem thread-safe. + if (tracer_) { + InstrumentedMutexLock lock(&trace_mutex_); + if (tracer_ && tracer_->IsWriteOrderPreserved()) { + for (auto* writer : write_group) { + if (writer->CallbackFailed()) { + continue; + } + // TODO: maybe handle the tracing status? + tracer_->Write(writer->batch).PermitUncheckedError(); + } + } + } // Note about seq_per_batch_: either disableWAL is set for the entire write // group or not. In either case we inc seq for each write batch with no // failed callback. This means that there could be a batch with From 9ded9f789ff9805236669963c3f4bde811d71f54 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sat, 14 Oct 2023 13:33:55 -0700 Subject: [PATCH 206/386] Fix db_stress FaultInjectionTestFS set up before DB open (#11958) Summary: We saw frequent stress test failures with error messages like: ``` Verification failed for column family 0 key ...: value_from_db: , value_from_expected: ..., msg: GetEntity verification: Value not found: NotFound: ``` One cause for this is that data in WAL is lost after a crash. We initialize FaultInjectionTestFS to be not direct writable when write_fault_injection is enabled (see code change). This can cause the first WAL created during DB open to be lost if a db_stress is killed before the first WAL is synced. 
This PR initializes FaultInjectionTestFS to be direct writable. Note that FaultInjectionTestFS will be configured propertly for write fault injection after DB open in `RunStressTestImpl()`. So this change should not affect write fault injection coverage. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11958 Test Plan: a repro for the above bug: ``` Simulate crash before first WAL is sealed: --- a/db_stress_tool/db_stress_driver.cc +++ b/db_stress_tool/db_stress_driver.cc @@ -256,6 +256,7 @@ bool RunStressTestImpl(SharedState* shared) { fprintf(stderr, "Verification failed :(\n"); return false; } + exit(1); return true; } ./db_stress --clear_column_family_one_in=0 --column_families=1 --preserve_internal_time_seconds=60 --destroy_db_initially=0 --db=/dev/shm/rocksdb_crashtest_blackbox --db_write_buffer_size=2097152 --destroy_db_initially=0 --expected_values_dir=/dev/shm/rocksdb_crashtest_expected --reopen=0 --test_batches_snapshots=0 --threads=1 --ops_per_thread=100 --write_fault_one_in=1000 --sync_fault_injection=0 ./db_stress_main --clear_column_family_one_in=0 --column_families=1 --preserve_internal_time_seconds=60 --destroy_db_initially=0 --db=/dev/shm/rocksdb_crashtest_blackbox --db_write_buffer_size=2097152 --destroy_db_initially=0 --expected_values_dir=/dev/shm/rocksdb_crashtest_expected --reopen=0 --test_batches_snapshots=0 --sync_fault_injection=1 ``` Reviewed By: akankshamahajan15 Differential Revision: D50300347 Pulled By: cbi42 fbshipit-source-id: 3a4881d72197f5ece82364382a0100912e16c2d6 --- db_stress_tool/db_stress_tool.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc index 3c50cdb88..9c24e2c42 100644 --- a/db_stress_tool/db_stress_tool.cc +++ b/db_stress_tool/db_stress_tool.cc @@ -88,11 +88,11 @@ int db_stress_tool(int argc, char** argv) { FaultInjectionTestFS* fs = new FaultInjectionTestFS(raw_env->GetFileSystem()); fault_fs_guard.reset(fs); - if (FLAGS_write_fault_one_in) { - fault_fs_guard->SetFilesystemDirectWritable(false); - } else { - fault_fs_guard->SetFilesystemDirectWritable(true); - } + // Set it to direct writable here to not lose files created during DB open + // when no open fault injection is not enabled. + // This will be overwritten in StressTest::Open() for open fault injection + // and in RunStressTestImpl() for proper write fault injection setup. + fault_fs_guard->SetFilesystemDirectWritable(true); fault_env_guard = std::make_shared(raw_env, fault_fs_guard); raw_env = fault_env_guard.get(); From 2fd850c7ebb6498a972a7b528408bca2ec697b9e Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 16 Oct 2023 08:58:47 -0700 Subject: [PATCH 207/386] Remove write queue synchronization from WriteOptionsFile (#11951) Summary: This has become obsolete with the new `options_mutex_` in https://github.com/facebook/rocksdb/pull/11929 * Remove now-unnecessary parameter from WriteOptionsFile * Rename (and negate) other parameter for better clarity (the caller shouldn't tell the callee what the callee needs, just what the caller knows, provides, and requests) * Move a ROCKS_LOG_WARN (I/O) in WriteOptionsFile to outside of holding DB mutex. * Also *avoid* (but not always eliminate) write queue synchronization in SetDBOptions. Still needed if there was a change to WAL size limit or other configuration. 
* Improve some comments Pull Request resolved: https://github.com/facebook/rocksdb/pull/11951 Test Plan: existing unit tests and TSAN crash test local run Reviewed By: ajkr Differential Revision: D50247904 Pulled By: pdillinger fbshipit-source-id: 7dfe445c705ec013886a2adb7c50abe50d83af69 --- db/db_impl/db_impl.cc | 83 ++++++++++++++++++-------------------- db/db_impl/db_impl.h | 8 ++-- db/db_impl/db_impl_open.cc | 4 +- 3 files changed, 45 insertions(+), 50 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index f24653015..88880996e 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1230,8 +1230,7 @@ Status DBImpl::SetOptions( // thread. InstallSuperVersionAndScheduleWork(cfd, &sv_context, new_options); - persist_options_status = WriteOptionsFile( - false /*need_mutex_lock*/, true /*need_enter_write_thread*/); + persist_options_status = WriteOptionsFile(true /*db_mutex_already_held*/); bg_cv_.SignalAll(); } } @@ -1273,7 +1272,8 @@ Status DBImpl::SetDBOptions( MutableDBOptions new_options; Status s; Status persist_options_status = Status::OK(); - bool wal_changed = false; + bool wal_size_option_changed = false; + bool wal_other_option_changed = false; WriteContext write_context; { InstrumentedMutexLock l(&mutex_); @@ -1374,8 +1374,10 @@ Status DBImpl::SetDBOptions( table_cache_.get()->SetCapacity(new_options.max_open_files == -1 ? TableCache::kInfiniteCapacity : new_options.max_open_files - 10); - wal_changed = mutable_db_options_.wal_bytes_per_sync != - new_options.wal_bytes_per_sync; + wal_other_option_changed = mutable_db_options_.wal_bytes_per_sync != + new_options.wal_bytes_per_sync; + wal_size_option_changed = mutable_db_options_.max_total_wal_size != + new_options.max_total_wal_size; mutable_db_options_ = new_options; file_options_for_compaction_ = FileOptions(new_db_options); file_options_for_compaction_ = fs_->OptimizeForCompactionTableWrite( @@ -1386,19 +1388,21 @@ Status DBImpl::SetDBOptions( file_options_for_compaction_, immutable_db_options_); file_options_for_compaction_.compaction_readahead_size = mutable_db_options_.compaction_readahead_size; - WriteThread::Writer w; - write_thread_.EnterUnbatched(&w, &mutex_); - if (total_log_size_ > GetMaxTotalWalSize() || wal_changed) { - Status purge_wal_status = SwitchWAL(&write_context); - if (!purge_wal_status.ok()) { - ROCKS_LOG_WARN(immutable_db_options_.info_log, - "Unable to purge WAL files in SetDBOptions() -- %s", - purge_wal_status.ToString().c_str()); + if (wal_other_option_changed || wal_size_option_changed) { + WriteThread::Writer w; + write_thread_.EnterUnbatched(&w, &mutex_); + if (wal_other_option_changed || + total_log_size_ > GetMaxTotalWalSize()) { + Status purge_wal_status = SwitchWAL(&write_context); + if (!purge_wal_status.ok()) { + ROCKS_LOG_WARN(immutable_db_options_.info_log, + "Unable to purge WAL files in SetDBOptions() -- %s", + purge_wal_status.ToString().c_str()); + } } + write_thread_.ExitUnbatched(&w); } - persist_options_status = WriteOptionsFile( - false /*need_mutex_lock*/, false /*need_enter_write_thread*/); - write_thread_.ExitUnbatched(&w); + persist_options_status = WriteOptionsFile(true /*db_mutex_already_held*/); } else { // To get here, we must have had invalid options and will not attempt to // persist the options, which means the status is "OK/Uninitialized. 
@@ -3406,8 +3410,7 @@ Status DBImpl::WrapUpCreateColumnFamilies( } } // Attempt both follow-up actions even if one fails - Status s = WriteOptionsFile(true /*need_mutex_lock*/, - true /*need_enter_write_thread*/); + Status s = WriteOptionsFile(false /*db_mutex_already_held*/); if (register_worker) { s.UpdateIfOk(RegisterRecordSeqnoTimeWorker(/*from_db_open=*/false)); } @@ -3577,8 +3580,7 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { InstrumentedMutexLock ol(&options_mutex_); Status s = DropColumnFamilyImpl(column_family); if (s.ok()) { - s = WriteOptionsFile(true /*need_mutex_lock*/, - true /*need_enter_write_thread*/); + s = WriteOptionsFile(false /*db_mutex_already_held*/); } return s; } @@ -3596,8 +3598,8 @@ Status DBImpl::DropColumnFamilies( success_once = true; } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + Status persist_options_status = + WriteOptionsFile(false /*db_mutex_already_held*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } @@ -5225,18 +5227,13 @@ Status DestroyDB(const std::string& dbname, const Options& options, return result; } -Status DBImpl::WriteOptionsFile(bool need_mutex_lock, - bool need_enter_write_thread) { +Status DBImpl::WriteOptionsFile(bool db_mutex_already_held) { options_mutex_.AssertHeld(); - WriteThread::Writer w; - if (need_mutex_lock) { - mutex_.Lock(); - } else { + if (db_mutex_already_held) { mutex_.AssertHeld(); - } - if (need_enter_write_thread) { - write_thread_.EnterUnbatched(&w, &mutex_); + } else { + mutex_.Lock(); } std::vector cf_names; @@ -5251,10 +5248,10 @@ Status DBImpl::WriteOptionsFile(bool need_mutex_lock, cf_opts.push_back(cfd->GetLatestCFOptions()); } - // Unlock during expensive operations. New writes cannot get here - // because the single write thread ensures all new writes get queued. DBOptions db_options = BuildDBOptions(immutable_db_options_, mutable_db_options_); + + // Unlock during expensive operations. mutex_.Unlock(); TEST_SYNC_POINT("DBImpl::WriteOptionsFile:1"); @@ -5279,22 +5276,22 @@ Status DBImpl::WriteOptionsFile(bool need_mutex_lock, } } - // restore lock - if (!need_mutex_lock) { - mutex_.Lock(); - } - if (need_enter_write_thread) { - write_thread_.ExitUnbatched(&w); - } if (!s.ok()) { ROCKS_LOG_WARN(immutable_db_options_.info_log, "Unnable to persist options -- %s", s.ToString().c_str()); if (immutable_db_options_.fail_if_options_file_error) { - return Status::IOError("Unable to persist options.", - s.ToString().c_str()); + s = Status::IOError("Unable to persist options.", s.ToString().c_str()); + } else { + // Ignore error + s = Status::OK(); } } - return Status::OK(); + + // Restore lock if appropriate + if (db_mutex_already_held) { + mutex_.Lock(); + } + return s; } namespace { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 4c595076c..cf6d1d300 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1397,11 +1397,9 @@ class DBImpl : public DB { std::unordered_set files_to_delete_; }; - // Except in DB::Open(), WriteOptionsFile can only be called when: - // Persist options to options file. - // If need_mutex_lock = false, the method will lock DB mutex. - // If need_enter_write_thread = false, the method will enter write thread. - Status WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread); + // Persist options to options file. Must be holding options_mutex_. + // Will lock DB mutex if !db_mutex_already_held. 
+ Status WriteOptionsFile(bool db_mutex_already_held); Status CompactRangeInternal(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 89a9f33e1..b38eddf1a 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -2124,8 +2124,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, if (s.ok()) { // Persist RocksDB Options before scheduling the compaction. // The WriteOptionsFile() will release and lock the mutex internally. - persist_options_status = impl->WriteOptionsFile( - false /*need_mutex_lock*/, false /*need_enter_write_thread*/); + persist_options_status = + impl->WriteOptionsFile(true /*db_mutex_already_held*/); *dbptr = impl; impl->opened_successfully_ = true; impl->DeleteObsoleteFiles(); From 25d4379cc856d16bcb7c2768c957062675879b7a Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Mon, 16 Oct 2023 10:21:35 -0700 Subject: [PATCH 208/386] Make rate limiter single burst bytes runtime changeable (#11923) Summary: Context/Summary: as titled Pull Request resolved: https://github.com/facebook/rocksdb/pull/11923 Test Plan: new UT Reviewed By: ajkr Differential Revision: D49941161 Pulled By: hx235 fbshipit-source-id: f75a4d07f3cdd86863ea22c57f2bcd3a621baaf3 --- include/rocksdb/rate_limiter.h | 9 ++++ .../new_features/runtime_burst_bytes.md | 1 + util/rate_limiter.cc | 49 ++++++++++++++++--- util/rate_limiter_impl.h | 7 ++- util/rate_limiter_test.cc | 29 +++++++++++ 5 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 unreleased_history/new_features/runtime_burst_bytes.md diff --git a/include/rocksdb/rate_limiter.h b/include/rocksdb/rate_limiter.h index 9cad6edf4..3515b1e95 100644 --- a/include/rocksdb/rate_limiter.h +++ b/include/rocksdb/rate_limiter.h @@ -40,6 +40,15 @@ class RateLimiter { // REQUIRED: bytes_per_second > 0 virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0; + // This API allows user to dynamically change the max bytes can be granted in + // a single refill period (i.e, burst) + // + // REQUIRED: single_burst_bytes > 0. Otherwise `Status::InvalidArgument` will + // be returned. + virtual Status SetSingleBurstBytes(int64_t /* single_burst_bytes */) { + return Status::NotSupported(); + } + // Deprecated. 
New RateLimiter derived classes should override // Request(const int64_t, const Env::IOPriority, Statistics*) or // Request(const int64_t, const Env::IOPriority, Statistics*, OpType) diff --git a/unreleased_history/new_features/runtime_burst_bytes.md b/unreleased_history/new_features/runtime_burst_bytes.md new file mode 100644 index 000000000..26f1a8c87 --- /dev/null +++ b/unreleased_history/new_features/runtime_burst_bytes.md @@ -0,0 +1 @@ +Users can now change the max bytes granted in a single refill period (i.e, burst) during runtime by `SetSingleBurstBytes()` for RocksDB rate limiter diff --git a/util/rate_limiter.cc b/util/rate_limiter.cc index 12eef1311..e92b3bf76 100644 --- a/util/rate_limiter.cc +++ b/util/rate_limiter.cc @@ -107,6 +107,23 @@ void GenericRateLimiter::SetBytesPerSecondLocked(int64_t bytes_per_second) { std::memory_order_relaxed); } +Status GenericRateLimiter::SetSingleBurstBytes(int64_t single_burst_bytes) { + if (single_burst_bytes <= 0) { + return Status::InvalidArgument( + "`single_burst_bytes` must be greater than 0"); + } + + MutexLock g(&request_mutex_); + SetSingleBurstBytesLocked(single_burst_bytes); + return Status::OK(); +} + +void GenericRateLimiter::SetSingleBurstBytesLocked(int64_t single_burst_bytes) { + refill_bytes_per_period_.store(single_burst_bytes, std::memory_order_relaxed); + refill_period_us_.store(CalculateRefillPeriodUsLocked(single_burst_bytes), + std::memory_order_relaxed); +} + void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri, Statistics* stats) { assert(bytes <= refill_bytes_per_period_.load(std::memory_order_relaxed)); @@ -120,7 +137,8 @@ void GenericRateLimiter::Request(int64_t bytes, const Env::IOPriority pri, static const int kRefillsPerTune = 100; std::chrono::microseconds now(NowMicrosMonotonicLocked()); if (now - tuned_time_ >= - kRefillsPerTune * std::chrono::microseconds(refill_period_us_)) { + kRefillsPerTune * std::chrono::microseconds(refill_period_us_.load( + std::memory_order_relaxed))) { Status s = TuneLocked(); s.PermitUncheckedError(); //**TODO: What to do on error? } @@ -261,7 +279,8 @@ GenericRateLimiter::GeneratePriorityIterationOrderLocked() { void GenericRateLimiter::RefillBytesAndGrantRequestsLocked() { TEST_SYNC_POINT_CALLBACK( "GenericRateLimiter::RefillBytesAndGrantRequestsLocked", &request_mutex_); - next_refill_us_ = NowMicrosMonotonicLocked() + refill_period_us_; + next_refill_us_ = NowMicrosMonotonicLocked() + + refill_period_us_.load(std::memory_order_relaxed); // Carry over the left over quota from the last period auto refill_bytes_per_period = refill_bytes_per_period_.load(std::memory_order_relaxed); @@ -299,13 +318,28 @@ void GenericRateLimiter::RefillBytesAndGrantRequestsLocked() { int64_t GenericRateLimiter::CalculateRefillBytesPerPeriodLocked( int64_t rate_bytes_per_sec) { + int64_t refill_period_us = refill_period_us_.load(std::memory_order_relaxed); if (std::numeric_limits::max() / rate_bytes_per_sec < - refill_period_us_) { + refill_period_us) { + // Avoid unexpected result in the overflow case. The result now is still + // inaccurate but is a number that is large enough. 
+ return std::numeric_limits::max() / kMicrosecondsPerSecond; + } else { + return rate_bytes_per_sec * refill_period_us / kMicrosecondsPerSecond; + } +} + +int64_t GenericRateLimiter::CalculateRefillPeriodUsLocked( + int64_t single_burst_bytes) { + int64_t rate_bytes_per_sec = + rate_bytes_per_sec_.load(std::memory_order_relaxed); + if (std::numeric_limits::max() / single_burst_bytes < + kMicrosecondsPerSecond) { // Avoid unexpected result in the overflow case. The result now is still // inaccurate but is a number that is large enough. - return std::numeric_limits::max() / 1000000; + return std::numeric_limits::max() / rate_bytes_per_sec; } else { - return rate_bytes_per_sec * refill_period_us_ / 1000000; + return single_burst_bytes * kMicrosecondsPerSecond / rate_bytes_per_sec; } } @@ -320,10 +354,11 @@ Status GenericRateLimiter::TuneLocked() { std::chrono::microseconds prev_tuned_time = tuned_time_; tuned_time_ = std::chrono::microseconds(NowMicrosMonotonicLocked()); + int64_t refill_period_us = refill_period_us_.load(std::memory_order_relaxed); int64_t elapsed_intervals = (tuned_time_ - prev_tuned_time + - std::chrono::microseconds(refill_period_us_) - + std::chrono::microseconds(refill_period_us) - std::chrono::microseconds(1)) / - std::chrono::microseconds(refill_period_us_); + std::chrono::microseconds(refill_period_us); // We tune every kRefillsPerTune intervals, so the overflow and division-by- // zero conditions should never happen. assert(num_drains_ <= std::numeric_limits::max() / 100); diff --git a/util/rate_limiter_impl.h b/util/rate_limiter_impl.h index 4c078f5a0..c6786b048 100644 --- a/util/rate_limiter_impl.h +++ b/util/rate_limiter_impl.h @@ -36,6 +36,8 @@ class GenericRateLimiter : public RateLimiter { // This API allows user to dynamically change rate limiter's bytes per second. virtual void SetBytesPerSecond(int64_t bytes_per_second) override; + virtual Status SetSingleBurstBytes(int64_t single_burst_bytes) override; + // Request for token to write bytes. If this request can not be satisfied, // the call is blocked. Caller is responsible to make sure // bytes <= GetSingleBurstBytes() and bytes >= 0. 
Negative bytes @@ -102,11 +104,14 @@ class GenericRateLimiter : public RateLimiter { } private: + static constexpr int kMicrosecondsPerSecond = 1000000; void RefillBytesAndGrantRequestsLocked(); std::vector GeneratePriorityIterationOrderLocked(); int64_t CalculateRefillBytesPerPeriodLocked(int64_t rate_bytes_per_sec); + int64_t CalculateRefillPeriodUsLocked(int64_t single_burst_bytes); Status TuneLocked(); void SetBytesPerSecondLocked(int64_t bytes_per_second); + void SetSingleBurstBytesLocked(int64_t single_burst_bytes); uint64_t NowMicrosMonotonicLocked() { return clock_->NowNanos() / std::milli::den; @@ -115,7 +120,7 @@ class GenericRateLimiter : public RateLimiter { // This mutex guard all internal states mutable port::Mutex request_mutex_; - const int64_t refill_period_us_; + std::atomic refill_period_us_; std::atomic rate_bytes_per_sec_; std::atomic refill_bytes_per_period_; diff --git a/util/rate_limiter_test.cc b/util/rate_limiter_test.cc index f31981a5c..16e7623ac 100644 --- a/util/rate_limiter_test.cc +++ b/util/rate_limiter_test.cc @@ -553,6 +553,35 @@ TEST_F(RateLimiterTest, WaitHangingBug) { } } +TEST_F(RateLimiterTest, RuntimeSingleBurstBytesChange) { + constexpr int kMicrosecondsPerSecond = 1000000; + + const int64_t kRateBytesPerSec = 400; + + const int64_t kOldSingleBurstBytes = 100; + const int64_t kOldRefillPeriodUs = + kOldSingleBurstBytes * kMicrosecondsPerSecond / kRateBytesPerSec; + const int64_t kNewSingleBurstBytes = kOldSingleBurstBytes * 2; + + SpecialEnv special_env(Env::Default(), /*time_elapse_only_sleep*/ true); + std::unique_ptr limiter(new GenericRateLimiter( + kRateBytesPerSec, kOldRefillPeriodUs, 10 /* fairness */, + RateLimiter::Mode::kWritesOnly, special_env.GetSystemClock(), + false /* auto_tuned */)); + + ASSERT_EQ(kOldSingleBurstBytes, limiter->GetSingleBurstBytes()); + + ASSERT_TRUE(limiter->SetSingleBurstBytes(0).IsInvalidArgument()); + ASSERT_OK(limiter->SetSingleBurstBytes(kNewSingleBurstBytes)); + ASSERT_EQ(kNewSingleBurstBytes, limiter->GetSingleBurstBytes()); + + // If the updated single burst bytes is not reflected in the bytes + // granting process, this request will hang forever. + limiter->Request(limiter->GetSingleBurstBytes() /* bytes */, + Env::IOPriority::IO_USER, nullptr /* stats */, + RateLimiter::OpType::kWrite); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 018eede679f6df74f78b158caf21f76c00c1bad7 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan <43301668+akankshamahajan15@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:14:58 -0700 Subject: [PATCH 209/386] Remove assertion from PrefetchAsync (#11965) Summary: Remove assertion from PrefetchAsync (roundup_len2 >= alignment) as for non direct_io, buffer size can be less than alignment resulting in assertion. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11965 Test Plan: Ran the issue causing db_stress without this assertion and the verification completes successfully. 
Reviewed By: anand1976 Differential Revision: D50328955 Pulled By: akankshamahajan15 fbshipit-source-id: 65f55ca230d2bbc63f4e2cc34c7273b22b515879 --- file/file_prefetch_buffer.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index f09e57033..70338d5b5 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -938,8 +938,6 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); uint64_t roundup_len2 = roundup_end2 - rounddown_start2; - assert(roundup_len2 >= alignment); - CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, false, chunk_len2); assert(chunk_len2 == 0); From 84af7cf0bdc16a15e67bd12f9b316d8e76c87a4f Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 16 Oct 2023 17:28:36 -0700 Subject: [PATCH 210/386] Sanitize db_stress arguments when secondary_cache_uri is not empty (#11967) Summary: When `secondary_cache_uri` is non-empty and the `cache_type` is not a tiered cache, then sanitize `compressed_secondary_cache_size` to 0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11967 Test Plan: Run crash test Reviewed By: akankshamahajan15 Differential Revision: D50346157 Pulled By: anand1976 fbshipit-source-id: 57bcbad2ec81fa736f1539a0a41ed6854ded2077 --- tools/db_crashtest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 5c37b7b95..9bdb5a386 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -700,6 +700,10 @@ def finalize_and_sanitize(src_params): else: dest_params["compressed_secondary_cache_ratio"] = 0.0 dest_params["cache_type"] = dest_params["cache_type"].replace("tiered_", "") + else: + if dest_params["secondary_cache_uri"]: + dest_params["compressed_secondary_cache_size"] = 0 + dest_params["compressed_secondary_cache_ratio"] = 0.0 if dest_params["use_write_buffer_manager"]: if (dest_params["cache_size"] <= 0 or dest_params["db_write_buffer_size"] <= 0): From 2296c624fa0fd72f61eb706c56bb4fc53ddf7ce6 Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Tue, 17 Oct 2023 10:04:35 -0700 Subject: [PATCH 211/386] Perform java static checks in CI (#11221) Summary: Integrate pmd on the Java API to catch and report common Java coding problems; fix or suppress a basic set of PMD checks. Link pmd into java build / CI Add a pmd dependency to maven Add a jpmd target to Makefile which runs pmd Add a workflow to Circle CI which runs pmd Configure an initial default pmd for CI Repair lots of very simple PMD reports generated when we apply pmd-rules.xml Repair or exception for PMD rules in the CloseResource category, which finds unclosed AutoCloseable resources. We special-case the configuration of CloseResource and use the // NOPMD comment in source the avoid reports where we are the API creating an AutoCloseable, and therefore returning an unclosed resource is correct behaviour. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11221 Reviewed By: akankshamahajan15 Differential Revision: D50369930 Pulled By: jowlyzhang fbshipit-source-id: a41c36b44b3bab7644df3e9cc16afbdf33b84f6b --- .circleci/config.yml | 38 +++++ Makefile | 3 + java/Makefile | 5 + java/pmd-rules.xml | 62 +++++++ java/pom.xml.template | 51 +++++- java/spotbugs-exclude.xml | 151 ++++++++++++++++++ .../AbstractCompactionFilterFactory.java | 5 +- .../rocksdb/AbstractComparatorJniBridge.java | 3 + .../org/rocksdb/AbstractEventListener.java | 20 ++- .../org/rocksdb/AbstractMutableOptions.java | 36 +++-- .../main/java/org/rocksdb/AbstractSlice.java | 16 +- .../java/org/rocksdb/AbstractTraceWriter.java | 2 + .../java/org/rocksdb/AbstractWalFilter.java | 6 +- .../java/org/rocksdb/BackupEngineOptions.java | 10 +- .../org/rocksdb/BlockBasedTableConfig.java | 13 +- .../main/java/org/rocksdb/BloomFilter.java | 5 +- .../java/org/rocksdb/ByteBufferGetStatus.java | 1 + .../main/java/org/rocksdb/ChecksumType.java | 2 +- .../org/rocksdb/ColumnFamilyDescriptor.java | 6 +- .../java/org/rocksdb/ColumnFamilyHandle.java | 2 +- .../org/rocksdb/ColumnFamilyMetaData.java | 2 + .../java/org/rocksdb/ColumnFamilyOptions.java | 8 +- .../java/org/rocksdb/CompactionStyle.java | 4 +- .../main/java/org/rocksdb/ConfigOptions.java | 1 - java/src/main/java/org/rocksdb/DBOptions.java | 4 +- .../main/java/org/rocksdb/EncodingType.java | 2 +- java/src/main/java/org/rocksdb/Env.java | 3 +- .../main/java/org/rocksdb/EventListener.java | 6 +- .../rocksdb/HashLinkedListMemTableConfig.java | 2 +- .../rocksdb/HashSkipListMemTableConfig.java | 2 +- .../main/java/org/rocksdb/LevelMetaData.java | 1 + .../java/org/rocksdb/LiveFileMetaData.java | 2 + java/src/main/java/org/rocksdb/LogFile.java | 1 + .../src/main/java/org/rocksdb/MemoryUtil.java | 4 +- .../org/rocksdb/NativeComparatorWrapper.java | 14 +- .../java/org/rocksdb/NativeLibraryLoader.java | 107 ++++++------- .../org/rocksdb/OptimisticTransactionDB.java | 2 + .../main/java/org/rocksdb/OptionString.java | 23 +-- java/src/main/java/org/rocksdb/Options.java | 7 +- .../main/java/org/rocksdb/OptionsUtil.java | 1 + java/src/main/java/org/rocksdb/PerfLevel.java | 2 +- .../main/java/org/rocksdb/ReadOptions.java | 12 +- .../java/org/rocksdb/RocksCallbackObject.java | 2 +- java/src/main/java/org/rocksdb/RocksDB.java | 123 +++++++------- .../java/org/rocksdb/RocksDBException.java | 2 +- .../java/org/rocksdb/SstFileMetaData.java | 19 +-- .../main/java/org/rocksdb/SstFileReader.java | 1 - .../main/java/org/rocksdb/SstFileWriter.java | 3 +- .../java/org/rocksdb/StatisticsCollector.java | 1 + java/src/main/java/org/rocksdb/Status.java | 4 +- .../java/org/rocksdb/TableProperties.java | 3 + .../main/java/org/rocksdb/ThreadStatus.java | 1 + .../main/java/org/rocksdb/Transaction.java | 56 +++---- .../main/java/org/rocksdb/TransactionDB.java | 15 +- java/src/main/java/org/rocksdb/TtlDB.java | 2 + .../java/org/rocksdb/WBWIRocksIterator.java | 7 +- .../java/org/rocksdb/WriteBufferManager.java | 1 - .../org/rocksdb/util/BytewiseComparator.java | 4 +- .../java/org/rocksdb/util/Environment.java | 17 +- .../java/org/rocksdb/util/IntComparator.java | 2 +- .../util/ReverseBytewiseComparator.java | 4 +- .../test/java/org/rocksdb/DBOptionsTest.java | 2 + .../test/java/org/rocksdb/RocksDBTest.java | 53 ++++++ .../org/rocksdb/util/EnvironmentTest.java | 16 +- 64 files changed, 705 insertions(+), 280 deletions(-) create mode 100644 java/pmd-rules.xml create mode 100644 java/spotbugs-exclude.xml diff --git 
a/.circleci/config.yml b/.circleci/config.yml index 3e10aedde..10fe22c6f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -105,6 +105,15 @@ commands: path: /tmp/core_dumps when: on_fail + post-pmd-steps: + steps: + - store_artifacts: + path: /home/circleci/project/java/target/pmd.xml + when: on_fail + - store_artifacts: + path: /home/circleci/project/java/target/site + when: on_fail + upgrade-cmake: steps: - run: @@ -127,6 +136,13 @@ commands: command: | HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags + install-maven: + steps: + - run: + name: Install maven + command: | + sudo apt-get update -y && sudo apt-get install -y maven + setup-folly: steps: - run: @@ -589,6 +605,27 @@ jobs: command: make V=1 J=8 -j8 jtest - post-steps + build-linux-java-pmd: + machine: + image: ubuntu-2004:202111-02 + resource_class: large + environment: + JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 + steps: + - install-maven + - pre-steps + - run: + name: "Set Java Environment" + command: | + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV + which java && java -version + which javac && javac -version + - run: + name: "PMD RocksDBJava" + command: make V=1 J=8 -j8 jpmd + - post-pmd-steps + build-linux-java-static: executor: linux-docker resource_class: large @@ -853,6 +890,7 @@ workflows: - build-macos-java - build-macos-java-static - build-macos-java-static-universal + - build-linux-java-pmd jobs-macos: jobs: - build-macos diff --git a/Makefile b/Makefile index 71c96f284..ff140b1eb 100644 --- a/Makefile +++ b/Makefile @@ -2419,6 +2419,9 @@ jtest_run: jtest: rocksdbjava cd java;$(MAKE) sample test +jpmd: rocksdbjava rocksdbjavageneratepom + cd java;$(MAKE) pmd + jdb_bench: cd java;$(MAKE) db_bench; diff --git a/java/Makefile b/java/Makefile index 847b18cc4..ede740afa 100644 --- a/java/Makefile +++ b/java/Makefile @@ -267,6 +267,8 @@ JAVADOC_CMD := javadoc endif endif +MAVEN_CMD := mvn + # Look for the Java version (1.6->6, 1.7->7, 1.8->8, 11.0->11, 13.0->13, 15.0->15 etc..) 
JAVAC_VERSION := $(shell $(JAVAC_CMD) -version 2>&1) JAVAC_MAJOR_VERSION := $(word 2,$(subst ., ,$(JAVAC_VERSION))) @@ -455,3 +457,6 @@ run_plugin_test: db_bench: java $(AM_V_GEN)mkdir -p $(BENCHMARK_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(BENCHMARK_MAIN_CLASSES) $(BENCHMARK_MAIN_SRC)/org/rocksdb/benchmark/*.java + +pmd: + $(MAVEN_CMD) pmd:pmd pmd:cpd pmd:check diff --git a/java/pmd-rules.xml b/java/pmd-rules.xml new file mode 100644 index 000000000..b710277f1 --- /dev/null +++ b/java/pmd-rules.xml @@ -0,0 +1,62 @@ + + + + + + Custom rules for checking RocksDB + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/java/pom.xml.template b/java/pom.xml.template index 8a1981c66..9dd9c74f3 100644 --- a/java/pom.xml.template +++ b/java/pom.xml.template @@ -140,7 +140,44 @@ - + + com.github.spotbugs + spotbugs-maven-plugin + 4.7.2.1 + + spotbugs-exclude.xml + + + + + com.github.spotbugs + spotbugs + 4.7.3 + + + + + org.apache.maven.plugins + maven-pmd-plugin + 3.20.0 + + + + check + cpd-check + + + + + + + /pmd-rules.xml + + + + + + @@ -174,5 +211,15 @@ 1.10.19 test - + + + + + + org.apache.maven.plugins + maven-jxr-plugin + 3.3.0 + + + diff --git a/java/spotbugs-exclude.xml b/java/spotbugs-exclude.xml new file mode 100644 index 000000000..bc3d5ea9a --- /dev/null +++ b/java/spotbugs-exclude.xml @@ -0,0 +1,151 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java b/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java index 4bb985a34..728cda8c1 100644 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java +++ b/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java @@ -31,8 +31,9 @@ protected long initializeNative(final long... 
nativeParameterHandles) { * * @return native handle of the CompactionFilter */ - private long createCompactionFilter(final boolean fullCompaction, - final boolean manualCompaction) { + @SuppressWarnings({"PMD.UnusedPrivateMethod", "PMD.CloseResource"}) + private long createCompactionFilter( + final boolean fullCompaction, final boolean manualCompaction) { final T filter = createCompactionFilter( new AbstractCompactionFilter.Context(fullCompaction, manualCompaction)); diff --git a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java b/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java index 2d1bf702b..d0ceef93d 100644 --- a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java +++ b/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java @@ -37,6 +37,7 @@ class AbstractComparatorJniBridge { * * @return the result of the comparison */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private static int compareInternal(final AbstractComparator comparator, final ByteBuffer a, final int aLen, final ByteBuffer b, final int bLen) { if (aLen != -1) { @@ -80,6 +81,7 @@ private static int compareInternal(final AbstractComparator comparator, final By * @return either {@code startLen} if the start key is unchanged, otherwise * the new length of the start key */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private static int findShortestSeparatorInternal(final AbstractComparator comparator, final ByteBuffer start, final int startLen, final ByteBuffer limit, final int limitLen) { if (startLen != -1) { @@ -108,6 +110,7 @@ private static int findShortestSeparatorInternal(final AbstractComparator compar * * @return either keyLen if the key is unchanged, otherwise the new length of the key */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private static int findShortSuccessorInternal( final AbstractComparator comparator, final ByteBuffer key, final int keyLen) { if (keyLen != -1) { diff --git a/java/src/main/java/org/rocksdb/AbstractEventListener.java b/java/src/main/java/org/rocksdb/AbstractEventListener.java index d640d3423..c9371c45e 100644 --- a/java/src/main/java/org/rocksdb/AbstractEventListener.java +++ b/java/src/main/java/org/rocksdb/AbstractEventListener.java @@ -10,6 +10,7 @@ /** * Base class for Event Listeners. */ +@SuppressWarnings("PMD.AvoidDuplicateLiterals") public abstract class AbstractEventListener extends RocksCallbackObject implements EventListener { public enum EnabledEventCallback { ON_FLUSH_COMPLETED((byte) 0x0), @@ -58,7 +59,7 @@ byte getValue() { * @throws IllegalArgumentException if the value is unknown. */ static EnabledEventCallback fromValue(final byte value) { - for (final EnabledEventCallback enabledEventCallback : EnabledEventCallback.values()) { + for (final EnabledEventCallback enabledEventCallback : values()) { if (enabledEventCallback.value == value) { return enabledEventCallback; } @@ -124,8 +125,9 @@ public void onFlushCompleted(final RocksDB db, final FlushJobInfo flushJobInfo) * @param dbHandle native handle of the database * @param flushJobInfo the flush job info */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onFlushCompletedProxy(final long dbHandle, final FlushJobInfo flushJobInfo) { - final RocksDB db = new RocksDB(dbHandle); + final RocksDB db = new RocksDB(dbHandle); // NOPMD - CloseResource db.disOwnNativeHandle(); // we don't own this! 
onFlushCompleted(db, flushJobInfo); } @@ -142,8 +144,9 @@ public void onFlushBegin(final RocksDB db, final FlushJobInfo flushJobInfo) { * @param dbHandle native handle of the database * @param flushJobInfo the flush job info */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onFlushBeginProxy(final long dbHandle, final FlushJobInfo flushJobInfo) { - final RocksDB db = new RocksDB(dbHandle); + final RocksDB db = new RocksDB(dbHandle); // NOPMD - CloseResource db.disOwnNativeHandle(); // we don't own this! onFlushBegin(db, flushJobInfo); } @@ -165,9 +168,10 @@ public void onCompactionBegin(final RocksDB db, final CompactionJobInfo compacti * @param dbHandle native handle of the database * @param compactionJobInfo the flush job info */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onCompactionBeginProxy( final long dbHandle, final CompactionJobInfo compactionJobInfo) { - final RocksDB db = new RocksDB(dbHandle); + final RocksDB db = new RocksDB(dbHandle); // NOPMD - CloseResource db.disOwnNativeHandle(); // we don't own this! onCompactionBegin(db, compactionJobInfo); } @@ -184,9 +188,10 @@ public void onCompactionCompleted(final RocksDB db, final CompactionJobInfo comp * @param dbHandle native handle of the database * @param compactionJobInfo the flush job info */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onCompactionCompletedProxy( final long dbHandle, final CompactionJobInfo compactionJobInfo) { - final RocksDB db = new RocksDB(dbHandle); + final RocksDB db = new RocksDB(dbHandle); // NOPMD - CloseResource db.disOwnNativeHandle(); // we don't own this! onCompactionCompleted(db, compactionJobInfo); } @@ -225,9 +230,10 @@ public void onExternalFileIngested( * @param dbHandle native handle of the database * @param externalFileIngestionInfo the flush job info */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onExternalFileIngestedProxy( final long dbHandle, final ExternalFileIngestionInfo externalFileIngestionInfo) { - final RocksDB db = new RocksDB(dbHandle); + final RocksDB db = new RocksDB(dbHandle); // NOPMD - CloseResource db.disOwnNativeHandle(); // we don't own this! onExternalFileIngested(db, externalFileIngestionInfo); } @@ -245,6 +251,7 @@ public void onBackgroundError( * @param reasonByte byte value representing error reason * @param backgroundError status with error code */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private void onBackgroundErrorProxy(final byte reasonByte, final Status backgroundError) { onBackgroundError(BackgroundErrorReason.fromValue(reasonByte), backgroundError); } @@ -307,6 +314,7 @@ public boolean onErrorRecoveryBegin( * @param reasonByte byte value representing error reason * @param backgroundError status with error code */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private boolean onErrorRecoveryBeginProxy(final byte reasonByte, final Status backgroundError) { return onErrorRecoveryBegin(BackgroundErrorReason.fromValue(reasonByte), backgroundError); } diff --git a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java b/java/src/main/java/org/rocksdb/AbstractMutableOptions.java index 1a6251bd4..ff9b8569f 100644 --- a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java +++ b/java/src/main/java/org/rocksdb/AbstractMutableOptions.java @@ -3,12 +3,18 @@ import java.util.*; -public abstract class AbstractMutableOptions { - +/** + * This class is not strictly abstract in Java language terms, so we do not declare it as such. 
+ * The name remains {@code AbstractMutableOptions} to reflect the underlying C++ name. + * The constructor is protected, so it will always be used as a base class. + */ +public class AbstractMutableOptions { protected static final String KEY_VALUE_PAIR_SEPARATOR = ";"; protected static final char KEY_VALUE_SEPARATOR = '='; static final String INT_ARRAY_INT_SEPARATOR = ":"; + private static final String HAS_NOT_BEEN_SET = " has not been set"; + protected final String[] keys; private final String[] values; @@ -18,15 +24,18 @@ public abstract class AbstractMutableOptions { * @param keys the keys * @param values the values */ + @SuppressWarnings("PMD.ArrayIsStoredDirectly") protected AbstractMutableOptions(final String[] keys, final String[] values) { this.keys = keys; this.values = values; } + @SuppressWarnings("PMD.MethodReturnsInternalArray") String[] getKeys() { return keys; } + @SuppressWarnings("PMD.MethodReturnsInternalArray") String[] getValues() { return values; } @@ -106,7 +115,7 @@ protected double getDouble(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } return value.asDouble(); } @@ -125,7 +134,7 @@ protected long getLong(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } return value.asLong(); } @@ -144,7 +153,7 @@ protected int getInt(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } return value.asInt(); } @@ -163,7 +172,7 @@ protected boolean getBoolean(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } return value.asBoolean(); } @@ -182,7 +191,7 @@ protected int[] getIntArray(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if(value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } return value.asIntArray(); } @@ -202,7 +211,7 @@ protected > N getEnum(final K key) throws NoSuchElementException, NumberFormatException { final MutableOptionValue value = options.get(key); if (value == null) { - throw new NoSuchElementException(key.name() + " has not been set"); + throw new NoSuchElementException(key.name() + HAS_NOT_BEEN_SET); } if (!(value instanceof MutableOptionValue.MutableOptionEnumValue)) { @@ -225,7 +234,7 @@ private long parseAsLong(final String value) { } catch (final NumberFormatException nfe) { final double doubleValue = Double.parseDouble(value); if (doubleValue != Math.round(doubleValue)) - throw new IllegalArgumentException("Unable to parse or round " + value + " to long"); + throw new IllegalArgumentException("Unable to parse or round " + value + " to long", nfe); return Math.round(doubleValue); } } @@ 
-243,7 +252,7 @@ private int parseAsInt(final String value) { } catch (final NumberFormatException nfe) { final double doubleValue = Double.parseDouble(value); if (doubleValue != Math.round(doubleValue)) - throw new IllegalArgumentException("Unable to parse or round " + value + " to int"); + throw new IllegalArgumentException("Unable to parse or round " + value + " to int", nfe); return (int) Math.round(doubleValue); } } @@ -287,6 +296,7 @@ protected U fromParsed(final List options, final boolean ign * @return the same object, after adding options * @throws IllegalArgumentException if the key is unknown, or a value has the wrong type/form */ + @SuppressWarnings("PMD.AvoidLiteralsInIfCondition") private U fromOptionString(final OptionString.Entry option, final boolean ignoreUnknown) throws IllegalArgumentException { Objects.requireNonNull(option.key); @@ -340,12 +350,12 @@ private U fromOptionString(final OptionString.Entry option, final boolean ignore case ENUM: final String optionName = key.name(); - if (optionName.equals("prepopulate_blob_cache")) { + if ("prepopulate_blob_cache".equals(optionName)) { final PrepopulateBlobCache prepopulateBlobCache = PrepopulateBlobCache.getFromInternal(valueStr); return setEnum(key, prepopulateBlobCache); - } else if (optionName.equals("compression") - || optionName.equals("blob_compression_type")) { + } else if ("compression".equals(optionName) + || "blob_compression_type".equals(optionName)) { final CompressionType compressionType = CompressionType.getFromInternal(valueStr); return setEnum(key, compressionType); } else { diff --git a/java/src/main/java/org/rocksdb/AbstractSlice.java b/java/src/main/java/org/rocksdb/AbstractSlice.java index 0681b6758..f321b9910 100644 --- a/java/src/main/java/org/rocksdb/AbstractSlice.java +++ b/java/src/main/java/org/rocksdb/AbstractSlice.java @@ -119,14 +119,16 @@ public String toString() { */ public int compare(final AbstractSlice other) { assert (other != null); - if(!isOwningHandle()) { - return other.isOwningHandle() ? -1 : 0; + if (isOwningHandle() && other.isOwningHandle()) { + return compare0(getNativeHandle(), other.getNativeHandle()); + } + if (!isOwningHandle() && !other.isOwningHandle()) { + return 0; + } + if (isOwningHandle()) { + return 1; } else { - if(!other.isOwningHandle()) { - return 1; - } else { - return compare0(getNativeHandle(), other.getNativeHandle()); - } + return -1; } } diff --git a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java b/java/src/main/java/org/rocksdb/AbstractTraceWriter.java index 13edfbd84..e235c9296 100644 --- a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java +++ b/java/src/main/java/org/rocksdb/AbstractTraceWriter.java @@ -25,6 +25,7 @@ protected long initializeNative(final long... nativeParameterHandles) { * {@link Status.Code#getValue()} and the second byte is the * {@link Status.SubCode#getValue()}. */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private short writeProxy(final long sliceHandle) { try { write(new Slice(sliceHandle)); @@ -41,6 +42,7 @@ private short writeProxy(final long sliceHandle) { * {@link Status.Code#getValue()} and the second byte is the * {@link Status.SubCode#getValue()}. 
*/ + @SuppressWarnings("PMD.UnusedPrivateMethod") private short closeWriterProxy() { try { closeWriter(); diff --git a/java/src/main/java/org/rocksdb/AbstractWalFilter.java b/java/src/main/java/org/rocksdb/AbstractWalFilter.java index fc77eab8e..92180f90e 100644 --- a/java/src/main/java/org/rocksdb/AbstractWalFilter.java +++ b/java/src/main/java/org/rocksdb/AbstractWalFilter.java @@ -30,9 +30,9 @@ protected long initializeNative(final long... nativeParameterHandles) { * {@link WalFilter.LogRecordFoundResult#walProcessingOption} * {@link WalFilter.LogRecordFoundResult#batchChanged}. */ - private short logRecordFoundProxy(final long logNumber, - final String logFileName, final long batchHandle, - final long newBatchHandle) { + @SuppressWarnings("PMD.UnusedPrivateMethod") + private short logRecordFoundProxy(final long logNumber, final String logFileName, + final long batchHandle, final long newBatchHandle) { final LogRecordFoundResult logRecordFoundResult = logRecordFound( logNumber, logFileName, new WriteBatch(batchHandle), new WriteBatch(newBatchHandle)); diff --git a/java/src/main/java/org/rocksdb/BackupEngineOptions.java b/java/src/main/java/org/rocksdb/BackupEngineOptions.java index 2a358faac..7747b944f 100644 --- a/java/src/main/java/org/rocksdb/BackupEngineOptions.java +++ b/java/src/main/java/org/rocksdb/BackupEngineOptions.java @@ -228,10 +228,9 @@ public boolean backupLogFiles() { * * @return instance of current BackupEngineOptions. */ - public BackupEngineOptions setBackupRateLimit(long backupRateLimit) { + public BackupEngineOptions setBackupRateLimit(final long backupRateLimit) { assert(isOwningHandle()); - backupRateLimit = (backupRateLimit <= 0) ? 0 : backupRateLimit; - setBackupRateLimit(nativeHandle_, backupRateLimit); + setBackupRateLimit(nativeHandle_, (backupRateLimit <= 0) ? 0 : backupRateLimit); return this; } @@ -286,10 +285,9 @@ public RateLimiter backupRateLimiter() { * * @return instance of current BackupEngineOptions. */ - public BackupEngineOptions setRestoreRateLimit(long restoreRateLimit) { + public BackupEngineOptions setRestoreRateLimit(final long restoreRateLimit) { assert(isOwningHandle()); - restoreRateLimit = (restoreRateLimit <= 0) ? 0 : restoreRateLimit; - setRestoreRateLimit(nativeHandle_, restoreRateLimit); + setRestoreRateLimit(nativeHandle_, (restoreRateLimit <= 0) ? 
0 : restoreRateLimit); return this; } diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java index ea9a766ad..c82c3ea10 100644 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java @@ -11,7 +11,7 @@ */ // TODO(AR) should be renamed BlockBasedTableOptions public class BlockBasedTableConfig extends TableFormatConfig { - + @SuppressWarnings("PMD.NullAssignment") public BlockBasedTableConfig() { //TODO(AR) flushBlockPolicyFactory cacheIndexAndFilterBlocks = false; @@ -87,11 +87,12 @@ private BlockBasedTableConfig(final boolean cacheIndexAndFilterBlocks, this.enableIndexCompression = enableIndexCompression; this.blockAlign = blockAlign; this.indexShortening = IndexShorteningMode.values()[indexShortening]; - Filter filterPolicy = FilterPolicyType.values()[filterPolicyType].createFilter( - filterPolicyHandle, filterPolicyConfigValue); - if (filterPolicy != null) { - filterPolicy.disOwnNativeHandle(); - this.setFilterPolicy(filterPolicy); + try (Filter filterPolicy = FilterPolicyType.values()[filterPolicyType].createFilter( + filterPolicyHandle, filterPolicyConfigValue)) { + if (filterPolicy != null) { + filterPolicy.disOwnNativeHandle(); + this.setFilterPolicy(filterPolicy); + } } } diff --git a/java/src/main/java/org/rocksdb/BloomFilter.java b/java/src/main/java/org/rocksdb/BloomFilter.java index 0d1a5ad3b..c08966c0e 100644 --- a/java/src/main/java/org/rocksdb/BloomFilter.java +++ b/java/src/main/java/org/rocksdb/BloomFilter.java @@ -80,18 +80,19 @@ public BloomFilter(final double bitsPerKey) { * @param bitsPerKey number of bits to use * @param IGNORED_useBlockBasedMode obsolete, ignored parameter */ + @SuppressWarnings("PMD.UnusedFormalParameter") public BloomFilter(final double bitsPerKey, final boolean IGNORED_useBlockBasedMode) { this(bitsPerKey); } + @SuppressWarnings("PMD.") @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - BloomFilter that = (BloomFilter) o; - return bitsPerKey == that.bitsPerKey; + return bitsPerKey == ((BloomFilter) o).bitsPerKey; } @Override diff --git a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java b/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java index f918a8d03..4ab9e8475 100644 --- a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java +++ b/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java @@ -42,6 +42,7 @@ public class ByteBufferGetStatus { * * @param status the status of the request to fetch into the buffer */ + @SuppressWarnings("PMD.NullAssignment") ByteBufferGetStatus(final Status status) { this.status = status; this.requiredSize = 0; diff --git a/java/src/main/java/org/rocksdb/ChecksumType.java b/java/src/main/java/org/rocksdb/ChecksumType.java index e03fa14ba..5b3d22492 100644 --- a/java/src/main/java/org/rocksdb/ChecksumType.java +++ b/java/src/main/java/org/rocksdb/ChecksumType.java @@ -37,7 +37,7 @@ public byte getValue() { return value_; } - private ChecksumType(final byte value) { + ChecksumType(final byte value) { value_ = value; } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java b/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java index 125a8dcf8..dd9567829 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java @@ -33,8 +33,9 @@ public ColumnFamilyDescriptor(final byte[] 
columnFamilyName) { * column family. * @since 3.10.0 */ - public ColumnFamilyDescriptor(final byte[] columnFamilyName, - final ColumnFamilyOptions columnFamilyOptions) { + @SuppressWarnings("PMD.ArrayIsStoredDirectly") + public ColumnFamilyDescriptor( + final byte[] columnFamilyName, final ColumnFamilyOptions columnFamilyOptions) { columnFamilyName_ = columnFamilyName; columnFamilyOptions_ = columnFamilyOptions; } @@ -45,6 +46,7 @@ public ColumnFamilyDescriptor(final byte[] columnFamilyName, * @return column family name. * @since 3.10.0 */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public byte[] getName() { return columnFamilyName_; } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java b/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java index 32ea4b04d..9fd63e768 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java @@ -102,7 +102,7 @@ public boolean equals(final Object o) { return false; } - final ColumnFamilyHandle that = (ColumnFamilyHandle) o; + @SuppressWarnings("PMD.CloseResource") final ColumnFamilyHandle that = (ColumnFamilyHandle) o; try { return rocksDB_.nativeHandle_ == that.rocksDB_.nativeHandle_ && getID() == that.getID() && diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java b/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java index 191904017..9b6d1a70c 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java @@ -11,6 +11,7 @@ /** * The metadata that describes a column family. */ +@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public class ColumnFamilyMetaData { private final long size; private final long fileCount; @@ -55,6 +56,7 @@ public long fileCount() { * * @return the name */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public byte[] name() { return name; } diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java index 65a2dc5d6..607a17936 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java @@ -15,10 +15,9 @@ * As a descendant of {@link AbstractNativeReference}, this class is {@link AutoCloseable} * and will be automatically released if opened in the preamble of a try with resources block. */ -public class ColumnFamilyOptions extends RocksObject - implements ColumnFamilyOptionsInterface, - MutableColumnFamilyOptionsInterface { - +public class ColumnFamilyOptions + extends RocksObject implements ColumnFamilyOptionsInterface, + MutableColumnFamilyOptionsInterface { /** * Construct ColumnFamilyOptions. *

@@ -1013,6 +1012,7 @@ public ColumnFamilyOptions setEnableBlobFiles(final boolean enableBlobFiles) { * * @return true iff blob files are currently enabled */ + @Override public boolean enableBlobFiles() { return enableBlobFiles(nativeHandle_); } diff --git a/java/src/main/java/org/rocksdb/CompactionStyle.java b/java/src/main/java/org/rocksdb/CompactionStyle.java index 794074df6..7b955a7a2 100644 --- a/java/src/main/java/org/rocksdb/CompactionStyle.java +++ b/java/src/main/java/org/rocksdb/CompactionStyle.java @@ -5,8 +5,6 @@ package org.rocksdb; -import java.util.List; - /** * Enum CompactionStyle *

@@ -25,7 +23,7 @@ * the old data, so it's basically a TTL compaction style. *

  • NONE - Disable background compaction. * Compaction jobs are submitted - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, + * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, java.util.List, int, int, * CompactionJobInfo)} ()}.
  • * * diff --git a/java/src/main/java/org/rocksdb/ConfigOptions.java b/java/src/main/java/org/rocksdb/ConfigOptions.java index fa69b9896..b3b5423c8 100644 --- a/java/src/main/java/org/rocksdb/ConfigOptions.java +++ b/java/src/main/java/org/rocksdb/ConfigOptions.java @@ -7,7 +7,6 @@ package org.rocksdb; public class ConfigOptions extends RocksObject { - /** * Construct with default Options */ diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/rocksdb/DBOptions.java index 4d66b15f2..de10c0585 100644 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ b/java/src/main/java/org/rocksdb/DBOptions.java @@ -16,9 +16,7 @@ * and will be automatically released if opened in the preamble of a try with resources block. */ public class DBOptions extends RocksObject - implements DBOptionsInterface, - MutableDBOptionsInterface { - + implements DBOptionsInterface, MutableDBOptionsInterface { /** * Construct DBOptions. *

    diff --git a/java/src/main/java/org/rocksdb/EncodingType.java b/java/src/main/java/org/rocksdb/EncodingType.java index c2790c195..e93ffcc23 100644 --- a/java/src/main/java/org/rocksdb/EncodingType.java +++ b/java/src/main/java/org/rocksdb/EncodingType.java @@ -47,7 +47,7 @@ public byte getValue() { return value_; } - private EncodingType(final byte value) { + EncodingType(final byte value) { value_ = value; } diff --git a/java/src/main/java/org/rocksdb/Env.java b/java/src/main/java/org/rocksdb/Env.java index 9499cf3b6..6783d8158 100644 --- a/java/src/main/java/org/rocksdb/Env.java +++ b/java/src/main/java/org/rocksdb/Env.java @@ -26,8 +26,9 @@ public abstract class Env extends RocksObject { * * @return the default {@link org.rocksdb.RocksEnv} instance. */ + @SuppressWarnings({"PMD.CloseResource", "PMD.AssignmentInOperand"}) public static Env getDefault() { - RocksEnv defaultEnv = null; + RocksEnv defaultEnv; RocksEnv newDefaultEnv = null; while ((defaultEnv = SINGULAR_DEFAULT_ENV.get()) == null) { diff --git a/java/src/main/java/org/rocksdb/EventListener.java b/java/src/main/java/org/rocksdb/EventListener.java index 27652eaf8..a26325806 100644 --- a/java/src/main/java/org/rocksdb/EventListener.java +++ b/java/src/main/java/org/rocksdb/EventListener.java @@ -5,8 +5,6 @@ package org.rocksdb; -import java.util.List; - /** * EventListener class contains a set of callback functions that will * be called when specific RocksDB event happens such as flush. It can @@ -16,14 +14,14 @@ * Note that callback functions should not run for an extended period of * time before the function returns, otherwise RocksDB may be blocked. * For example, it is not suggested to do - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, + * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, java.util.List, int, int, * CompactionJobInfo)} (as it may run for a long while) or issue many of * {@link RocksDB#put(ColumnFamilyHandle, WriteOptions, byte[], byte[])} * (as Put may be blocked in certain cases) in the same thread in the * EventListener callback. *

    * However, doing - * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, List, int, int, + * {@link RocksDB#compactFiles(CompactionOptions, ColumnFamilyHandle, java.util.List, int, int, * CompactionJobInfo)} and {@link RocksDB#put(ColumnFamilyHandle, WriteOptions, byte[], byte[])} in * another thread is considered safe. *

    diff --git a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java b/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java index 4bc860d1c..a9868df57 100644 --- a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java +++ b/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java @@ -15,7 +15,7 @@ * and post a warning in the LOG. */ public class HashLinkedListMemTableConfig extends MemTableConfig { - public static final long DEFAULT_BUCKET_COUNT = 50000; + public static final long DEFAULT_BUCKET_COUNT = 50_000; public static final long DEFAULT_HUGE_PAGE_TLB_SIZE = 0; public static final int DEFAULT_BUCKET_ENTRIES_LOG_THRES = 4096; public static final boolean diff --git a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java b/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java index 7cfa1c0df..80d6b7115 100644 --- a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java +++ b/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java @@ -15,7 +15,7 @@ * and post a warning in the LOG. */ public class HashSkipListMemTableConfig extends MemTableConfig { - public static final int DEFAULT_BUCKET_COUNT = 1000000; + public static final int DEFAULT_BUCKET_COUNT = 1_000_000; public static final int DEFAULT_BRANCHING_FACTOR = 4; public static final int DEFAULT_HEIGHT = 4; diff --git a/java/src/main/java/org/rocksdb/LevelMetaData.java b/java/src/main/java/org/rocksdb/LevelMetaData.java index c5685098b..424bcb026 100644 --- a/java/src/main/java/org/rocksdb/LevelMetaData.java +++ b/java/src/main/java/org/rocksdb/LevelMetaData.java @@ -11,6 +11,7 @@ /** * The metadata that describes a level. */ +@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public class LevelMetaData { private final int level; private final long size; diff --git a/java/src/main/java/org/rocksdb/LiveFileMetaData.java b/java/src/main/java/org/rocksdb/LiveFileMetaData.java index 35d883e18..4c670e3ae 100644 --- a/java/src/main/java/org/rocksdb/LiveFileMetaData.java +++ b/java/src/main/java/org/rocksdb/LiveFileMetaData.java @@ -8,6 +8,7 @@ /** * The full set of metadata associated with each SST file. */ +@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public class LiveFileMetaData extends SstFileMetaData { private final byte[] columnFamilyName; private final int level; @@ -40,6 +41,7 @@ private LiveFileMetaData( * * @return the name of the column family */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public byte[] columnFamilyName() { return columnFamilyName; } diff --git a/java/src/main/java/org/rocksdb/LogFile.java b/java/src/main/java/org/rocksdb/LogFile.java index ef24a6427..5ee2c9fcc 100644 --- a/java/src/main/java/org/rocksdb/LogFile.java +++ b/java/src/main/java/org/rocksdb/LogFile.java @@ -5,6 +5,7 @@ package org.rocksdb; +@SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public class LogFile { private final String pathName; private final long logNumber; diff --git a/java/src/main/java/org/rocksdb/MemoryUtil.java b/java/src/main/java/org/rocksdb/MemoryUtil.java index 15b9f001a..dac6d9b84 100644 --- a/java/src/main/java/org/rocksdb/MemoryUtil.java +++ b/java/src/main/java/org/rocksdb/MemoryUtil.java @@ -27,7 +27,9 @@ public class MemoryUtil { * @param caches Set of caches to collect memory usage for. * @return Map from {@link MemoryUsageType} to memory usage as a {@link Long}. 
*/ - public static Map getApproximateMemoryUsageByType(final List dbs, final Set caches) { + @SuppressWarnings("PMD.CloseResource") + public static Map getApproximateMemoryUsageByType( + final List dbs, final Set caches) { final int dbCount = (dbs == null) ? 0 : dbs.size(); final int cacheCount = (caches == null) ? 0 : caches.size(); final long[] dbHandles = new long[dbCount]; diff --git a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java b/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java index 5ee042a86..b270b8d36 100644 --- a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java +++ b/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java @@ -15,6 +15,8 @@ */ public abstract class NativeComparatorWrapper extends AbstractComparator { + static final String NATIVE_CODE_IMPLEMENTATION_SHOULD_NOT_BE_CALLED = + "This should not be called. Implementation is in Native code"; @Override final ComparatorType getComparatorType() { @@ -23,26 +25,22 @@ final ComparatorType getComparatorType() { @Override public final String name() { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); + throw new IllegalStateException(NATIVE_CODE_IMPLEMENTATION_SHOULD_NOT_BE_CALLED); } @Override public final int compare(final ByteBuffer s1, final ByteBuffer s2) { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); + throw new IllegalStateException(NATIVE_CODE_IMPLEMENTATION_SHOULD_NOT_BE_CALLED); } @Override public final void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { - throw new IllegalStateException("This should not be called. " + - "Implementation is in Native code"); + throw new IllegalStateException(NATIVE_CODE_IMPLEMENTATION_SHOULD_NOT_BE_CALLED); } @Override public final void findShortSuccessor(final ByteBuffer key) { - throw new IllegalStateException("This should not be called. 
" + - "Implementation is in Native code"); + throw new IllegalStateException(NATIVE_CODE_IMPLEMENTATION_SHOULD_NOT_BE_CALLED); } /** diff --git a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java index b97cf28b9..6fe97994d 100644 --- a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/rocksdb/NativeLibraryLoader.java @@ -16,13 +16,17 @@ public class NativeLibraryLoader { private static final NativeLibraryLoader instance = new NativeLibraryLoader(); private static boolean initialized = false; - private static final String sharedLibraryName = Environment.getSharedLibraryName("rocksdb"); - private static final String jniLibraryName = Environment.getJniLibraryName("rocksdb"); + private static final String ROCKSDB_LIBRARY_NAME = "rocksdb"; + + private static final String sharedLibraryName = + Environment.getSharedLibraryName(ROCKSDB_LIBRARY_NAME); + private static final String jniLibraryName = Environment.getJniLibraryName(ROCKSDB_LIBRARY_NAME); private static final /* @Nullable */ String fallbackJniLibraryName = - Environment.getFallbackJniLibraryName("rocksdb"); - private static final String jniLibraryFileName = Environment.getJniLibraryFileName("rocksdb"); + Environment.getFallbackJniLibraryName(ROCKSDB_LIBRARY_NAME); + private static final String jniLibraryFileName = + Environment.getJniLibraryFileName(ROCKSDB_LIBRARY_NAME); private static final /* @Nullable */ String fallbackJniLibraryFileName = - Environment.getFallbackJniLibraryFileName("rocksdb"); + Environment.getFallbackJniLibraryFileName(ROCKSDB_LIBRARY_NAME); private static final String tempFilePrefix = "librocksdbjni"; private static final String tempFileSuffix = Environment.getJniLibraryExtension(); @@ -51,6 +55,7 @@ public static NativeLibraryLoader getInstance() { * * @throws java.io.IOException if a filesystem operation fails. 
*/ + @SuppressWarnings("PMD.EmptyCatchBlock") public synchronized void loadLibrary(final String tmpDir) throws IOException { try { // try dynamic library @@ -104,64 +109,58 @@ void loadLibraryFromJar(final String tmpDir) } } - File loadLibraryFromJarToTemp(final String tmpDir) - throws IOException { - InputStream is = null; - try { - // attempt to look up the static library in the jar file - String libraryFileName = jniLibraryFileName; - is = getClass().getClassLoader().getResourceAsStream(libraryFileName); - - if (is == null) { - // is there a fallback we can try - if (fallbackJniLibraryFileName == null) { - throw new RuntimeException(libraryFileName + " was not found inside JAR."); - } - - // attempt to look up the fallback static library in the jar file - libraryFileName = fallbackJniLibraryFileName; - is = getClass().getClassLoader().getResourceAsStream(libraryFileName); - if (is == null) { - throw new RuntimeException(libraryFileName + " was not found inside JAR."); - } + private File createTemp(final String tmpDir, final String libraryFileName) throws IOException { + // create a temporary file to copy the library to + final File temp; + if (tmpDir == null || tmpDir.isEmpty()) { + temp = File.createTempFile(tempFilePrefix, tempFileSuffix); + } else { + final File parentDir = new File(tmpDir); + if (!parentDir.exists()) { + throw new RuntimeException( + "Directory: " + parentDir.getAbsolutePath() + " does not exist!"); } - - // create a temporary file to copy the library to - final File temp; - if (tmpDir == null || tmpDir.isEmpty()) { - temp = File.createTempFile(tempFilePrefix, tempFileSuffix); - } else { - final File parentDir = new File(tmpDir); - if (!parentDir.exists()) { - throw new RuntimeException( - "Directory: " + parentDir.getAbsolutePath() + " does not exist!"); - } - temp = new File(parentDir, libraryFileName); - if (temp.exists() && !temp.delete()) { - throw new RuntimeException( - "File: " + temp.getAbsolutePath() + " already exists and cannot be removed."); - } - if (!temp.createNewFile()) { - throw new RuntimeException("File: " + temp.getAbsolutePath() + " could not be created."); - } + temp = new File(parentDir, libraryFileName); + if (temp.exists() && !temp.delete()) { + throw new RuntimeException( + "File: " + temp.getAbsolutePath() + " already exists and cannot be removed."); } - if (!temp.exists()) { - throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); - } else { - temp.deleteOnExit(); + if (!temp.createNewFile()) { + throw new RuntimeException("File: " + temp.getAbsolutePath() + " could not be created."); } + } + if (temp.exists()) { + temp.deleteOnExit(); + return temp; + } else { + throw new RuntimeException("File " + temp.getAbsolutePath() + " does not exist."); + } + } - // copy the library from the Jar file to the temp destination - Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + @SuppressWarnings({"PMD.UseProperClassLoader", "PMD.UseTryWithResources"}) + File loadLibraryFromJarToTemp(final String tmpDir) throws IOException { + try (InputStream is = getClass().getClassLoader().getResourceAsStream(jniLibraryFileName)) { + if (is != null) { + final File temp = createTemp(tmpDir, jniLibraryFileName); + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + return temp; + } + } - // return the temporary library file - return temp; + if (fallbackJniLibraryFileName == null) { + throw new RuntimeException(fallbackJniLibraryFileName + " was not found inside JAR."); + } - } finally { + try 
(InputStream is = + getClass().getClassLoader().getResourceAsStream(fallbackJniLibraryFileName)) { if (is != null) { - is.close(); + final File temp = createTemp(tmpDir, fallbackJniLibraryFileName); + Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING); + return temp; } } + + throw new RuntimeException(jniLibraryFileName + " was not found inside JAR."); } /** diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java b/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java index ac3cdc210..80d3c720b 100644 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java +++ b/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java @@ -107,6 +107,7 @@ public static OptimisticTransactionDB open(final DBOptions dbOptions, * * @throws RocksDBException if an error occurs whilst closing. */ + @Override public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { @@ -128,6 +129,7 @@ public void closeE() throws RocksDBException { *

    * See also {@link #close()}. */ + @SuppressWarnings("PMD.EmptyCatchBlock") @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { diff --git a/java/src/main/java/org/rocksdb/OptionString.java b/java/src/main/java/org/rocksdb/OptionString.java index 61d2a94fe..bcbf1d152 100644 --- a/java/src/main/java/org/rocksdb/OptionString.java +++ b/java/src/main/java/org/rocksdb/OptionString.java @@ -9,6 +9,7 @@ import java.util.List; import java.util.Objects; +@SuppressWarnings("PMD.AvoidStringBufferField") public class OptionString { private static final char kvPairSeparator = ';'; private static final char kvSeparator = '='; @@ -39,6 +40,7 @@ public static Value fromComplex(final List complex) { return new Value(null, complex); } + @Override public String toString() { final StringBuilder sb = new StringBuilder(); if (isList()) { @@ -68,6 +70,7 @@ private Entry(final String key, final Value value) { this.value = value; } + @Override public String toString() { return "" + key + "=" + value; } @@ -75,6 +78,8 @@ public String toString() { static class Parser { static class Exception extends RuntimeException { + private static final long serialVersionUID = 752283782841276408L; + public Exception(final String s) { super(s); } @@ -122,7 +127,7 @@ private boolean hasNext() { return (sb.length() > 0); } - private boolean is(final char c) { + private boolean isChar(final char c) { return (sb.length() > 0 && sb.charAt(0) == c); } @@ -151,10 +156,10 @@ private String parseKey() { } private String parseSimpleValue() { - if (is(wrappedValueBegin)) { + if (isChar(wrappedValueBegin)) { next(); final String result = parseSimpleValue(); - if (!is(wrappedValueEnd)) { + if (!isChar(wrappedValueEnd)) { exception("Expected to end a wrapped value with " + wrappedValueEnd); } next(); @@ -172,7 +177,7 @@ private List parseList() { final List list = new ArrayList<>(1); while (true) { list.add(parseSimpleValue()); - if (!is(arrayValueSeparator)) + if (!isChar(arrayValueSeparator)) break; next(); @@ -188,7 +193,7 @@ private Entry parseOption() { } final String key = parseKey(); skipWhite(); - if (is(kvSeparator)) { + if (isChar(kvSeparator)) { next(); } else { exception("Expected = separating key and value"); @@ -200,12 +205,12 @@ private Entry parseOption() { private Value parseValue() { skipWhite(); - if (is(complexValueBegin)) { + if (isChar(complexValueBegin)) { next(); skipWhite(); final Value value = Value.fromComplex(parseComplex()); skipWhite(); - if (is(complexValueEnd)) { + if (isChar(complexValueEnd)) { next(); skipWhite(); } else { @@ -214,7 +219,7 @@ private Value parseValue() { return value; } else if (isValueChar()) { return Value.fromList(parseList()); - } else if (is(kvPairSeparator)) { + } else if (isChar(kvPairSeparator)) { // e.g. empty vector embedded in a struct option looks like // struct_opt = {vector_opt=;...} final List entries = new ArrayList<>(); @@ -232,7 +237,7 @@ private List parseComplex() { if (hasNext()) { entries.add(parseOption()); skipWhite(); - while (is(kvPairSeparator)) { + while (isChar(kvPairSeparator)) { next(); skipWhite(); if (!isKeyChar()) { diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/rocksdb/Options.java index 7ba0f8b93..29f5e8e0d 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/rocksdb/Options.java @@ -16,11 +16,8 @@ * and will be automatically released if opened in the preamble of a try with resources block. 
*/ public class Options extends RocksObject - implements DBOptionsInterface, - MutableDBOptionsInterface, - ColumnFamilyOptionsInterface, - MutableColumnFamilyOptionsInterface { - + implements DBOptionsInterface, MutableDBOptionsInterface, + ColumnFamilyOptionsInterface, MutableColumnFamilyOptionsInterface { /** * Converts the input properties into a Options-style formatted string * @param properties The set of properties to convert diff --git a/java/src/main/java/org/rocksdb/OptionsUtil.java b/java/src/main/java/org/rocksdb/OptionsUtil.java index e242cae9f..4168921f2 100644 --- a/java/src/main/java/org/rocksdb/OptionsUtil.java +++ b/java/src/main/java/org/rocksdb/OptionsUtil.java @@ -89,6 +89,7 @@ public static String getLatestOptionsFileName(final String dbPath, final Env env private static void loadTableFormatConfig(final List cfDescs) { for (final ColumnFamilyDescriptor columnFamilyDescriptor : cfDescs) { + @SuppressWarnings("PMD.CloseResource") final ColumnFamilyOptions columnFamilyOptions = columnFamilyDescriptor.getOptions(); columnFamilyOptions.setFetchedTableFormatConfig( readTableFormatConfig(columnFamilyOptions.nativeHandle_)); diff --git a/java/src/main/java/org/rocksdb/PerfLevel.java b/java/src/main/java/org/rocksdb/PerfLevel.java index a5b452640..332e6d7d9 100644 --- a/java/src/main/java/org/rocksdb/PerfLevel.java +++ b/java/src/main/java/org/rocksdb/PerfLevel.java @@ -39,7 +39,7 @@ public enum PerfLevel { */ @Deprecated OUT_OF_BOUNDS((byte) 6); - private PerfLevel(byte _value) { + PerfLevel(byte _value) { this._value = _value; } diff --git a/java/src/main/java/org/rocksdb/ReadOptions.java b/java/src/main/java/org/rocksdb/ReadOptions.java index 65b781d16..481101fc9 100644 --- a/java/src/main/java/org/rocksdb/ReadOptions.java +++ b/java/src/main/java/org/rocksdb/ReadOptions.java @@ -573,14 +573,14 @@ public ReadOptions setAutoPrefixMode(final boolean mode) { * @see #iterStartTs() * @return Reference to timestamp or null if there is no timestamp defined. */ + @SuppressWarnings("PMD.ConfusingTernary") public Slice timestamp() { assert (isOwningHandle()); final long timestampSliceHandle = timestamp(nativeHandle_); - if (timestampSliceHandle != 0) { - return new Slice(timestampSliceHandle); - } else { + if (timestampSliceHandle == 0) { return null; } + return new Slice(timestampSliceHandle); } /** @@ -623,14 +623,14 @@ public ReadOptions setTimestamp(final AbstractSlice timestamp) { * @return Reference to lower bound timestamp or null if there is no lower bound timestamp * defined. */ + @SuppressWarnings("PMD.ConfusingTernary") public Slice iterStartTs() { assert (isOwningHandle()); final long iterStartTsHandle = iterStartTs(nativeHandle_); - if (iterStartTsHandle != 0) { - return new Slice(iterStartTsHandle); - } else { + if (iterStartTsHandle == 0) { return null; } + return new Slice(iterStartTsHandle); } /** diff --git a/java/src/main/java/org/rocksdb/RocksCallbackObject.java b/java/src/main/java/org/rocksdb/RocksCallbackObject.java index 2b9de4b8e..2c4547b12 100644 --- a/java/src/main/java/org/rocksdb/RocksCallbackObject.java +++ b/java/src/main/java/org/rocksdb/RocksCallbackObject.java @@ -39,7 +39,7 @@ protected RocksCallbackObject(final long... 
nativeParameterHandles) { static /* @Nullable */ long[] toNativeHandleList( /* @Nullable */ final List objectList) { if (objectList == null) { - return null; + return new long[0]; } final int len = objectList.size(); final long[] handleList = new long[len]; diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 7863790c1..a23821a92 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -31,6 +31,14 @@ private enum LibraryState { private static final AtomicReference libraryLoaded = new AtomicReference<>(LibraryState.NOT_LOADED); + + static { + RocksDB.loadLibrary(); + } + + static final String PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD = + "Performance optimization for a very specific workload"; + private final List ownedColumnFamilyHandles = new ArrayList<>(); /** @@ -40,6 +48,7 @@ private enum LibraryState { * java.io.tmpdir, however, you can override this temporary location by * setting the environment variable ROCKSDB_SHAREDLIB_DIR. */ + @SuppressWarnings("PMD.EmptyCatchBlock") public static void loadLibrary() { if (libraryLoaded.get() == LibraryState.LOADED) { return; @@ -89,6 +98,7 @@ public static void loadLibrary() { * @param paths a list of strings where each describes a directory * of a library. */ + @SuppressWarnings("PMD.EmptyCatchBlock") public static void loadLibrary(final List paths) { if (libraryLoaded.get() == LibraryState.LOADED) { return; @@ -171,9 +181,10 @@ protected RocksDB(final long nativeHandle) { */ public static RocksDB open(final String path) throws RocksDBException { RocksDB.loadLibrary(); - final Options options = new Options(); - options.setCreateIfMissing(true); - return open(options, path); + try (Options options = new Options()) { + options.setCreateIfMissing(true); + return open(options, path); + } } /** @@ -209,8 +220,9 @@ public static RocksDB open(final String path, final List columnFamilyDescriptors, final List columnFamilyHandles) throws RocksDBException { - final DBOptions options = new DBOptions(); - return open(options, path, columnFamilyDescriptors, columnFamilyHandles); + try (DBOptions options = new DBOptions()) { + return open(options, path, columnFamilyDescriptors, columnFamilyHandles); + } } /** @@ -303,7 +315,8 @@ public static RocksDB open(final DBOptions options, final String path, db.storeOptionsInstance(options); for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); + final ColumnFamilyHandle columnFamilyHandle = // NOPMD - CloseResource + new ColumnFamilyHandle(db, handles[i]); columnFamilyHandles.add(columnFamilyHandle); } @@ -329,8 +342,9 @@ public static RocksDB openReadOnly(final String path) RocksDB.loadLibrary(); // This allows to use the rocksjni default Options instead of // the c++ one. - final Options options = new Options(); - return openReadOnly(options, path); + try (Options options = new Options()) { + return openReadOnly(options, path); + } } /** @@ -405,8 +419,9 @@ public static RocksDB openReadOnly(final String path, throws RocksDBException { // This allows to use the rocksjni default Options instead of // the c++ one. 
- final DBOptions options = new DBOptions(); - return openReadOnly(options, path, columnFamilyDescriptors, columnFamilyHandles, false); + try (DBOptions options = new DBOptions()) { + return openReadOnly(options, path, columnFamilyDescriptors, columnFamilyHandles, false); + } } /** @@ -484,7 +499,8 @@ public static RocksDB openReadOnly(final DBOptions options, final String path, db.storeOptionsInstance(options); for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); + final ColumnFamilyHandle columnFamilyHandle = // NOPMD - CloseResource + new ColumnFamilyHandle(db, handles[i]); columnFamilyHandles.add(columnFamilyHandle); } @@ -580,7 +596,8 @@ public static RocksDB openAsSecondary(final DBOptions options, final String path db.storeOptionsInstance(options); for (int i = 1; i < handles.length; i++) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(db, handles[i]); + final ColumnFamilyHandle columnFamilyHandle = // NOPMD - CloseResource + new ColumnFamilyHandle(db, handles[i]); columnFamilyHandles.add(columnFamilyHandle); } @@ -603,7 +620,8 @@ public static RocksDB openAsSecondary(final DBOptions options, final String path * @throws RocksDBException if an error occurs whilst closing. */ public void closeE() throws RocksDBException { - for (final ColumnFamilyHandle columnFamilyHandle : ownedColumnFamilyHandles) { + for (final ColumnFamilyHandle columnFamilyHandle : // NOPMD - CloseResource + ownedColumnFamilyHandles) { columnFamilyHandle.close(); } ownedColumnFamilyHandles.clear(); @@ -628,9 +646,11 @@ public void closeE() throws RocksDBException { *

    * See also {@link #close()}. */ + @SuppressWarnings("PMD.EmptyCatchBlock") @Override public void close() { - for (final ColumnFamilyHandle columnFamilyHandle : ownedColumnFamilyHandles) { + for (final ColumnFamilyHandle columnFamilyHandle : // NOPMD - CloseResource + ownedColumnFamilyHandles) { columnFamilyHandle.close(); } ownedColumnFamilyHandles.clear(); @@ -706,7 +726,7 @@ public List createColumnFamilies( final List columnFamilyHandles = new ArrayList<>(cfHandles.length); for (final long cfHandle : cfHandles) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandle); + final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandle); // NOPMD columnFamilyHandles.add(columnFamilyHandle); } ownedColumnFamilyHandles.addAll(columnFamilyHandles); @@ -739,7 +759,7 @@ public List createColumnFamilies( final List columnFamilyHandles = new ArrayList<>(cfHandles.length); for (final long cfHandle : cfHandles) { - final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandle); + final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, cfHandle); // NOPMD columnFamilyHandles.add(columnFamilyHandle); } ownedColumnFamilyHandles.addAll(columnFamilyHandles); @@ -783,7 +803,7 @@ public void dropColumnFamilies( */ public void destroyColumnFamilyHandle(final ColumnFamilyHandle columnFamilyHandle) { for (int i = 0; i < ownedColumnFamilyHandles.size(); ++i) { - final ColumnFamilyHandle ownedHandle = ownedColumnFamilyHandles.get(i); + final ColumnFamilyHandle ownedHandle = ownedColumnFamilyHandles.get(i); // NOPMD if (ownedHandle.equals(columnFamilyHandle)) { columnFamilyHandle.close(); ownedColumnFamilyHandles.remove(i); @@ -1273,7 +1293,7 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions op * @throws RocksDBException thrown if error happens in underlying * native library. */ - @Experimental("Performance optimization for a very specific workload") + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) public void singleDelete(final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, key, key.length); } @@ -1300,9 +1320,9 @@ public void singleDelete(final byte[] key) throws RocksDBException { * @throws RocksDBException thrown if error happens in underlying * native library. */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) + throws RocksDBException { singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } @@ -1331,9 +1351,8 @@ public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, * @throws RocksDBException thrown if error happens in underlying * native library. 
*/ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final WriteOptions writeOpt, final byte[] key) - throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final WriteOptions writeOpt, final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length); } @@ -1362,9 +1381,9 @@ public void singleDelete(final WriteOptions writeOpt, final byte[] key) * @throws RocksDBException thrown if error happens in underlying * native library. */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final WriteOptions writeOpt, final byte[] key) throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpt, + final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, writeOpt.nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_); } @@ -2131,7 +2150,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, */ public List multiGetAsList(final List keys) throws RocksDBException { - assert(keys.size() != 0); + assert (!keys.isEmpty()); final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); final int[] keyOffsets = new int[keysArray.length]; @@ -2167,7 +2186,7 @@ public List multiGetAsList( final List columnFamilyHandleList, final List keys) throws RocksDBException, IllegalArgumentException { - assert(keys.size() != 0); + assert (!keys.isEmpty()); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size() != columnFamilyHandleList.size()) { @@ -2204,7 +2223,7 @@ public List multiGetAsList( */ public List multiGetAsList(final ReadOptions opt, final List keys) throws RocksDBException { - assert(keys.size() != 0); + assert (!keys.isEmpty()); final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); final int[] keyOffsets = new int[keysArray.length]; @@ -2240,7 +2259,7 @@ public List multiGetAsList(final ReadOptions opt, public List multiGetAsList(final ReadOptions opt, final List columnFamilyHandleList, final List keys) throws RocksDBException { - assert(keys.size() != 0); + assert (!keys.isEmpty()); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. 
if (keys.size()!=columnFamilyHandleList.size()){ @@ -2277,10 +2296,11 @@ public List multiGetAsList(final ReadOptions opt, */ public List multiGetByteBuffers( final List keys, final List values) throws RocksDBException { - final ReadOptions readOptions = new ReadOptions(); - final List columnFamilyHandleList = new ArrayList<>(1); - columnFamilyHandleList.add(getDefaultColumnFamily()); - return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); + try (ReadOptions readOptions = new ReadOptions()) { + final List columnFamilyHandleList = new ArrayList<>(1); + columnFamilyHandleList.add(getDefaultColumnFamily()); + return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); + } } /** @@ -2320,8 +2340,9 @@ public List multiGetByteBuffers(final ReadOptions readOptio public List multiGetByteBuffers( final List columnFamilyHandleList, final List keys, final List values) throws RocksDBException { - final ReadOptions readOptions = new ReadOptions(); - return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); + try (ReadOptions readOptions = new ReadOptions()) { + return multiGetByteBuffers(readOptions, columnFamilyHandleList, keys, values); + } } /** @@ -2344,7 +2365,7 @@ public List multiGetByteBuffers( public List multiGetByteBuffers(final ReadOptions readOptions, final List columnFamilyHandleList, final List keys, final List values) throws RocksDBException { - assert (keys.size() != 0); + assert (!keys.isEmpty()); // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. @@ -3769,7 +3790,7 @@ public Env getEnv() { */ public void flush(final FlushOptions flushOptions) throws RocksDBException { - flush(flushOptions, (List) null); + flush(flushOptions, Collections.singletonList(getDefaultColumnFamily())); } /** @@ -4295,10 +4316,9 @@ public void tryCatchUpWithPrimary() throws RocksDBException { * @throws RocksDBException thrown if error happens in underlying * native library. */ - public void deleteFilesInRanges(final ColumnFamilyHandle columnFamily, - final List ranges, final boolean includeEnd) - throws RocksDBException { - if (ranges.size() == 0) { + public void deleteFilesInRanges(final ColumnFamilyHandle columnFamily, final List ranges, + final boolean includeEnd) throws RocksDBException { + if (ranges.isEmpty()) { return; } if ((ranges.size() % 2) != 0) { @@ -4330,7 +4350,7 @@ public static void destroyDB(final String path, final Options options) private /* @Nullable */ long[] toNativeHandleList( /* @Nullable */ final List objectList) { if (objectList == null) { - return null; + return new long[0]; } final int len = objectList.size(); final long[] handleList = new long[len]; @@ -4340,6 +4360,7 @@ public static void destroyDB(final String path, final Options options) return handleList; } + @SuppressWarnings({"PMD.ForLoopVariableCount", "PMD.AvoidReassigningLoopVariables"}) private static long[] toRangeSliceHandles(final List ranges) { final long[] rangeSliceHandles = new long[ranges.size() * 2]; for (int i = 0, j = 0; i < ranges.size(); i++) { @@ -4360,12 +4381,6 @@ private static void checkBounds(final int offset, final int len, final int size) } } - private static int computeCapacityHint(final int estimatedNumberOfItems) { - // Default load factor for HashMap is 0.75, so N * 1.5 will be at the load - // limit. We add +1 for a buffer. 
- return (int)Math.ceil(estimatedNumberOfItems * 1.5 + 1.0); - } - // native methods private static native long open(final long optionsHandle, final String path) throws RocksDBException; @@ -4711,12 +4726,10 @@ public String toString() { return getMajor() + "." + getMinor() + "." + getPatch(); } - private static Version fromEncodedVersion(int encodedVersion) { + private static Version fromEncodedVersion(final int encodedVersion) { final byte patch = (byte) (encodedVersion & 0xff); - encodedVersion >>= 8; - final byte minor = (byte) (encodedVersion & 0xff); - encodedVersion >>= 8; - final byte major = (byte) (encodedVersion & 0xff); + final byte minor = (byte) (encodedVersion >> 8 & 0xff); + final byte major = (byte) (encodedVersion >> 16 & 0xff); return new Version(major, minor, patch); } diff --git a/java/src/main/java/org/rocksdb/RocksDBException.java b/java/src/main/java/org/rocksdb/RocksDBException.java index 8b035f458..9df411d12 100644 --- a/java/src/main/java/org/rocksdb/RocksDBException.java +++ b/java/src/main/java/org/rocksdb/RocksDBException.java @@ -10,7 +10,7 @@ * type is used to describe an internal error from the c++ rocksdb library. */ public class RocksDBException extends Exception { - + private static final long serialVersionUID = -5187634878466267120L; /* @Nullable */ private final Status status; /** diff --git a/java/src/main/java/org/rocksdb/SstFileMetaData.java b/java/src/main/java/org/rocksdb/SstFileMetaData.java index a04d05cb5..88ea8152a 100644 --- a/java/src/main/java/org/rocksdb/SstFileMetaData.java +++ b/java/src/main/java/org/rocksdb/SstFileMetaData.java @@ -36,18 +36,11 @@ public class SstFileMetaData { * @param numEntries the number of entries * @param numDeletions the number of deletions */ - protected SstFileMetaData( - final String fileName, - final String path, - final long size, - final long smallestSeqno, - final long largestSeqno, - final byte[] smallestKey, - final byte[] largestKey, - final long numReadsSampled, - final boolean beingCompacted, - final long numEntries, - final long numDeletions) { + @SuppressWarnings("PMD.ArrayIsStoredDirectly") + protected SstFileMetaData(final String fileName, final String path, final long size, + final long smallestSeqno, final long largestSeqno, final byte[] smallestKey, + final byte[] largestKey, final long numReadsSampled, final boolean beingCompacted, + final long numEntries, final long numDeletions) { this.fileName = fileName; this.path = path; this.size = size; @@ -111,6 +104,7 @@ public long largestSeqno() { * * @return the smallest user defined key */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public byte[] smallestKey() { return smallestKey; } @@ -120,6 +114,7 @@ public byte[] smallestKey() { * * @return the largest user defined key */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public byte[] largestKey() { return largestKey; } diff --git a/java/src/main/java/org/rocksdb/SstFileReader.java b/java/src/main/java/org/rocksdb/SstFileReader.java index d7d5b400c..939d39375 100644 --- a/java/src/main/java/org/rocksdb/SstFileReader.java +++ b/java/src/main/java/org/rocksdb/SstFileReader.java @@ -6,7 +6,6 @@ package org.rocksdb; public class SstFileReader extends RocksObject { - public SstFileReader(final Options options) { super(newSstFileReader(options.nativeHandle_)); } diff --git a/java/src/main/java/org/rocksdb/SstFileWriter.java b/java/src/main/java/org/rocksdb/SstFileWriter.java index 985dc619a..d5766bffb 100644 --- a/java/src/main/java/org/rocksdb/SstFileWriter.java +++ 
b/java/src/main/java/org/rocksdb/SstFileWriter.java @@ -13,7 +13,6 @@ * sequence number = 0. */ public class SstFileWriter extends RocksObject { - /** * SstFileWriter Constructor. * @@ -196,6 +195,8 @@ public long fileSize() throws RocksDBException { return fileSize(nativeHandle_); } + @SuppressWarnings("PMD.UnusedPrivateMethod") + // (AP) Should we expose a constructor wrapping this ? private static native long newSstFileWriter(final long envOptionsHandle, final long optionsHandle, final long userComparatorHandle, final byte comparatorType); diff --git a/java/src/main/java/org/rocksdb/StatisticsCollector.java b/java/src/main/java/org/rocksdb/StatisticsCollector.java index fd00f85b2..dd0d98fe5 100644 --- a/java/src/main/java/org/rocksdb/StatisticsCollector.java +++ b/java/src/main/java/org/rocksdb/StatisticsCollector.java @@ -61,6 +61,7 @@ public void shutDown(final int shutdownTimeout) throws InterruptedException { _executorService.awaitTermination(shutdownTimeout, TimeUnit.MILLISECONDS); } + @SuppressWarnings("PMD.CloseResource") private Runnable collectStatistics() { return () -> { while (_isRunning) { diff --git a/java/src/main/java/org/rocksdb/Status.java b/java/src/main/java/org/rocksdb/Status.java index 5c50e700f..5f751f422 100644 --- a/java/src/main/java/org/rocksdb/Status.java +++ b/java/src/main/java/org/rocksdb/Status.java @@ -5,6 +5,7 @@ package org.rocksdb; +import java.io.Serializable; import java.util.Objects; /** @@ -13,7 +14,8 @@ * Currently only used with {@link RocksDBException} when the * status is not {@link Code#Ok} */ -public class Status { +public class Status implements Serializable { + private static final long serialVersionUID = -3794191127754280439L; private final Code code; /* @Nullable */ private final SubCode subCode; /* @Nullable */ private final String state; diff --git a/java/src/main/java/org/rocksdb/TableProperties.java b/java/src/main/java/org/rocksdb/TableProperties.java index 02b95608e..7fb1bcc77 100644 --- a/java/src/main/java/org/rocksdb/TableProperties.java +++ b/java/src/main/java/org/rocksdb/TableProperties.java @@ -46,6 +46,7 @@ public class TableProperties { * Access is package private as this will only be constructed from * C++ via JNI and for testing. */ + @SuppressWarnings("PMD.ArrayIsStoredDirectly") TableProperties(final long dataSize, final long indexSize, final long indexPartitions, final long topLevelIndexSize, final long indexKeyIsUserKey, final long indexValueIsDeltaEncoded, final long filterSize, final long rawKeySize, @@ -116,6 +117,7 @@ public long getIndexSize() { * * @return the total number of index partitions. */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public long getIndexPartitions() { return indexPartitions; } @@ -299,6 +301,7 @@ public long getFastCompressionEstimatedDataSize() { * @return the name of the column family, or null if the * column family is unknown. 
*/ + @SuppressWarnings("PMD.MethodReturnsInternalArray") /*@Nullable*/ public byte[] getColumnFamilyName() { return columnFamilyName; } diff --git a/java/src/main/java/org/rocksdb/ThreadStatus.java b/java/src/main/java/org/rocksdb/ThreadStatus.java index 38e7fad9c..4211453d1 100644 --- a/java/src/main/java/org/rocksdb/ThreadStatus.java +++ b/java/src/main/java/org/rocksdb/ThreadStatus.java @@ -118,6 +118,7 @@ public OperationStage getOperationStage() { * * @return the properties */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public long[] getOperationProperties() { return operationProperties; } diff --git a/java/src/main/java/org/rocksdb/Transaction.java b/java/src/main/java/org/rocksdb/Transaction.java index 7d61a208e..8ab968a3c 100644 --- a/java/src/main/java/org/rocksdb/Transaction.java +++ b/java/src/main/java/org/rocksdb/Transaction.java @@ -5,6 +5,8 @@ package org.rocksdb; +import static org.rocksdb.RocksDB.PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -26,6 +28,8 @@ * examples. */ public class Transaction extends RocksObject { + private static final String FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE = + "For each key there must be a ColumnFamilyHandle."; private final RocksDB parent; @@ -345,8 +349,7 @@ public byte[][] multiGet(final ReadOptions readOptions, // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.length != columnFamilyHandles.size()) { - throw new IllegalArgumentException( - "For each key there must be a ColumnFamilyHandle."); + throw new IllegalArgumentException(FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE); } if(keys.length == 0) { return new byte[0][0]; @@ -396,9 +399,9 @@ public List multiGetAsList(final ReadOptions readOptions, // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.size() != columnFamilyHandles.size()) { - throw new IllegalArgumentException("For each key there must be a ColumnFamilyHandle."); + throw new IllegalArgumentException(FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE); } - if (keys.size() == 0) { + if (keys.isEmpty()) { return new ArrayList<>(0); } final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); @@ -474,7 +477,7 @@ public byte[][] multiGet(final ReadOptions readOptions, final byte[][] keys) */ public List multiGetAsList(final ReadOptions readOptions, final List keys) throws RocksDBException { - if (keys.size() == 0) { + if (keys.isEmpty()) { return new ArrayList<>(0); } final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); @@ -637,8 +640,7 @@ public byte[][] multiGetForUpdate(final ReadOptions readOptions, // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. if (keys.length != columnFamilyHandles.size()){ - throw new IllegalArgumentException( - "For each key there must be a ColumnFamilyHandle."); + throw new IllegalArgumentException(FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE); } if(keys.length == 0) { return new byte[0][0]; @@ -673,9 +675,9 @@ public List multiGetForUpdateAsList(final ReadOptions readOptions, // Check if key size equals cfList size. If not a exception must be // thrown. If not a Segmentation fault happens. 
if (keys.size() != columnFamilyHandles.size()) { - throw new IllegalArgumentException("For each key there must be a ColumnFamilyHandle."); + throw new IllegalArgumentException(FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE); } - if (keys.size() == 0) { + if (keys.isEmpty()) { return new ArrayList<>(); } final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); @@ -727,7 +729,7 @@ public byte[][] multiGetForUpdate(final ReadOptions readOptions, final byte[][] public List multiGetForUpdateAsList( final ReadOptions readOptions, final List keys) throws RocksDBException { assert (isOwningHandle()); - if (keys.size() == 0) { + if (keys.isEmpty()) { return new ArrayList<>(0); } @@ -1227,9 +1229,9 @@ public void delete(final byte[][] keyParts) throws RocksDBException { * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key, final boolean assumeTracked) throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, + final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, assumeTracked); @@ -1259,9 +1261,9 @@ public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[] key) throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) + throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, key, key.length, columnFamilyHandle.nativeHandle_, false); @@ -1288,7 +1290,7 @@ public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) public void singleDelete(final byte[] key) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, key, key.length); @@ -1311,10 +1313,9 @@ public void singleDelete(final byte[] key) throws RocksDBException { * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts, final boolean assumeTracked) - throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts, + final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, assumeTracked); @@ -1333,9 +1334,9 @@ 
public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") - public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, - final byte[][] keyParts) throws RocksDBException { + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) + public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte[][] keyParts) + throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length, columnFamilyHandle.nativeHandle_, false); @@ -1352,7 +1353,7 @@ public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, * @throws RocksDBException when one of the TransactionalDB conditions * described above occurs, or in the case of an unexpected error */ - @Experimental("Performance optimization for a very specific workload") + @Experimental(PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD) public void singleDelete(final byte[][] keyParts) throws RocksDBException { assert(isOwningHandle()); singleDelete(nativeHandle_, keyParts, keyParts.length); @@ -1980,9 +1981,9 @@ public static TransactionState getTransactionState(final byte value) { * * @return The waiting transactions */ + @SuppressWarnings("PMD.UnusedPrivateMethod") private WaitingTransactions newWaitingTransactions( - final long columnFamilyId, final String key, - final long[] transactionIds) { + final long columnFamilyId, final String key, final long[] transactionIds) { return new WaitingTransactions(columnFamilyId, key, transactionIds); } @@ -2021,6 +2022,7 @@ public String getKey() { * * @return The IDs of the waiting transactions */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public long[] getTransactionIds() { return transactionIds; } diff --git a/java/src/main/java/org/rocksdb/TransactionDB.java b/java/src/main/java/org/rocksdb/TransactionDB.java index 105f4eff0..134a0c8a1 100644 --- a/java/src/main/java/org/rocksdb/TransactionDB.java +++ b/java/src/main/java/org/rocksdb/TransactionDB.java @@ -14,8 +14,8 @@ */ public class TransactionDB extends RocksDB implements TransactionalDB { - - private TransactionDBOptions transactionDbOptions_; + // Field is "used" to prevent GC of the + @SuppressWarnings("PMD.UnusedPrivateField") private TransactionDBOptions transactionDbOptions_; /** * Private constructor. @@ -116,6 +116,7 @@ public static TransactionDB open(final DBOptions dbOptions, * * @throws RocksDBException if an error occurs whilst closing. */ + @Override public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { @@ -137,6 +138,7 @@ public void closeE() throws RocksDBException { *

    * See also {@link #close()}. */ + @SuppressWarnings("PMD.EmptyCatchBlock") @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { @@ -218,7 +220,7 @@ public List getAllPreparedTransactions() { final List txns = new ArrayList<>(); for(final long jtxnHandle : jtxnHandles) { - final Transaction txn = new Transaction(this, jtxnHandle); + final Transaction txn = new Transaction(this, jtxnHandle); // NOPMD - CloseResource // this instance doesn't own the underlying C++ object txn.disOwnNativeHandle(); @@ -233,6 +235,7 @@ public static class KeyLockInfo { private final long[] transactionIDs; private final boolean exclusive; + @SuppressWarnings("PMD.ArrayIsStoredDirectly") public KeyLockInfo(final String key, final long[] transactionIDs, final boolean exclusive) { this.key = key; this.transactionIDs = transactionIDs; @@ -253,6 +256,7 @@ public String getKey() { * * @return the Transaction IDs. */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") public long[] getTransactionIDs() { return transactionIDs; } @@ -287,8 +291,8 @@ public Map getLockStatusData() { * * @return The waiting transactions */ - private DeadlockInfo newDeadlockInfo( - final long transactionID, final long columnFamilyId, + @SuppressWarnings("PMD.UnusedPrivateMethod") + private DeadlockInfo newDeadlockInfo(final long transactionID, final long columnFamilyId, final String waitingKey, final boolean exclusive) { return new DeadlockInfo(transactionID, columnFamilyId, waitingKey, exclusive); @@ -349,6 +353,7 @@ public static class DeadlockPath { final DeadlockInfo[] path; final boolean limitExceeded; + @SuppressWarnings("PMD.ArrayIsStoredDirectly") public DeadlockPath(final DeadlockInfo[] path, final boolean limitExceeded) { this.path = path; this.limitExceeded = limitExceeded; diff --git a/java/src/main/java/org/rocksdb/TtlDB.java b/java/src/main/java/org/rocksdb/TtlDB.java index 2bb0c4333..9a90ba358 100644 --- a/java/src/main/java/org/rocksdb/TtlDB.java +++ b/java/src/main/java/org/rocksdb/TtlDB.java @@ -154,6 +154,7 @@ public static TtlDB open(final DBOptions options, final String db_path, * * @throws RocksDBException if an error occurs whilst closing. */ + @Override public void closeE() throws RocksDBException { if (owningHandle_.compareAndSet(true, false)) { try { @@ -175,6 +176,7 @@ public void closeE() throws RocksDBException { *

    * See also {@link #close()}. */ + @SuppressWarnings("PMD.EmptyCatchBlock") @Override public void close() { if (owningHandle_.compareAndSet(true, false)) { diff --git a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java b/java/src/main/java/org/rocksdb/WBWIRocksIterator.java index e0b99b1b5..25d6e6f9d 100644 --- a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java +++ b/java/src/main/java/org/rocksdb/WBWIRocksIterator.java @@ -159,10 +159,10 @@ public DirectSlice getKey() { * no value */ public DirectSlice getValue() { - if(!value.isOwningHandle()) { - return null; //TODO(AR) migrate to JDK8 java.util.Optional#empty() - } else { + if (value.isOwningHandle()) { return value; + } else { + return null; // TODO(AR) migrate to JDK8 java.util.Optional#empty() } } @@ -178,6 +178,7 @@ public int hashCode() { return (key == null) ? 0 : key.hashCode(); } + @SuppressWarnings("PMD.CloseResource") @Override public boolean equals(final Object other) { if(other == null) { diff --git a/java/src/main/java/org/rocksdb/WriteBufferManager.java b/java/src/main/java/org/rocksdb/WriteBufferManager.java index 9c5645880..40176aba4 100644 --- a/java/src/main/java/org/rocksdb/WriteBufferManager.java +++ b/java/src/main/java/org/rocksdb/WriteBufferManager.java @@ -9,7 +9,6 @@ * Java wrapper over native write_buffer_manager class */ public class WriteBufferManager extends RocksObject { - /** * Construct a new instance of WriteBufferManager. *

    diff --git a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java b/java/src/main/java/org/rocksdb/util/BytewiseComparator.java index 9561b0a31..202241d3b 100644 --- a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java +++ b/java/src/main/java/org/rocksdb/util/BytewiseComparator.java @@ -52,9 +52,9 @@ static int _compare(final ByteBuffer a, final ByteBuffer b) { return r; } + @SuppressWarnings("PMD.EmptyControlStatement") @Override - public void findShortestSeparator(final ByteBuffer start, - final ByteBuffer limit) { + public void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { // Find length of common prefix final int minLength = Math.min(start.remaining(), limit.remaining()); int diffIndex = 0; diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/rocksdb/util/Environment.java index 9ad51c7c7..53ff65d26 100644 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ b/java/src/main/java/org/rocksdb/util/Environment.java @@ -3,12 +3,20 @@ import java.io.File; import java.io.IOException; +import java.util.Locale; public class Environment { - private static String OS = System.getProperty("os.name").toLowerCase(); - private static String ARCH = System.getProperty("os.arch").toLowerCase(); + @SuppressWarnings("FieldMayBeFinal") + private static String OS = System.getProperty("os.name").toLowerCase(Locale.getDefault()); + @SuppressWarnings("FieldMayBeFinal") + private static String ARCH = System.getProperty("os.arch").toLowerCase(Locale.getDefault()); + @SuppressWarnings("FieldMayBeFinal") private static String MUSL_ENVIRONMENT = System.getenv("ROCKSDB_MUSL_LIBC"); + private static final String LIBC_MUSL_PREFIX = "libc.musl"; + + private static final String SPARCV9 = "sparcv9"; + /** * Will be lazily initialised by {@link #isMuslLibc()} instead of the previous static * initialisation. The lazy initialisation prevents Windows from reporting suspicious behaviour of @@ -70,6 +78,7 @@ public static boolean isMuslLibc() { * * @return true if the environment has a musl libc, false otherwise. 
*/ + @SuppressWarnings("PMD.EmptyCatchBlock") static boolean initIsMuslLibc() { // consider explicit user setting from environment first if ("true".equalsIgnoreCase(MUSL_ENVIRONMENT)) { @@ -114,7 +123,7 @@ static boolean initIsMuslLibc() { return false; } for (final File f : libFiles) { - if (f.getName().startsWith("libc.musl")) { + if (f.getName().startsWith(LIBC_MUSL_PREFIX)) { return true; } } @@ -132,7 +141,7 @@ public static boolean isOpenBSD() { } public static boolean is64Bit() { - if (ARCH.indexOf("sparcv9") >= 0) { + if (ARCH.contains(SPARCV9)) { return true; } return (ARCH.indexOf("64") > 0); diff --git a/java/src/main/java/org/rocksdb/util/IntComparator.java b/java/src/main/java/org/rocksdb/util/IntComparator.java index cc096cd14..2caf0c601 100644 --- a/java/src/main/java/org/rocksdb/util/IntComparator.java +++ b/java/src/main/java/org/rocksdb/util/IntComparator.java @@ -48,7 +48,7 @@ public int compare(final ByteBuffer a, final ByteBuffer b) { * * @return negative if a < b, 0 if a == b, positive otherwise */ - private final int compareIntKeys(final ByteBuffer a, final ByteBuffer b) { + private int compareIntKeys(final ByteBuffer a, final ByteBuffer b) { final int iA = a.getInt(); final int iB = b.getInt(); diff --git a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java b/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java index 4c06f80aa..3d3c42941 100644 --- a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java +++ b/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java @@ -38,9 +38,9 @@ public int compare(final ByteBuffer a, final ByteBuffer b) { return -BytewiseComparator._compare(a, b); } + @SuppressWarnings("PMD.EmptyControlStatement") @Override - public void findShortestSeparator(final ByteBuffer start, - final ByteBuffer limit) { + public void findShortestSeparator(final ByteBuffer start, final ByteBuffer limit) { // Find length of common prefix final int minLength = Math.min(start.remaining(), limit.remaining()); int diffIndex = 0; diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/rocksdb/DBOptionsTest.java index d79f78db7..cb7eabcfb 100644 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/DBOptionsTest.java @@ -886,6 +886,8 @@ public void onMemTableSealed(final MemTableInfo memTableInfo) { wasCalled2.set(true); } }) { + assertThat(options.setListeners(null)).isEqualTo(options); + assertThat(options.listeners().size()).isEqualTo(0); assertThat(options.setListeners(Arrays.asList(el1, el2))).isEqualTo(options); final List listeners = options.listeners(); assertEquals(el1, listeners.get(0)); diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 3f6ebc71e..fa1d1bd05 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1346,6 +1346,25 @@ public void enableAutoCompaction() throws RocksDBException { } } + @Test + public void enableAutoCompactionNull() throws RocksDBException { + try (final DBOptions options = new DBOptions().setCreateIfMissing(true)) { + final List cfDescs = + Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); + final List cfHandles = new ArrayList<>(); + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { + try { + db.enableAutoCompaction(null); + } finally { + for (final 
ColumnFamilyHandle cfHandle : cfHandles) { + cfHandle.close(); + } + } + } + } + } + @Test public void numberLevels() throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true)) { @@ -1579,9 +1598,43 @@ public void suggestCompactRange() throws RocksDBException { db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); db.put(cfHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); + try { + final Range range = db.suggestCompactRange(); + assertThat(range).isNotNull(); + } finally { + for (final ColumnFamilyHandle cfHandle : cfHandles) { + cfHandle.close(); + } + } + } + } + } + + @Test + public void suggestCompactRangeCF() throws RocksDBException { + try (final DBOptions options = + new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true)) { + final List cfDescs = + Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), + new ColumnFamilyDescriptor("new_cf".getBytes(), new ColumnFamilyOptions()), + new ColumnFamilyDescriptor("new_cf2".getBytes(), new ColumnFamilyOptions())); + + final List cfHandles = new ArrayList<>(); + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + try (final RocksDB db = RocksDB.open(options, dbPath, cfDescs, cfHandles)) { + db.put(cfHandles.get(0), "key1".getBytes(UTF_8), "value1".getBytes(UTF_8)); + db.put(cfHandles.get(0), "key2".getBytes(UTF_8), "value2".getBytes(UTF_8)); + db.put(cfHandles.get(0), "key3".getBytes(UTF_8), "value3".getBytes(UTF_8)); + db.put(cfHandles.get(1), "key1_new_cf".getBytes(UTF_8), "value1".getBytes(UTF_8)); + db.put(cfHandles.get(1), "key2_new_cf".getBytes(UTF_8), "value2".getBytes(UTF_8)); + db.put(cfHandles.get(1), "key3_new_cf".getBytes(UTF_8), "value3".getBytes(UTF_8)); try { final Range range = db.suggestCompactRange(cfHandles.get(0)); assertThat(range).isNotNull(); + final Range rangeCF = db.suggestCompactRange(cfHandles.get(1)); + assertThat(rangeCF).isNotNull(); + final Range rangeCFEmpty = db.suggestCompactRange(cfHandles.get(2)); + assertThat(rangeCFEmpty).isNotNull(); } finally { for (final ColumnFamilyHandle cfHandle : cfHandles) { cfHandle.close(); diff --git a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java index ae340e06d..5e5369217 100644 --- a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java +++ b/java/src/test/java/org/rocksdb/util/EnvironmentTest.java @@ -5,7 +5,6 @@ package org.rocksdb.util; import static org.assertj.core.api.Assertions.assertThat; -import static org.hamcrest.Matchers.is; import java.lang.reflect.Field; import org.junit.AfterClass; @@ -13,11 +12,11 @@ import org.junit.Test; public class EnvironmentTest { - private final static String ARCH_FIELD_NAME = "ARCH"; - private final static String OS_FIELD_NAME = "OS"; + private static final String ARCH_FIELD_NAME = "ARCH"; + private static final String OS_FIELD_NAME = "OS"; - private final static String MUSL_ENVIRONMENT_FIELD_NAME = "MUSL_ENVIRONMENT"; - private final static String MUSL_LIBC_FIELD_NAME = "MUSL_LIBC"; + private static final String MUSL_ENVIRONMENT_FIELD_NAME = "MUSL_ENVIRONMENT"; + private static final String MUSL_LIBC_FIELD_NAME = "MUSL_LIBC"; private static String INITIAL_OS; private static String INITIAL_ARCH; @@ -255,8 +254,7 @@ public void resolveIsMuslLibc() { assertThat(Environment.initIsMuslLibc()).isFalse(); } - private void setEnvironmentClassFields(String osName, - 
String osArch) { + private void setEnvironmentClassFields(final String osName, final String osArch) { setEnvironmentClassField(OS_FIELD_NAME, osName); setEnvironmentClassField(ARCH_FIELD_NAME, osArch); } @@ -270,7 +268,7 @@ public static void restoreState() { } @SuppressWarnings("unchecked") - private static T getEnvironmentClassField(String fieldName) { + private static T getEnvironmentClassField(final String fieldName) { final Field field; try { field = Environment.class.getDeclaredField(fieldName); @@ -286,7 +284,7 @@ private static T getEnvironmentClassField(String fieldName) { } } - private static void setEnvironmentClassField(String fieldName, Object value) { + private static void setEnvironmentClassField(final String fieldName, final Object value) { final Field field; try { field = Environment.class.getDeclaredField(fieldName); From 9135a61ec6a7969733836732519eadfc0b5dc50a Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Tue, 17 Oct 2023 12:21:08 -0700 Subject: [PATCH 212/386] Fix corruption error in stress test for auto_readahead_size enabled (#11961) Summary: Fix corruption error - "Corruption: first key in index doesn't match first key in block". when auto_readahead_size is enabled. Error is because of bug when index_iter_ moves forward, first_internal_key of that index_iter_ is not copied. So the Slice points to a different key resulting in wrong comparison when doing comparison. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11961 Test Plan: Ran stress test which reproduced this error. Reviewed By: anand1976 Differential Revision: D50310589 Pulled By: akankshamahajan15 fbshipit-source-id: 95d8320b8388f1e3822c32024f84754f3a20a631 --- .../block_based/block_based_table_iterator.cc | 20 +++++++++++-------- .../block_based/block_based_table_iterator.h | 14 +++++++++++-- .../bug_fixes/auto_readahead.md | 1 + 3 files changed, 25 insertions(+), 10 deletions(-) create mode 100644 unreleased_history/bug_fixes/auto_readahead.md diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index dfd7d1471..8107e58f2 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -318,7 +318,7 @@ void BlockBasedTableIterator::InitDataBlock() { bool use_block_cache_for_lookup = true; if (DoesContainBlockHandles()) { - data_block_handle = block_handles_.front().index_val_.handle; + data_block_handle = block_handles_.front().handle_; is_in_cache = block_handles_.front().is_cache_hit_; use_block_cache_for_lookup = false; } else { @@ -483,15 +483,15 @@ bool BlockBasedTableIterator::MaterializeCurrentBlock() { // handles placed in blockhandle. So index_ will be pointing to current block. // After InitDataBlock, index_iter_ can point to different block if // BlockCacheLookupForReadAheadSize is called. 
- IndexValue index_val; + Slice first_internal_key; if (DoesContainBlockHandles()) { - index_val = block_handles_.front().index_val_; + first_internal_key = block_handles_.front().first_internal_key_; } else { - index_val = index_iter_->value(); + first_internal_key = index_iter_->value().first_internal_key; } if (!block_iter_.Valid() || - icomp_.Compare(block_iter_.key(), index_val.first_internal_key) != 0) { + icomp_.Compare(block_iter_.key(), first_internal_key) != 0) { block_iter_.Invalidate(Status::Corruption( "first key in index doesn't match first key in block")); return false; @@ -701,7 +701,9 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( // Add the current block to block_handles_. { BlockHandleInfo block_handle_info; - block_handle_info.index_val_ = index_iter_->value(); + block_handle_info.handle_ = index_iter_->value().handle; + block_handle_info.SetFirstInternalKey( + index_iter_->value().first_internal_key); block_handles_.emplace_back(std::move(block_handle_info)); } @@ -726,7 +728,9 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( // For current data block, do the lookup in the cache. Lookup should pin the // data block and add the placeholder for cache. BlockHandleInfo block_handle_info; - block_handle_info.index_val_ = index_iter_->value(); + block_handle_info.handle_ = index_iter_->value().handle; + block_handle_info.SetFirstInternalKey( + index_iter_->value().first_internal_key); Status s = table_->LookupAndPinBlocksInCache( read_options_, block_handle, @@ -758,7 +762,7 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( // update the readahead_size. for (auto it = block_handles_.rbegin(); it != block_handles_.rend() && (*it).is_cache_hit_ == true; ++it) { - current_readahead_size -= (*it).index_val_.handle.size(); + current_readahead_size -= (*it).handle_.size(); current_readahead_size -= footer; } updated_readahead_size = current_readahead_size; diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 7c1c09cb9..7ed7e3375 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -250,11 +250,21 @@ class BlockBasedTableIterator : public InternalIteratorBase { // BlockHandleInfo is used to store the info needed when block cache lookup // ahead is enabled to tune readahead_size. struct BlockHandleInfo { - BlockHandleInfo() {} + void SetFirstInternalKey(const Slice& key) { + if (key.empty()) { + return; + } + size_t size = key.size(); + buf_ = std::unique_ptr(new char[size]); + memcpy(buf_.get(), key.data(), size); + first_internal_key_ = Slice(buf_.get(), size); + } - IndexValue index_val_; + BlockHandle handle_; bool is_cache_hit_ = false; CachableEntry cachable_entry_; + Slice first_internal_key_; + std::unique_ptr buf_; }; bool IsIndexAtCurr() const { return is_index_at_curr_block_; } diff --git a/unreleased_history/bug_fixes/auto_readahead.md b/unreleased_history/bug_fixes/auto_readahead.md new file mode 100644 index 000000000..a6b009fde --- /dev/null +++ b/unreleased_history/bug_fixes/auto_readahead.md @@ -0,0 +1 @@ +Fix a bug in auto_readahead_size where first_internal_key of index blocks wasn't copied properly resulting in corruption error when first_internal_key was used for comparison. 
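For context on the fix above: a RocksDB Slice is a non-owning (pointer, length) view, so stashing `index_iter_->value().first_internal_key` without copying leaves the stored view aliasing memory that the index iterator may reuse once it advances. The sketch below is a minimal, self-contained illustration of the owning-copy pattern the patch applies; `ToySlice` and `KeyHolder` are hypothetical stand-ins, not the actual RocksDB types.

```
#include <cstring>
#include <memory>

// Hypothetical stand-ins for illustration; not the actual RocksDB classes.
struct ToySlice {
  const char* data = nullptr;
  size_t size = 0;
};

struct KeyHolder {
  // Copy the bytes into an owned buffer so the stored view stays valid even
  // after the source entry (e.g. the index iterator's current value) is
  // overwritten by a subsequent Next().
  void SetKey(const ToySlice& key) {
    if (key.size == 0) {
      return;
    }
    buf_.reset(new char[key.size]);
    std::memcpy(buf_.get(), key.data, key.size);
    key_.data = buf_.get();
    key_.size = key.size;
  }

  ToySlice key_;                 // points into buf_, not into the iterator
  std::unique_ptr<char[]> buf_;  // owns the copied key bytes
};
```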
From 933ee295f4e2bb9a3c970d964e82a473e88bdd0c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 17 Oct 2023 13:18:04 -0700 Subject: [PATCH 213/386] Fix a race condition between recovery and backup (#11955) Summary: A race condition between recovery and backup can happen with error messages like this: ```Failure in BackupEngine::CreateNewBackup with: IO error: No such file or directory: While opening a file for sequentially reading: /dev/shm/rocksdb_test/rocksdb_crashtest_whitebox/002653.log: No such file or directory``` PR https://github.com/facebook/rocksdb/issues/6949 introduced disabling file deletion during error handling of manifest IO errors. Aformentioned race condition is caused by this chain of event: [Backup engine] disable file deletion [Recovery] disable file deletion <= this is optional for the race condition, it may or may not get called [Backup engine] get list of file to copy/link [Recovery] force enable file deletion .... some files refered by backup engine get deleted [Backup engine] copy/link file <= error no file found This PR fixes this with: 1) Recovery thread is currently forcing enabling file deletion as long as file deletion is disabled. Regardless of whether the previous error handling is for manifest IO error and that disabled it in the first place. This means it could incorrectly enabling file deletions intended by other threads like backup threads, file snapshotting threads. This PR does this check explicitly before making the call. 2) `disable_delete_obsolete_files_` is designed as a counter to allow different threads to enable and disable file deletion separately. The recovery thread currently does a force enable file deletion, because `ErrorHandler::SetBGError()` can be called multiple times by different threads when they receive a manifest IO error(details per PR https://github.com/facebook/rocksdb/issues/6949), resulting in `DBImpl::DisableFileDeletions` to be called multiple times too. Making a force enable file deletion call that resets the counter `disable_delete_obsolete_files_` to zero is a workaround for this. However, as it shows in the race condition, it can incorrectly suppress other threads like a backup thread's intention to keep the file deletion disabled. This PR adds a `std::atomic disable_file_deletion_count_` to the error handler to track the needed counter decrease more precisely. This PR tracks and caps file deletion enabling/disabling in error handler. 3) for recovery, the section to find obsolete files and purge them was moved to be done after the attempt to enable file deletion. The actual finding and purging is more likely to happen if file deletion was previously disabled and get re-enabled now. An internal function `DBImpl::EnableFileDeletionsWithLock` was added to support change 2) and 3). Some useful logging was explicitly added to keep those log messages around. 
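As a rough illustration of the counting scheme described above (a hypothetical `FileDeletionGate`, not the actual `DBImpl`/`ErrorHandler` code): every caller that disables deletions must enable them exactly once, deletions resume only when the count returns to zero, and a forced reset to zero is what can override another caller's (for example a backup's) outstanding disable.

```
#include <algorithm>
#include <mutex>

// Hypothetical sketch of a reference-counted disable/enable gate.
class FileDeletionGate {
 public:
  void Disable() {
    std::lock_guard<std::mutex> lock(mu_);
    ++disable_count_;
  }

  // Non-forced enable: decrement once and return the remaining count;
  // deletions are actually allowed again only when it reaches zero.
  int Enable() {
    std::lock_guard<std::mutex> lock(mu_);
    disable_count_ = std::max(0, disable_count_ - 1);
    return disable_count_;
  }

  // A forced enable resets the count, overriding every other caller's
  // disable; this is the behavior the fix avoids during recovery.
  void ForceEnable() {
    std::lock_guard<std::mutex> lock(mu_);
    disable_count_ = 0;
  }

  bool DeletionsEnabled() {
    std::lock_guard<std::mutex> lock(mu_);
    return disable_count_ == 0;
  }

 private:
  std::mutex mu_;
  int disable_count_ = 0;  // e.g. backup engine and recovery each hold one
};
```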
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11955 Test Plan: existing unit tests Reviewed By: anand1976 Differential Revision: D50290592 Pulled By: jowlyzhang fbshipit-source-id: 73aa8331ca4d636955a5b0324b1e104a26e00c9b --- db/db_impl/db_impl.cc | 41 ++++++++++--------------------------- db/db_impl/db_impl.h | 4 ++++ db/db_impl/db_impl_files.cc | 8 ++++++++ db/error_handler.cc | 18 ++++++++++++++-- db/error_handler.h | 7 ++++++- 5 files changed, 45 insertions(+), 33 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 88880996e..ab3a79e99 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -354,7 +354,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { } // Make sure the IO Status stored in version set is set to OK. - bool file_deletion_disabled = !IsFileDeletionsEnabled(); if (s.ok()) { IOStatus io_s = versions_->io_status(); if (io_s.IsIOError()) { @@ -363,7 +362,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { // clean-up phase MANIFEST writing. We must have also disabled file // deletions. assert(!versions_->descriptor_log_); - assert(file_deletion_disabled); + assert(!IsFileDeletionsEnabled()); // Since we are trying to recover from MANIFEST write error, we need to // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted. // Therefore, force writing a dummy version edit because we do not know @@ -406,34 +405,6 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { } } - JobContext job_context(0); - FindObsoleteFiles(&job_context, true); - mutex_.Unlock(); - - job_context.manifest_file_number = 1; - if (job_context.HaveSomethingToDelete()) { - PurgeObsoleteFiles(job_context); - } - job_context.Clean(); - - if (s.ok()) { - assert(versions_->io_status().ok()); - // If we reach here, we should re-enable file deletions if it was disabled - // during previous error handling. - if (file_deletion_disabled) { - // Always return ok - s = EnableFileDeletions(/*force=*/true); - if (!s.ok()) { - ROCKS_LOG_INFO( - immutable_db_options_.info_log, - "DB resume requested but could not enable file deletions [%s]", - s.ToString().c_str()); - assert(false); - } - } - } - - mutex_.Lock(); if (s.ok()) { // This will notify and unblock threads waiting for error recovery to // finish. Those previouly waiting threads can now proceed, which may @@ -446,6 +417,15 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { error_handler_.GetRecoveryError().PermitUncheckedError(); } + JobContext job_context(0); + FindObsoleteFiles(&job_context, true); + mutex_.Unlock(); + job_context.manifest_file_number = 1; + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); + } + job_context.Clean(); + if (s.ok()) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Successfully resumed DB"); } else { @@ -453,6 +433,7 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { s.ToString().c_str()); } + mutex_.Lock(); // Check for shutdown again before scheduling further compactions, // since we released and re-acquired the lock above if (shutdown_initiated_) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index cf6d1d300..10e8af97e 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2373,6 +2373,10 @@ class DBImpl : public DB { Status DisableFileDeletionsWithLock(); + // Safely decrease `disable_delete_obsolete_files_` by one while holding lock + // and return its remaning value. 
+ int EnableFileDeletionsWithLock(); + Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, std::string ts_low); diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 9e2fbb73d..8f72e17f9 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -100,6 +100,14 @@ Status DBImpl::EnableFileDeletions(bool force) { return Status::OK(); } +int DBImpl::EnableFileDeletionsWithLock() { + mutex_.AssertHeld(); + // In case others have called EnableFileDeletions(true /* force */) in between + disable_delete_obsolete_files_ = + std::max(0, disable_delete_obsolete_files_ - 1); + return disable_delete_obsolete_files_; +} + bool DBImpl::IsFileDeletionsEnabled() const { return 0 == disable_delete_obsolete_files_; } diff --git a/db/error_handler.cc b/db/error_handler.cc index c7dd4750f..95b9a0fe6 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -396,11 +396,13 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); - if (BackgroundErrorReason::kManifestWrite == reason || - BackgroundErrorReason::kManifestWriteNoWAL == reason) { + if (!recovery_disabled_file_deletion_ && + (BackgroundErrorReason::kManifestWrite == reason || + BackgroundErrorReason::kManifestWriteNoWAL == reason)) { // Always returns ok ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions"); db_->DisableFileDeletionsWithLock().PermitUncheckedError(); + recovery_disabled_file_deletion_ = true; } Status new_bg_io_err = bg_io_err; @@ -560,6 +562,18 @@ Status ErrorHandler::ClearBGError() { recovery_error_.PermitUncheckedError(); recovery_in_prog_ = false; soft_error_no_bg_work_ = false; + if (recovery_disabled_file_deletion_) { + recovery_disabled_file_deletion_ = false; + int remain_counter = db_->EnableFileDeletionsWithLock(); + if (remain_counter == 0) { + ROCKS_LOG_INFO(db_options_.info_log, "File Deletions Enabled"); + } else { + ROCKS_LOG_WARN( + db_options_.info_log, + "File Deletions Enable, but not really enabled. Counter: %d", + remain_counter); + } + } EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error, bg_error_, db_mutex_); } diff --git a/db/error_handler.h b/db/error_handler.h index f444a8f8d..ace28962d 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -42,7 +42,8 @@ class ErrorHandler { recovery_in_prog_(false), soft_error_no_bg_work_(false), is_db_stopped_(false), - bg_error_stats_(db_options.statistics) { + bg_error_stats_(db_options.statistics), + recovery_disabled_file_deletion_(false) { // Clear the checked flag for uninitialized errors bg_error_.PermitUncheckedError(); recovery_error_.PermitUncheckedError(); @@ -108,6 +109,10 @@ class ErrorHandler { // The pointer of DB statistics. std::shared_ptr bg_error_stats_; + // Tracks whether the recovery has disabled file deletion. This boolean flag + // is updated while holding db mutex. + bool recovery_disabled_file_deletion_; + const Status& HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason); Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery); From 42266939ab71a958f3112368a30c7d8c6bcd485c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 17 Oct 2023 15:25:40 -0700 Subject: [PATCH 214/386] Remove documentation that marks user-defined timestamps feature as experimental (#11974) Summary: As titled. The most notable place that marks the feature as experimental is its wiki page. That was updated. 
And this PR removes the experimental marker from a few places for this feature. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11974 Reviewed By: ltamasi Differential Revision: D50383640 Pulled By: jowlyzhang fbshipit-source-id: 0bfe26ceda0793515f54b602cf3cd13d0737ec25 --- include/rocksdb/write_batch.h | 2 -- util/comparator.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 307cd7559..6752d9931 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -395,8 +395,6 @@ class WriteBatch : public WriteBatchBase { // Returns true if MarkRollback will be called during Iterate bool HasRollback() const; - // Experimental. - // // Update timestamps of existing entries in the write batch if // applicable. If a key is intended for a column family that disables // timestamp, then this API won't set the timestamp for this key. diff --git a/util/comparator.cc b/util/comparator.cc index f1f249fd3..98ecef9d2 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -231,7 +231,6 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { } }; -// EXPERIMENTAL // Comparator with 64-bit integer timestamp. // We did not performance test this yet. template From d5bc30befa33473aec035d7b51143d2af534aeec Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Wed, 18 Oct 2023 09:38:38 -0700 Subject: [PATCH 215/386] Enforce status checking after Valid() returns false for IteratorWrapper (#11975) Summary: ... when compiled with ASSERT_STATUS_CHECKED = 1. The main change is in iterator_wrapper.h. The remaining changes are just fixing existing unit tests. Adding this check to IteratorWrapper gives a good coverage as the class is used in many places, including child iterators under merging iterator, merging iterator under DB iter, file_iter under level iterator, etc. This change can catch the bug fixed in https://github.com/facebook/rocksdb/issues/11782. Future follow up: enable `ASSERT_STATUS_CHECKED=1` for stress test and for DEBUG_LEVEL=0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11975 Test Plan: * `ASSERT_STATUS_CHECKED=1 DEBUG_LEVEL=2 make -j32 J=32 check` * I tried to run stress test with `ASSERT_STATUS_CHECKED=1`, but there are a lot of existing stress code that ignore status checking, and fail without the change in this PR. So defer that to a follow up task. 
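The test updates below all follow the standard iterator idiom that this check enforces; roughly, against the public API (a hedged sketch assuming an already-open `rocksdb::DB* db`, with error handling elided):

```
#include <memory>

#include "rocksdb/db.h"

void ScanAll(rocksdb::DB* db) {
  std::unique_ptr<rocksdb::Iterator> it(
      db->NewIterator(rocksdb::ReadOptions()));
  for (it->SeekToFirst(); it->Valid(); it->Next()) {
    // consume it->key() / it->value() here
  }
  // Valid() == false means either end of data or an error; only status()
  // distinguishes the two, which is why the tests add ASSERT_OK(iter->status()).
  if (!it->status().ok()) {
    // handle or propagate the error
  }
}
```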
Reviewed By: ajkr Differential Revision: D50383790 Pulled By: cbi42 fbshipit-source-id: 1a28ce0f5fdf1890f93400b26b3b1b3a287624ce --- db/blob/db_blob_basic_test.cc | 8 +++++ db/comparator_db_test.cc | 1 + db/db_basic_test.cc | 1 + db/db_block_cache_test.cc | 1 + db/db_bloom_filter_test.cc | 1 + db/db_compaction_filter_test.cc | 2 ++ db/db_compaction_test.cc | 1 + db/db_flush_test.cc | 1 + db/db_iter_test.cc | 29 +++++++++++++++ db/db_iterator_test.cc | 36 ++++++++++++++----- db/db_range_del_test.cc | 8 +++++ db/db_rate_limiter_test.cc | 1 + db/db_readonly_with_timestamp_test.cc | 5 +++ db/db_secondary_test.cc | 11 ++++++ db/db_tailing_iter_test.cc | 4 ++- db/db_test.cc | 6 ++++ db/db_test2.cc | 3 ++ db/db_test_util.cc | 4 +++ db/db_universal_compaction_test.cc | 1 + db/db_wal_test.cc | 1 + db/db_with_timestamp_basic_test.cc | 18 ++++++++++ db/deletefile_test.cc | 2 ++ db/external_sst_file_test.cc | 1 + db/manual_compaction_test.cc | 2 ++ db/perf_context_test.cc | 3 ++ db/plain_table_db_test.cc | 2 ++ db/prefix_test.cc | 1 + file/prefetch_test.cc | 12 +++++++ monitoring/stats_history_test.cc | 3 ++ table/iterator_wrapper.h | 25 ++++++++++++- table/sst_file_reader_test.cc | 1 + .../string_append/stringappend_test.cc | 3 ++ .../option_change_migration_test.cc | 9 +++++ 33 files changed, 196 insertions(+), 11 deletions(-) diff --git a/db/blob/db_blob_basic_test.cc b/db/blob/db_blob_basic_test.cc index c6c6d2b93..1c0caba93 100644 --- a/db/blob/db_blob_basic_test.cc +++ b/db/blob/db_blob_basic_test.cc @@ -168,6 +168,7 @@ TEST_F(DBBlobBasicTest, IterateBlobsFromCache) { ASSERT_EQ(iter->value().ToString(), blobs[i]); ++i; } + ASSERT_OK(iter->status()); ASSERT_EQ(i, num_blobs); ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0); } @@ -203,6 +204,7 @@ TEST_F(DBBlobBasicTest, IterateBlobsFromCache) { ASSERT_EQ(iter->value().ToString(), blobs[i]); ++i; } + ASSERT_OK(iter->status()); ASSERT_EQ(i, num_blobs); ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), num_blobs); @@ -224,6 +226,7 @@ TEST_F(DBBlobBasicTest, IterateBlobsFromCache) { ASSERT_EQ(iter->value().ToString(), blobs[i]); ++i; } + ASSERT_OK(iter->status()); ASSERT_EQ(i, num_blobs); ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0); } @@ -2123,6 +2126,7 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) { /*key_is_internal*/ false); iter->Prev(); } + ASSERT_OK(iter->status()); } // Backward iteration, then reverse to forward. @@ -2169,6 +2173,7 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) { iter->Next(); } } + ASSERT_OK(iter->status()); } // Backward iterating multiple versions of the same key, get in this order: @@ -2187,6 +2192,7 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) { iter->Prev(); } } + ASSERT_OK(iter->status()); } int upper_bound_idx = num_blobs - 2; @@ -2209,6 +2215,7 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) { iter->Next(); } } + ASSERT_OK(iter->status()); } // Backward iteration with upper and lower bound. 
@@ -2224,6 +2231,7 @@ TEST_F(DBBlobWithTimestampTest, IterateBlobs) { iter->Prev(); } } + ASSERT_OK(iter->status()); } } diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index d75851083..0bf79bef1 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -165,6 +165,7 @@ void DoRandomIteraratorTest(DB* db, std::vector source_strings, AssertItersEqual(iter.get(), result_iter.get()); is_valid = iter->Valid(); } + ASSERT_OK(iter->status()); } class DoubleComparator : public Comparator { diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 3d5794149..2d6835dcc 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -138,6 +138,7 @@ TEST_F(DBBasicTest, ReadOnlyDB) { ASSERT_OK(iter->status()); ++count; } + ASSERT_OK(iter->status()); // Always expect two keys: "foo" and "bar" ASSERT_EQ(count, 2); }; diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 3335626b6..938ca911c 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -389,6 +389,7 @@ TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) { while (iter->Valid()) { iter->Next(); } + ASSERT_OK(iter->status()); delete iter; iter = nullptr; } diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 9e9c75473..9bd5c11b6 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -1905,6 +1905,7 @@ TEST_F(DBBloomFilterTest, PrefixExtractorWithFilter2) { for (iter->Seek("zzzzz_AAAA"); iter->Valid(); iter->Next()) { iter_res.emplace_back(iter->value().ToString()); } + ASSERT_OK(iter->status()); std::vector expected_res = {"val1", "val2", "val3", "val4"}; ASSERT_EQ(iter_res, expected_res); diff --git a/db/db_compaction_filter_test.cc b/db/db_compaction_filter_test.cc index f6f44dc2a..44c406c49 100644 --- a/db/db_compaction_filter_test.cc +++ b/db/db_compaction_filter_test.cc @@ -805,6 +805,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) { count++; iter->Next(); } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 6); read_options.snapshot = nullptr; std::unique_ptr iter1(db_->NewIterator(read_options)); @@ -815,6 +816,7 @@ TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) { count++; iter1->Next(); } + ASSERT_OK(iter1->status()); // We have deleted 10 keys from 40 using the compaction filter // Keys 6-9 before the snapshot and 100-105 after the snapshot ASSERT_EQ(count, 30); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index b0f6a792b..6231a4c27 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2673,6 +2673,7 @@ TEST_P(DBCompactionTestWithParam, ConvertCompactionStyle) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); } + ASSERT_OK(iter->status()); delete iter; std::string expected_keys; diff --git a/db/db_flush_test.cc b/db/db_flush_test.cc index 8537af84d..b2c9f4e67 100644 --- a/db/db_flush_test.cc +++ b/db/db_flush_test.cc @@ -1377,6 +1377,7 @@ TEST_F(DBFlushTest, MemPurgeDeleteAndDeleteRange) { ASSERT_EQ(value, NOT_FOUND); count++; } + ASSERT_OK(iter->status()); // Expected count here is 3: KEY3, KEY4, KEY5. 
ASSERT_EQ(count, EXPECTED_COUNT_FORLOOP); diff --git a/db/db_iter_test.cc b/db/db_iter_test.cc index 65290bfad..6fd446970 100644 --- a/db/db_iter_test.cc +++ b/db/db_iter_test.cc @@ -275,6 +275,7 @@ TEST_F(DBIteratorTest, DBIteratorPrevNext) { db_iter->Next(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } // Test to check the SeekToLast() with iterate_upper_bound not set { @@ -1415,6 +1416,7 @@ TEST_F(DBIteratorTest, DBIterator1) { ASSERT_EQ(db_iter->key().ToString(), "b"); db_iter->Next(); ASSERT_FALSE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); } TEST_F(DBIteratorTest, DBIterator2) { @@ -1528,6 +1530,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1552,6 +1555,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1576,6 +1580,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1600,6 +1605,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "put_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1624,6 +1630,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1648,6 +1655,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1672,6 +1680,7 @@ TEST_F(DBIteratorTest, DBIterator5) { ASSERT_EQ(db_iter->value().ToString(), "put_1,merge_4,merge_5,merge_6"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1726,6 +1735,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1750,6 +1760,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1774,6 +1785,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_1,merge_2,merge_3"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1794,6 +1806,7 @@ TEST_F(DBIteratorTest, DBIterator6) { nullptr /* read_callback */)); db_iter->SeekToLast(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1818,6 +1831,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_4"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1842,6 +1856,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1866,6 +1881,7 @@ TEST_F(DBIteratorTest, DBIterator6) { ASSERT_EQ(db_iter->value().ToString(), "merge_4,merge_5,merge_6"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } } @@ -1910,6 +1926,7 @@ TEST_F(DBIteratorTest, DBIterator7) { 
ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1952,6 +1969,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -1994,6 +2012,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2041,6 +2060,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2089,6 +2109,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2131,6 +2152,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2179,6 +2201,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2228,6 +2251,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } { @@ -2271,6 +2295,7 @@ TEST_F(DBIteratorTest, DBIterator7) { ASSERT_EQ(db_iter->value().ToString(), "merge_1"); db_iter->Prev(); ASSERT_TRUE(!db_iter->Valid()); + ASSERT_OK(db_iter->status()); } } @@ -2440,6 +2465,7 @@ TEST_F(DBIteratorTest, SeekToLastOccurrenceSeq0) { ASSERT_EQ(db_iter->value().ToString(), "2"); db_iter->Next(); ASSERT_FALSE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); } TEST_F(DBIteratorTest, DBIterator11) { @@ -2469,6 +2495,7 @@ TEST_F(DBIteratorTest, DBIterator11) { ASSERT_EQ(db_iter->key().ToString(), "b"); db_iter->Next(); ASSERT_FALSE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); } TEST_F(DBIteratorTest, DBIterator12) { @@ -2497,6 +2524,7 @@ TEST_F(DBIteratorTest, DBIterator12) { ASSERT_EQ(db_iter->value().ToString(), "1"); db_iter->Prev(); ASSERT_FALSE(db_iter->Valid()); + ASSERT_OK(db_iter->status()); } TEST_F(DBIteratorTest, DBIterator13) { @@ -2635,6 +2663,7 @@ TEST_F(DBIterWithMergeIterTest, InnerMergeIterator1) { ASSERT_EQ(db_iter_->value().ToString(), "3"); db_iter_->Next(); ASSERT_FALSE(db_iter_->Valid()); + ASSERT_OK(db_iter_->status()); } TEST_F(DBIterWithMergeIterTest, InnerMergeIterator2) { diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc index 4df3448f9..29c39f6ad 100644 --- a/db/db_iterator_test.cc +++ b/db/db_iterator_test.cc @@ -183,6 +183,7 @@ TEST_P(DBIteratorTest, NonBlockingIteration) { ASSERT_OK(iter->status()); count++; } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 1); delete iter; @@ -217,6 +218,7 @@ TEST_P(DBIteratorTest, NonBlockingIteration) { ASSERT_OK(iter->status()); count++; } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 1); ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS)); ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD)); @@ -870,6 +872,7 @@ TEST_P(DBIteratorTest, IterWithSnapshot) { } } db_->ReleaseSnapshot(snapshot); + ASSERT_OK(iter->status()); delete iter; } while (ChangeOptions()); } @@ -1214,6 +1217,7 @@ TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) { iter->Next(); 
ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); ASSERT_EQ(upper_bound_hits, 1); } } @@ -1338,6 +1342,7 @@ TEST_P(DBIteratorTest, IndexWithFirstKey) { iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT)); EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS)); } @@ -1579,6 +1584,7 @@ class DBIteratorTestForPinnedData : public DBIteratorTest { ASSERT_EQ("1", prop_value); all_keys.push_back(iter->key()); } + ASSERT_OK(iter->status()); ASSERT_EQ(all_keys.size(), true_data.size()); // Verify that all keys slices are valid (backward) @@ -1682,7 +1688,7 @@ TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) { ASSERT_EQ(kv.first, data_iter->first); ASSERT_EQ(kv.second, data_iter->second); } - + ASSERT_OK(iter->status()); delete iter; } @@ -1728,6 +1734,7 @@ TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) { ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } + ASSERT_OK(iter->status()); ASSERT_EQ(results.size(), 1000); for (size_t i = 0; i < results.size(); i++) { @@ -1785,6 +1792,7 @@ TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) { ASSERT_EQ("1", prop_value); results.emplace_back(iter->key(), iter->value().ToString()); } + ASSERT_OK(iter->status()); auto data_iter = true_data.begin(); for (size_t i = 0; i < results.size(); i++, data_iter++) { @@ -2079,6 +2087,7 @@ TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) { ASSERT_EQ(iter->value().ToString(), data_iter->second); data_iter++; } + ASSERT_OK(iter->status()); ASSERT_EQ(data_iter, true_data.rend()); delete iter; @@ -2136,6 +2145,7 @@ TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) { entries_right++; data_iter++; } + ASSERT_OK(iter->status()); ASSERT_EQ(data_iter, true_data.rend()); delete iter; @@ -2175,6 +2185,7 @@ TEST_P(DBIteratorTest, IteratorWithLocalStatistics) { total_next++; if (!iter->Valid()) { + EXPECT_OK(iter->status()); break; } total_next_found++; @@ -2202,6 +2213,7 @@ TEST_P(DBIteratorTest, IteratorWithLocalStatistics) { total_prev++; if (!iter->Valid()) { + EXPECT_OK(iter->status()); break; } total_prev_found++; @@ -2416,6 +2428,7 @@ TEST_P(DBIteratorTest, Refresh) { ASSERT_EQ(iter->key().compare(Slice("x")), 0); iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); iter.reset(); } @@ -2469,32 +2482,32 @@ TEST_P(DBIteratorTest, RefreshWithSnapshot) { verify_iter(1, 3); // Refresh to same snapshot ASSERT_OK(iter->Refresh(snapshot)); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); iter->Seek(Key(3)); verify_iter(3, 6); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); // Refresh to a newer snapshot ASSERT_OK(iter->Refresh(snapshot2)); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); iter->SeekToFirst(); verify_iter(0, 4, /*new_key2=*/true); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); // Refresh to an older snapshot ASSERT_OK(iter->Refresh(snapshot)); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); iter->Seek(Key(3)); verify_iter(3, 6); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); // Refresh to no snapshot ASSERT_OK(iter->Refresh()); - ASSERT_TRUE(iter->status().ok() && 
!iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); iter->Seek(Key(2)); verify_iter(2, 4, /*new_key2=*/true); verify_iter(6, 7); - ASSERT_TRUE(iter->status().ok() && !iter->Valid()); + ASSERT_TRUE(!iter->Valid() && iter->status().ok()); // Change LSM shape, new SuperVersion is created. ASSERT_OK(Flush()); @@ -2599,6 +2612,7 @@ TEST_P(DBIteratorTest, TableFilter) { ASSERT_EQ(IterStatus(iter), "f->6"); iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); ASSERT_TRUE(unseen.empty()); delete iter; } @@ -2621,6 +2635,7 @@ TEST_P(DBIteratorTest, TableFilter) { ASSERT_EQ(IterStatus(iter), "f->6"); iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); delete iter; } } @@ -2705,6 +2720,7 @@ TEST_P(DBIteratorTest, SkipStatistics) { ASSERT_OK(iter->status()); count++; } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 3); delete iter; skip_count += 8; // Same as above, but in reverse order @@ -2740,6 +2756,7 @@ TEST_P(DBIteratorTest, SkipStatistics) { ASSERT_OK(iter->status()); count++; } + ASSERT_OK(iter->status()); ASSERT_EQ(count, 2); delete iter; // 3 deletes + 3 original keys + lower sequence of "a" @@ -3322,6 +3339,7 @@ TEST_F(DBIteratorTest, BackwardIterationOnInplaceUpdateMemtable) { for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { ++count; } + ASSERT_OK(iter->status()); ASSERT_EQ(kNumKeys, count); } diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 2e93f96d7..003117eec 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -682,6 +682,7 @@ TEST_F(DBRangeDelTest, TableEvictedDuringScan) { // soon as its refcount drops to zero. bbto.block_cache->EraseUnRefEntries(); } + ASSERT_OK(iter->status()); ASSERT_EQ(kNum, expected); delete iter; db_->ReleaseSnapshot(snapshot); @@ -840,6 +841,7 @@ TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) { ++expected; } } + ASSERT_OK(iter->status()); ASSERT_EQ(kNum, expected); delete iter; } @@ -908,6 +910,7 @@ TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) { std::string key; ASSERT_EQ(expected[i], iter->key()); } + ASSERT_OK(iter->status()); ASSERT_EQ(3, i); delete iter; db_->ReleaseSnapshot(snapshot); @@ -1382,6 +1385,7 @@ TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) { for (; iter->Valid(); iter->Next()) { ++keys_found; } + EXPECT_OK(iter->status()); delete iter; return keys_found; }; @@ -1485,6 +1489,7 @@ TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) { for (; iter->Valid(); iter->Prev()) { ++keys_found; } + ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(kNumKeys, keys_found); @@ -1519,6 +1524,7 @@ TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) { iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); delete iter; db_->ReleaseSnapshot(snapshot); @@ -1564,6 +1570,7 @@ TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) { iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); } TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) { @@ -1978,6 +1985,7 @@ TEST_F(DBRangeDelTest, IteratorRefresh) { ASSERT_EQ("key1", iter->key()); iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); delete iter; } diff --git a/db/db_rate_limiter_test.cc b/db/db_rate_limiter_test.cc index 60cde3157..05419db44 100644 --- a/db/db_rate_limiter_test.cc +++ b/db/db_rate_limiter_test.cc @@ -220,6 +220,7 @@ TEST_P(DBRateLimiterOnReadTest, Iterator) { ++expected; } } + ASSERT_OK(iter->status()); // Reverse scan does not read evenly (one block per iteration) due to // descending 
seqno ordering, so wait until after the loop to check total. ASSERT_EQ(expected, options_.rate_limiter->GetTotalRequests(Env::IO_USER)); diff --git a/db/db_readonly_with_timestamp_test.cc b/db/db_readonly_with_timestamp_test.cc index 8e8a5d278..7a37bfec8 100644 --- a/db/db_readonly_with_timestamp_test.cc +++ b/db/db_readonly_with_timestamp_test.cc @@ -240,6 +240,7 @@ TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); size_t expected_count = kMaxKey - start_keys[i] + 1; ASSERT_EQ(expected_count, count); @@ -252,6 +253,7 @@ TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); // SeekToFirst()/SeekToLast() with lower/upper bounds. @@ -273,6 +275,7 @@ TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(r - std::max(l, start_keys[i]), count); for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; @@ -282,6 +285,7 @@ TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); l += (kMaxKey / 100); r -= (kMaxKey / 100); } @@ -328,6 +332,7 @@ TEST_F(DBReadOnlyTestWithTimestamp, Iterators) { CheckIterUserEntry(iters[0], Key1(key), kTypeValue, "value" + std::to_string(key), write_timestamp); } + ASSERT_OK(iters[0]->status()); size_t expected_count = kMaxKey - 0 + 1; ASSERT_EQ(expected_count, count); diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc index afceabe67..987756906 100644 --- a/db/db_secondary_test.cc +++ b/db/db_secondary_test.cc @@ -200,6 +200,7 @@ TEST_F(DBSecondaryTest, ReopenAsSecondary) { } ++count; } + ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(3, count); } @@ -534,6 +535,8 @@ TEST_F(DBSecondaryTest, SecondaryCloseFiles) { } ASSERT_FALSE(iter1->Valid()); ASSERT_FALSE(iter2->Valid()); + ASSERT_OK(iter1->status()); + ASSERT_OK(iter2->status()); }; ASSERT_OK(Put("a", "value")); @@ -806,6 +809,7 @@ TEST_F(DBSecondaryTest, MissingTableFileDuringOpen) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ++count; } + ASSERT_OK(iter->status()); ASSERT_EQ(2, count); delete iter; } @@ -863,6 +867,7 @@ TEST_F(DBSecondaryTest, MissingTableFile) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { ++count; } + ASSERT_OK(iter->status()); ASSERT_EQ(2, count); delete iter; } @@ -935,6 +940,7 @@ TEST_F(DBSecondaryTest, SwitchManifest) { ASSERT_EQ("value_" + std::to_string(kNumFiles - 1), iter->value().ToString()); } + EXPECT_OK(iter->status()); }; range_scan_db(); @@ -1485,6 +1491,7 @@ TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); size_t expected_count = kMaxKey - start_keys[i] + 1; ASSERT_EQ(expected_count, count); @@ -1497,6 +1504,7 @@ TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); // SeekToFirst()/SeekToLast() with lower/upper bounds. 
@@ -1518,6 +1526,7 @@ TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(r - std::max(l, start_keys[i]), count); for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; @@ -1527,6 +1536,7 @@ TEST_F(DBSecondaryTestWithTimestamp, IteratorAndGet) { get_value_and_check(db_, read_opts, it->key(), it->value(), write_timestamps[i]); } + ASSERT_OK(it->status()); l += (kMaxKey / 100); r -= (kMaxKey / 100); } @@ -1736,6 +1746,7 @@ TEST_F(DBSecondaryTestWithTimestamp, Iterators) { CheckIterUserEntry(iters[0], Key1(key), kTypeValue, "value" + std::to_string(key), write_timestamp); } + ASSERT_OK(iters[0]->status()); size_t expected_count = kMaxKey - 0 + 1; ASSERT_EQ(expected_count, count); diff --git a/db/db_tailing_iter_test.cc b/db/db_tailing_iter_test.cc index d3debed7e..07ffadc2a 100644 --- a/db/db_tailing_iter_test.cc +++ b/db/db_tailing_iter_test.cc @@ -52,6 +52,7 @@ TEST_P(DBTestTailingIterator, TailingIteratorSingle) { iter->Next(); ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); } TEST_P(DBTestTailingIterator, TailingIteratorKeepAdding) { @@ -361,7 +362,7 @@ TEST_P(DBTestTailingIterator, TailingIteratorDeletes) { int count = 0; for (; iter->Valid(); iter->Next(), ++count) ; - + ASSERT_OK(iter->status()); ASSERT_EQ(count, num_records); } Close(); @@ -408,6 +409,7 @@ TEST_P(DBTestTailingIterator, TailingIteratorPrefixSeek) { iter->Next(); ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); } Close(); } diff --git a/db/db_test.cc b/db/db_test.cc index c59951d78..99a03b150 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -127,6 +127,7 @@ TEST_F(DBTest, MockEnvTest) { iterator->Next(); } ASSERT_TRUE(!iterator->Valid()); + ASSERT_OK(iterator->status()); delete iterator; DBImpl* dbi = static_cast_with_check(db); @@ -171,6 +172,7 @@ TEST_F(DBTest, MemEnvTest) { iterator->Next(); } ASSERT_TRUE(!iterator->Valid()); + ASSERT_OK(iterator->status()); delete iterator; DBImpl* dbi = static_cast_with_check(db); @@ -2983,6 +2985,7 @@ TEST_F(DBTest, GroupCommitTest) { itr->Next(); } ASSERT_TRUE(!itr->Valid()); + ASSERT_OK(itr->status()); delete itr; HistogramData hist_data; @@ -3511,6 +3514,8 @@ static bool CompareIterators(int step, DB* model, DB* db, ok = false; } } + EXPECT_OK(miter->status()); + EXPECT_OK(dbiter->status()); (void)count; delete miter; delete dbiter; @@ -6008,6 +6013,7 @@ TEST_F(DBTest, MergeTestTime) { ASSERT_OK(iter->status()); ++count; } + ASSERT_OK(iter->status()); ASSERT_EQ(1, count); ASSERT_EQ(4000000, TestGetTickerCount(options, MERGE_OPERATION_TOTAL_TIME)); diff --git a/db/db_test2.cc b/db/db_test2.cc index c9fbe15f4..be25cf54e 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -1805,6 +1805,7 @@ TEST_P(CompressionFailuresTest, CompressionFailures) { ASSERT_EQ(key_value_written[key], value); key_value_written.erase(key); } + ASSERT_OK(db_iter->status()); ASSERT_EQ(0, key_value_written.size()); } else if (compression_failure_type_ == kTestDecompressionFail) { ASSERT_EQ(std::string(s.getState()), @@ -3801,6 +3802,7 @@ TEST_F(DBTest2, MemtableOnlyIterator) { count++; } ASSERT_TRUE(!it->Valid()); + ASSERT_OK(it->status()); ASSERT_EQ(2, count); delete it; @@ -7588,6 +7590,7 @@ TEST_F(DBTest2, BestEffortsRecoveryWithSstUniqueIdVerification) { ASSERT_EQ(std::to_string(cnt), it->key()); ASSERT_EQ(expected_v, it->value()); } + EXPECT_OK(it->status()); ASSERT_EQ(expected_count, cnt); }; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 
bb3a74e30..3fb457676 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -943,6 +943,7 @@ std::string DBTestBase::Contents(int cf) { EXPECT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]); matched++; } + EXPECT_OK(iter->status()); EXPECT_EQ(matched, forward.size()); delete iter; @@ -1365,6 +1366,7 @@ std::string DBTestBase::IterStatus(Iterator* iter) { if (iter->Valid()) { result = iter->key().ToString() + "->" + iter->value().ToString(); } else { + EXPECT_OK(iter->status()); result = "(invalid)"; } return result; @@ -1583,6 +1585,7 @@ void DBTestBase::VerifyDBFromMap(std::map true_data, iter_cnt++; total_reads++; } + ASSERT_OK(iter->status()); ASSERT_EQ(data_iter, true_data.end()) << iter_cnt << " / " << true_data.size(); delete iter; @@ -1606,6 +1609,7 @@ void DBTestBase::VerifyDBFromMap(std::map true_data, iter_cnt++; total_reads++; } + ASSERT_OK(iter->status()); ASSERT_EQ(data_rev, true_data.rend()) << iter_cnt << " / " << true_data.size(); diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 282a57849..44f5c3bfa 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -1469,6 +1469,7 @@ TEST_P(DBTestUniversalCompaction, IncreaseUniversalCompactionNumLevels) { keys_in_db.append(iter->key().ToString()); keys_in_db.push_back(','); } + EXPECT_OK(iter->status()); delete iter; std::string expected_keys; diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 01dc84a0f..edc2ebf2e 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -2228,6 +2228,7 @@ TEST_P(DBWALTestWithParamsVaryingRecoveryMode, data.push_back( std::make_pair(iter->key().ToString(), iter->value().ToString())); } + EXPECT_OK(iter->status()); delete iter; return data; }; diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 0dd0ce8b9..4bd8eaa0b 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -525,6 +525,7 @@ TEST_F(DBBasicTestWithTimestamp, SimpleIterate) { CheckIterUserEntry(it.get(), Key1(key), kTypeValue, "value" + std::to_string(i), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(static_cast(kMaxKey) - start_keys[i] + 1, count); // SeekToFirst()/SeekToLast() with lower/upper bounds. 
@@ -544,6 +545,7 @@ TEST_F(DBBasicTestWithTimestamp, SimpleIterate) { CheckIterUserEntry(it.get(), Key1(key), kTypeValue, "value" + std::to_string(i), write_timestamps[i]); } + ASSERT_OK(it->status()); ASSERT_EQ(r - std::max(l, start_keys[i]), count); for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0; @@ -551,6 +553,7 @@ TEST_F(DBBasicTestWithTimestamp, SimpleIterate) { CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue, "value" + std::to_string(i), write_timestamps[i]); } + ASSERT_OK(it->status()); l += (kMaxKey / 100); r -= (kMaxKey / 100); } @@ -733,6 +736,7 @@ TEST_P(DBBasicTestWithTimestampTableOptions, GetAndMultiGet) { ASSERT_EQ(it->value(), value_from_get); ASSERT_EQ(Timestamp(1, 0), timestamp); } + ASSERT_OK(it->status()); // verify MultiGet() constexpr uint64_t step = 2; @@ -1065,6 +1069,7 @@ TEST_F(DBBasicTestWithTimestamp, SimpleForwardIterateLowerTsBound) { write_timestamps[i - 1]); } } + ASSERT_OK(it->status()); size_t expected_count = kMaxKey + 1; ASSERT_EQ(expected_count, count); } @@ -1143,6 +1148,7 @@ TEST_F(DBBasicTestWithTimestamp, BackwardIterateLowerTsBound) { write_timestamps[1]); } } + ASSERT_OK(it->status()); size_t expected_count = kMaxKey + 1; ASSERT_EQ(expected_count, count); } @@ -1173,6 +1179,7 @@ TEST_F(DBBasicTestWithTimestamp, BackwardIterateLowerTsBound) { CheckIterEntry(it.get(), Key1(key), kTypeDeletionWithTimestamp, Slice(), write_timestamp); } + ASSERT_OK(it->status()); ASSERT_EQ(kMaxKey + 1, count); } Close(); @@ -1278,6 +1285,7 @@ TEST_F(DBBasicTestWithTimestamp, BackwardIterateLowerTsBound_Reseek) { CheckIterEntry(it.get(), "a", kTypeValue, "v" + std::to_string(4 + i), Timestamp(4 + i, 0)); } + ASSERT_OK(it->status()); } Close(); @@ -3145,6 +3153,7 @@ TEST_P(DBBasicTestWithTimestampPrefixSeek, IterateWithPrefix) { "value" + std::to_string(i), write_ts_list[i]); iter->Next(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); // Seek to kMinKey iter->Seek(Key1(kMinKey)); @@ -3152,6 +3161,7 @@ TEST_P(DBBasicTestWithTimestampPrefixSeek, IterateWithPrefix) { "value" + std::to_string(i), write_ts_list[i]); iter->Prev(); ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); } const std::vector targets = {kMinKey, kMinKey + 0x10, kMinKey + 0x100, kMaxKey}; @@ -3190,6 +3200,7 @@ TEST_P(DBBasicTestWithTimestampPrefixSeek, IterateWithPrefix) { ++expected_key; it->Next(); } + ASSERT_OK(it->status()); ASSERT_EQ(expected_ub - targets[j] + 1, count); count = 0; @@ -3208,6 +3219,7 @@ TEST_P(DBBasicTestWithTimestampPrefixSeek, IterateWithPrefix) { --expected_key; it->Prev(); } + ASSERT_OK(it->status()); ASSERT_EQ(targets[j] - std::max(expected_lb, kMinKey) + 1, count); } } @@ -3313,6 +3325,7 @@ TEST_P(DBBasicTestWithTsIterTombstones, IterWithDelete) { ASSERT_EQ(Key1(key), iter->key()); ASSERT_EQ("value1" + std::to_string(key), iter->value()); } + ASSERT_OK(iter->status()); ASSERT_EQ((kMaxKey - kMinKey + 1) / 2, count); } Close(); @@ -3932,6 +3945,7 @@ TEST_P(DBBasicTestWithTimestampTableOptions, DeleteRangeBaiscReadAndIterate) { ++expected; } } + ASSERT_OK(iter->status()); ASSERT_EQ(kNum, expected); expected = kNum / 2; @@ -3939,6 +3953,7 @@ TEST_P(DBBasicTestWithTimestampTableOptions, DeleteRangeBaiscReadAndIterate) { ASSERT_EQ(Key1(expected), iter->key()); ++expected; } + ASSERT_OK(iter->status()); ASSERT_EQ(kNum, expected); expected = kRangeBegin - 1; @@ -3946,6 +3961,7 @@ TEST_P(DBBasicTestWithTimestampTableOptions, DeleteRangeBaiscReadAndIterate) { ASSERT_EQ(Key1(expected), iter->key()); --expected; } + ASSERT_OK(iter->status()); 
ASSERT_EQ(-1, expected); read_ts = Timestamp(0, 0); @@ -4227,6 +4243,7 @@ TEST_F(DBBasicTestWithTimestamp, MergeBasic) { ASSERT_EQ(value, it->value()); ASSERT_EQ(write_ts_strs[i], it->timestamp()); } + EXPECT_OK(it->status()); ASSERT_EQ(kNumOfUniqKeys, key_int_val); key_int_val = kNumOfUniqKeys - 1; @@ -4238,6 +4255,7 @@ TEST_F(DBBasicTestWithTimestamp, MergeBasic) { ASSERT_EQ(value, it->value()); ASSERT_EQ(write_ts_strs[i], it->timestamp()); } + ASSERT_OK(it->status()); ASSERT_EQ(std::numeric_limits::max(), key_int_val); value_suffix = value_suffix + "." + std::to_string(i + 1); diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc index 481eda7dd..b6d4f559e 100644 --- a/db/deletefile_test.cc +++ b/db/deletefile_test.cc @@ -575,6 +575,7 @@ TEST_F(DeleteFileTest, DeleteNonDefaultColumnFamily) { ASSERT_OK(itr->status()); ++count; } + ASSERT_OK(itr->status()); ASSERT_EQ(count, 1000); } @@ -588,6 +589,7 @@ TEST_F(DeleteFileTest, DeleteNonDefaultColumnFamily) { ASSERT_OK(itr->status()); ++count; } + ASSERT_OK(itr->status()); ASSERT_EQ(count, 1000); } } diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 9a0b7e659..ef4ab7fa5 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -2623,6 +2623,7 @@ TEST_P(ExternalSSTFileTest, "AfterRead"); ingest_thread.join(); for (auto* iter : iters) { + ASSERT_OK(iter->status()); delete iter; } iters.clear(); diff --git a/db/manual_compaction_test.cc b/db/manual_compaction_test.cc index e9767ab99..95b099a66 100644 --- a/db/manual_compaction_test.cc +++ b/db/manual_compaction_test.cc @@ -124,6 +124,7 @@ TEST_F(ManualCompactionTest, CompactTouchesAllKeys) { ASSERT_EQ("key3", itr->key().ToString()); itr->Next(); ASSERT_TRUE(!itr->Valid()); + ASSERT_OK(itr->status()); delete itr; delete options.compaction_filter; @@ -179,6 +180,7 @@ TEST_F(ManualCompactionTest, Test) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { num_keys++; } + ASSERT_OK(iter->status()); delete iter; ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys"; diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index eb51bcfbd..666ed32f0 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -149,6 +149,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) { ASSERT_TRUE(iter->Valid()); StopWatchNano timer2(SystemClock::Default().get(), true); iter->Next(); + ASSERT_OK(iter->status()); auto elapsed_nanos2 = timer2.ElapsedNanos(); if (FLAGS_verbose) { std::cout << "next cmp: " << get_perf_context()->user_key_comparison_count @@ -1092,6 +1093,7 @@ TEST_F(PerfContextTest, MergeOperandCount) { get_perf_context()->Reset(); } + ASSERT_OK(it->status()); } // Backward iteration @@ -1104,6 +1106,7 @@ TEST_F(PerfContextTest, MergeOperandCount) { get_perf_context()->Reset(); } + ASSERT_OK(it->status()); } }; diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index d117639a4..a6acb7b18 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -897,6 +897,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKeys) { } ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); delete iter; } @@ -945,6 +946,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKeysWithPrefix) { } ASSERT_TRUE(!iter->Valid()); + ASSERT_OK(iter->status()); delete iter; } diff --git a/db/prefix_test.cc b/db/prefix_test.cc index a8ae04035..bb6e6f7a6 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -782,6 +782,7 @@ TEST_F(PrefixTest, PrefixSeekModePrev) { } } } + ASSERT_OK(iter->status()); } } diff --git a/file/prefetch_test.cc 
b/file/prefetch_test.cc index b58b8fd22..45cda33d6 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -261,6 +261,7 @@ TEST_P(PrefetchTest, Basic) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { num_keys++; } + ASSERT_OK(iter->status()); (void)num_keys; } @@ -715,6 +716,7 @@ TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) { iter->Seek(Key(key_count++)); iter->Next(); } + ASSERT_OK(iter->status()); buff_prefetch_level_count[level] = buff_prefetch_count; if (support_prefetch && !use_direct_io) { @@ -1071,6 +1073,7 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { do { iter->Seek(BuildKey(i)); if (!iter->Valid()) { + ASSERT_OK(iter->status()); break; } i = i + 100; @@ -1090,6 +1093,7 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) { auto iter = std::unique_ptr(db_->NewIterator(ReadOptions())); for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { } + ASSERT_OK(iter->status()); if (support_prefetch && !use_direct_io) { ASSERT_EQ(fs->GetPrefetchCount(), 13); fs->ClearPrefetchCount(); @@ -1577,6 +1581,7 @@ TEST_P(PrefetchTest, DBIterLevelReadAhead) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); ASSERT_EQ(num_keys, total_keys); // For index and data blocks. @@ -1691,6 +1696,7 @@ TEST_P(PrefetchTest, DBIterLevelReadAheadWithAsyncIO) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); ASSERT_EQ(num_keys, total_keys); // For index and data blocks. @@ -1841,6 +1847,7 @@ TEST_P(PrefetchTest, DBIterAsyncIONoIOUring) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); ASSERT_EQ(num_keys, total_keys); // Check stats to make sure async prefetch is done. @@ -1868,6 +1875,7 @@ TEST_P(PrefetchTest, DBIterAsyncIONoIOUring) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); ASSERT_EQ(num_keys, total_keys); // Check stats to make sure async prefetch is done. @@ -2464,6 +2472,7 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { iter->Next(); reseek_keys_with_tuning++; } + ASSERT_OK(iter->status()); uint64_t readahead_trimmed = options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); @@ -2508,6 +2517,7 @@ TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { iter->Next(); reseek_keys_without_tuning++; } + ASSERT_OK(iter->status()); uint64_t readahead_trimmed = options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); @@ -2753,6 +2763,7 @@ TEST_P(PrefetchTest, ReadAsyncWithPosixFS) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); if (read_async_called) { ASSERT_EQ(num_keys, total_keys); @@ -3137,6 +3148,7 @@ TEST_P(PrefetchTest, TraceReadAsyncWithCallbackWrapper) { ASSERT_OK(iter->status()); num_keys++; } + ASSERT_OK(iter->status()); // End the tracing. 
ASSERT_OK(db_->EndIOTrace()); diff --git a/monitoring/stats_history_test.cc b/monitoring/stats_history_test.cc index cfed7bad7..b5b17aa78 100644 --- a/monitoring/stats_history_test.cc +++ b/monitoring/stats_history_test.cc @@ -206,6 +206,7 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_TRUE(iterator->key() == iterator->value()); } + ASSERT_OK(iterator->status()); delete iterator; ASSERT_OK(Flush()); ASSERT_OK(Delete("sol")); @@ -219,6 +220,7 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) { ASSERT_TRUE(iterator->key() == iterator->value()); } + ASSERT_OK(iterator->status()); delete iterator; ASSERT_OK(Flush()); ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); @@ -280,6 +282,7 @@ int countkeys(Iterator* iter) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { count++; } + EXPECT_OK(iter->status()); return count; } diff --git a/table/iterator_wrapper.h b/table/iterator_wrapper.h index 3e6f9c1ae..a9de3dff3 100644 --- a/table/iterator_wrapper.h +++ b/table/iterator_wrapper.h @@ -52,6 +52,17 @@ class IteratorWrapperBase { void DeleteIter(bool is_arena_mode) { if (iter_) { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + if (!status_checked_after_invalid_) { + // If this assertion fails, it is likely that you did not check + // iterator status after Valid() returns false. + fprintf(stderr, + "Failed to check status after Valid() returned false from this " + "iterator.\n"); + port::PrintStack(); + std::abort(); + } +#endif if (!is_arena_mode) { delete iter_; } else { @@ -61,7 +72,12 @@ class IteratorWrapperBase { } // Iterator interface methods - bool Valid() const { return valid_; } + bool Valid() const { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + status_checked_after_invalid_ = valid_; +#endif + return valid_; + } Slice key() const { assert(Valid()); return result_.key; @@ -72,6 +88,9 @@ class IteratorWrapperBase { } // Methods below require iter() != nullptr Status status() const { +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + status_checked_after_invalid_ = true; +#endif assert(iter_); return iter_->status(); } @@ -183,6 +202,10 @@ class IteratorWrapperBase { InternalIteratorBase* iter_; IterateResult result_; bool valid_; + +#ifdef ROCKSDB_ASSERT_STATUS_CHECKED + mutable bool status_checked_after_invalid_ = true; +#endif }; using IteratorWrapper = IteratorWrapperBase; diff --git a/table/sst_file_reader_test.cc b/table/sst_file_reader_test.cc index ba81d7815..36a7975cf 100644 --- a/table/sst_file_reader_test.cc +++ b/table/sst_file_reader_test.cc @@ -304,6 +304,7 @@ class SstFileReaderTimestampTest : public testing::Test { } ASSERT_FALSE(iter->Valid()); + ASSERT_OK(iter->status()); } protected: diff --git a/utilities/merge_operators/string_append/stringappend_test.cc b/utilities/merge_operators/string_append/stringappend_test.cc index 0bf09af8a..acc71c8e4 100644 --- a/utilities/merge_operators/string_append/stringappend_test.cc +++ b/utilities/merge_operators/string_append/stringappend_test.cc @@ -193,6 +193,7 @@ TEST_P(StringAppendOperatorTest, IteratorTest) { ASSERT_EQ(res, "a1,a2,a3"); } } + ASSERT_OK(it->status()); // Should release the snapshot and be aware of the new stuff now it.reset(db_->NewIterator(ReadOptions())); @@ -217,6 +218,7 @@ TEST_P(StringAppendOperatorTest, IteratorTest) { ASSERT_EQ(res, "a1,a2,a3,a4"); } } + ASSERT_OK(it->status()); slists.Append("k3", "g1"); @@ -242,6 +244,7 @@ 
TEST_P(StringAppendOperatorTest, IteratorTest) { ASSERT_EQ(res, "g1"); } } + ASSERT_OK(it->status()); } TEST_P(StringAppendOperatorTest, SimpleTest) { diff --git a/utilities/option_change_migration/option_change_migration_test.cc b/utilities/option_change_migration/option_change_migration_test.cc index 9fa718cfc..1cb42a0ca 100644 --- a/utilities/option_change_migration/option_change_migration_test.cc +++ b/utilities/option_change_migration/option_change_migration_test.cc @@ -87,6 +87,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate1) { for (; it->Valid(); it->Next()) { keys.insert(it->key().ToString()); } + ASSERT_OK(it->status()); } Close(); @@ -124,6 +125,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate1) { it->Next(); } ASSERT_TRUE(!it->Valid()); + ASSERT_OK(it->status()); } } @@ -165,6 +167,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate2) { for (; it->Valid(); it->Next()) { keys.insert(it->key().ToString()); } + ASSERT_OK(it->status()); } Close(); @@ -202,6 +205,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate2) { it->Next(); } ASSERT_TRUE(!it->Valid()); + ASSERT_OK(it->status()); } } @@ -249,6 +253,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate3) { for (; it->Valid(); it->Next()) { keys.insert(it->key().ToString()); } + ASSERT_OK(it->status()); } Close(); @@ -286,6 +291,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate3) { it->Next(); } ASSERT_TRUE(!it->Valid()); + ASSERT_OK(it->status()); } } @@ -333,6 +339,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate4) { for (; it->Valid(); it->Next()) { keys.insert(it->key().ToString()); } + ASSERT_OK(it->status()); } Close(); @@ -370,6 +377,7 @@ TEST_P(DBOptionChangeMigrationTests, Migrate4) { it->Next(); } ASSERT_TRUE(!it->Valid()); + ASSERT_OK(it->status()); } } @@ -509,6 +517,7 @@ TEST_F(DBOptionChangeMigrationTest, CompactedSrcToUniversal) { for (; it->Valid(); it->Next()) { keys.insert(it->key().ToString()); } + ASSERT_OK(it->status()); } Close(); From 0bb3a26d89e0eb6f3a48a8a2caea0c778abfad84 Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Wed, 18 Oct 2023 11:01:04 -0700 Subject: [PATCH 216/386] Lazy load native library in Statistics constructor. 
(#11953) Summary: Should fix https://github.com/facebook/rocksdb/issues/9667 and follow the same approach as https://github.com/facebook/rocksdb/issues/11919 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11953 Reviewed By: hx235 Differential Revision: D50307456 Pulled By: ajkr fbshipit-source-id: 43e7e671e8b04875185b38284cefd4c3e11981fa --- java/Makefile | 1 + .../src/main/java/org/rocksdb/Statistics.java | 12 ++++++++-- .../test/java/org/rocksdb/StatisticsTest.java | 22 +++++++++++-------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/java/Makefile b/java/Makefile index ede740afa..f37dc0c4c 100644 --- a/java/Makefile +++ b/java/Makefile @@ -450,6 +450,7 @@ test: java java_test run_test: $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ALL_JAVA_TESTS) + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner org.rocksdb.StatisticsTest run_plugin_test: $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ROCKSDB_PLUGIN_JAVA_TESTS) diff --git a/java/src/main/java/org/rocksdb/Statistics.java b/java/src/main/java/org/rocksdb/Statistics.java index 9f3c9a62c..09e08ee56 100644 --- a/java/src/main/java/org/rocksdb/Statistics.java +++ b/java/src/main/java/org/rocksdb/Statistics.java @@ -14,7 +14,7 @@ public class Statistics extends RocksObject { public Statistics() { - super(newStatistics()); + super(newStatisticsInstance()); } public Statistics(final Statistics otherStatistics) { @@ -22,7 +22,7 @@ public Statistics(final Statistics otherStatistics) { } public Statistics(final EnumSet ignoreHistograms) { - super(newStatistics(toArrayValues(ignoreHistograms))); + super(newStatisticsInstance(toArrayValues(ignoreHistograms))); } public Statistics(final EnumSet ignoreHistograms, final Statistics otherStatistics) { @@ -134,8 +134,16 @@ public String toString() { return toString(nativeHandle_); } + private static long newStatisticsInstance() { + RocksDB.loadLibrary(); + return newStatistics(); + } private static native long newStatistics(); private static native long newStatistics(final long otherStatisticsHandle); + private static long newStatisticsInstance(final byte[] ignoreHistograms) { + RocksDB.loadLibrary(); + return newStatistics(ignoreHistograms); + } private static native long newStatistics(final byte[] ignoreHistograms); private static native long newStatistics( final byte[] ignoreHistograms, final long otherStatisticsHandle); diff --git a/java/src/test/java/org/rocksdb/StatisticsTest.java b/java/src/test/java/org/rocksdb/StatisticsTest.java index de92102ec..269cc56a0 100644 --- a/java/src/test/java/org/rocksdb/StatisticsTest.java +++ b/java/src/test/java/org/rocksdb/StatisticsTest.java @@ -5,24 +5,28 @@ package org.rocksdb; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.charset.StandardCharsets; +import java.util.EnumSet; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.nio.charset.StandardCharsets; - -import static org.assertj.core.api.Assertions.assertThat; - public class StatisticsTest { - - @ClassRule - public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = - new RocksNativeLibraryResource(); - - @Rule public
TemporaryFolder dbFolder = new TemporaryFolder(); + @Test + public void createStatistics() throws RocksDBException { + final Statistics statistics = new Statistics(); + statistics.setStatsLevel(StatsLevel.EXCEPT_DETAILED_TIMERS); + final Statistics statisticsWithHistogramOptions = + new Statistics(EnumSet.of(HistogramType.DB_WRITE, HistogramType.COMPACTION_TIME)); + statisticsWithHistogramOptions.reset(); + } + @Test public void statsLevel() throws RocksDBException { final Statistics statistics = new Statistics(); From a80e3f6c579fd2170739d049ed9c71e06c96e991 Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Wed, 18 Oct 2023 12:46:35 -0700 Subject: [PATCH 217/386] Add keyExists Java API (#11705) Summary: Add a new method to check if a key exists in the database. It avoids copying data between C++ and Java code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11705 Reviewed By: ajkr Differential Revision: D50370934 Pulled By: akankshamahajan15 fbshipit-source-id: ab2d42213fbebcaff919b0ffbbef9d45e88ca365 --- java/Makefile | 1 + java/rocksjni/rocksjni.cc | 102 +++++++ java/src/main/java/org/rocksdb/RocksDB.java | 259 ++++++++++++++++++ .../test/java/org/rocksdb/KeyExistsTest.java | 225 +++++++++++++++ 4 files changed, 587 insertions(+) create mode 100644 java/src/test/java/org/rocksdb/KeyExistsTest.java diff --git a/java/Makefile b/java/Makefile index f37dc0c4c..100a2c66f 100644 --- a/java/Makefile +++ b/java/Makefile @@ -142,6 +142,7 @@ JAVA_TESTS = \ org.rocksdb.FilterTest\ org.rocksdb.FlushTest\ org.rocksdb.InfoLogLevelTest\ + org.rocksdb.KeyExistsTest \ org.rocksdb.KeyMayExistTest\ org.rocksdb.ConcurrentTaskLimiterTest\ org.rocksdb.LoggerTest\ diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 8823b0d31..920b443b9 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -2215,6 +2215,108 @@ bool key_may_exist_direct_helper(JNIEnv* env, jlong jdb_handle, return exists; } +jboolean key_exists_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, + jlong jread_opts_handle, char* key, jint jkey_len) { + std::string value; + bool value_found = false; + + auto* db = reinterpret_cast(jdb_handle); + + ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; + if (jcf_handle == 0) { + cf_handle = db->DefaultColumnFamily(); + } else { + cf_handle = + reinterpret_cast(jcf_handle); + } + + ROCKSDB_NAMESPACE::ReadOptions read_opts = + jread_opts_handle == 0 + ? 
ROCKSDB_NAMESPACE::ReadOptions() + : *(reinterpret_cast( + jread_opts_handle)); + + ROCKSDB_NAMESPACE::Slice key_slice(key, jkey_len); + + const bool may_exist = + db->KeyMayExist(read_opts, cf_handle, key_slice, &value, &value_found); + + if (may_exist) { + ROCKSDB_NAMESPACE::Status s; + { + ROCKSDB_NAMESPACE::PinnableSlice pinnable_val; + s = db->Get(read_opts, cf_handle, key_slice, &pinnable_val); + } + if (s.IsNotFound()) { + return JNI_FALSE; + } else if (s.ok()) { + return JNI_TRUE; + } else { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); + return JNI_FALSE; + } + } else { + return JNI_FALSE; + } +} + +/* + * Class: org_rocksdb_RocksDB + * Method: keyExist + * Signature: (JJJ[BII)Z + */ +jboolean Java_org_rocksdb_RocksDB_keyExists(JNIEnv* env, jobject, + jlong jdb_handle, jlong jcf_handle, + jlong jread_opts_handle, + jbyteArray jkey, jint jkey_offset, + jint jkey_len) { + jbyte* key = new jbyte[jkey_len]; + env->GetByteArrayRegion(jkey, jkey_offset, jkey_len, key); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + delete[] key; + return JNI_FALSE; + } else { + jboolean key_exists = + key_exists_helper(env, jdb_handle, jcf_handle, jread_opts_handle, + reinterpret_cast(key), jkey_len); + delete[] key; + return key_exists; + } +} + +/* + private native boolean keyExistDirect(final long handle, final long + cfHandle, final long readOptHandle, final ByteBuffer key, final int keyOffset, + final int keyLength); + + + * Class: org_rocksdb_RocksDB + * Method: keyExistDirect + * Signature: (JJJLjava/nio/ByteBuffer;II)Z + */ +jboolean Java_org_rocksdb_RocksDB_keyExistsDirect( + JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, + jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len) { + char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); + if (key == nullptr) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, + "Invalid key argument (argument is not a valid direct ByteBuffer)"); + return JNI_FALSE; + } + if (env->GetDirectBufferCapacity(jkey) < (jkey_offset + jkey_len)) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, + "Invalid key argument. Capacity is less than requested region (offset " + "+ length)."); + return JNI_FALSE; + } + + return key_exists_helper(env, jdb_handle, jcf_handle, jread_opts_handle, key, + jkey_len); +} + /* * Class: org_rocksdb_RocksDB * Method: keyMayExist diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index a23821a92..453c4b033 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -2435,6 +2435,259 @@ public List multiGetByteBuffers(final ReadOptions readOptio return results; } + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * @param key byte array of a key to search for* + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final byte[] key) { + return keyExists(key, 0, key.length); + } + /** + * Check if a key exists in the database. 
+ * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be + * non-negative and no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative + * and no larger than "key".length + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final byte[] key, final int offset, final int len) { + return keyExists(null, null, key, offset, len); + } + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key byte array of a key to search for + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, final byte[] key) { + return keyExists(columnFamilyHandle, key, 0, key.length); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be + * non-negative and no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative + * and no larger than "key".length + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, + final int offset, final int len) { + return keyExists(columnFamilyHandle, null, key, offset, len); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param readOptions {@link ReadOptions} instance + * @param key byte array of a key to search for + * @return true if key exist in database, otherwise false. 
+ */ + public boolean keyExists(final ReadOptions readOptions, final byte[] key) { + return keyExists(readOptions, key, 0, key.length); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param readOptions {@link ReadOptions} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be + * non-negative and no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative + * and no larger than "key".length + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists( + final ReadOptions readOptions, final byte[] key, final int offset, final int len) { + return keyExists(null, readOptions, key, offset, len); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param readOptions {@link ReadOptions} instance + * @param key byte array of a key to search for + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, + final ReadOptions readOptions, final byte[] key) { + return keyExists(columnFamilyHandle, readOptions, key, 0, key.length); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param readOptions {@link ReadOptions} instance + * @param key byte array of a key to search for + * @param offset the offset of the "key" array to be used, must be + * non-negative and no larger than "key".length + * @param len the length of the "key" array to be used, must be non-negative + * and no larger than "key".length + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, + final ReadOptions readOptions, final byte[] key, final int offset, final int len) { + checkBounds(offset, len, key.length); + return keyExists(nativeHandle_, + columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, + readOptions == null ? 0 : readOptions.nativeHandle_, key, offset, len); + } + + /** + * Check if a key exists in the database. 
+ * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param key ByteBuffer with key. Must be allocated as direct. + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ByteBuffer key) { + return keyExists(null, null, key); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param key ByteBuffer with key. Must be allocated as direct. + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key) { + return keyExists(columnFamilyHandle, null, key); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param readOptions {@link ReadOptions} instance + * @param key ByteBuffer with key. Must be allocated as direct. + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ReadOptions readOptions, final ByteBuffer key) { + return keyExists(null, readOptions, key); + } + + /** + * Check if a key exists in the database. + * This method is not as lightweight as {@code keyMayExist} but it gives a 100% guarantee + * of a correct result, whether the key exists or not. + * + * Internally it checks if the key may exist and then double checks with read operation + * that confirms the key exists. This deals with the case where {@code keyMayExist} may return + * a false positive. + * + * The code crosses the Java/JNI boundary only once. + * + * @param columnFamilyHandle {@link ColumnFamilyHandle} instance + * @param readOptions {@link ReadOptions} instance + * @param key ByteBuffer with key. Must be allocated as direct. + * @return true if key exist in database, otherwise false. + */ + public boolean keyExists(final ColumnFamilyHandle columnFamilyHandle, + final ReadOptions readOptions, final ByteBuffer key) { + assert key != null : "key ByteBuffer parameter cannot be null"; + assert key.isDirect() : "key parameter must be a direct ByteBuffer"; + + return keyExistsDirect(nativeHandle_, + columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, + readOptions == null ? 
0 : readOptions.nativeHandle_, key, key.position(), key.limit()); + } + /** * If the key definitely does not exist in the database, then this method * returns false, otherwise it returns true if the key might exist. @@ -4559,6 +4812,12 @@ private native void multiGet(final long dbHandle, final long rOptHandle, final int[] keyLengths, final ByteBuffer[] valuesArray, final int[] valuesSizeArray, final Status[] statusArray); + private native boolean keyExists(final long handle, final long cfHandle, final long readOptHandle, + final byte[] key, final int keyOffset, final int keyLength); + + private native boolean keyExistsDirect(final long handle, final long cfHandle, + final long readOptHandle, final ByteBuffer key, final int keyOffset, final int keyLength); + private native boolean keyMayExist( final long handle, final long cfHandle, final long readOptHandle, final byte[] key, final int keyOffset, final int keyLength); diff --git a/java/src/test/java/org/rocksdb/KeyExistsTest.java b/java/src/test/java/org/rocksdb/KeyExistsTest.java new file mode 100644 index 000000000..07e9a61f8 --- /dev/null +++ b/java/src/test/java/org/rocksdb/KeyExistsTest.java @@ -0,0 +1,225 @@ +package org.rocksdb; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.junit.*; +import org.junit.rules.ExpectedException; +import org.junit.rules.TemporaryFolder; + +public class KeyExistsTest { + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Rule public ExpectedException exceptionRule = ExpectedException.none(); + + List cfDescriptors; + List columnFamilyHandleList = new ArrayList<>(); + RocksDB db; + @Before + public void before() throws RocksDBException { + cfDescriptors = Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), + new ColumnFamilyDescriptor("new_cf".getBytes())); + final DBOptions options = + new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + + db = RocksDB.open( + options, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList); + } + + @After + public void after() { + for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { + columnFamilyHandle.close(); + } + db.close(); + } + + @Test + public void keyExists() throws RocksDBException { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + boolean exists = db.keyExists("key".getBytes(UTF_8)); + assertThat(exists).isTrue(); + exists = db.keyExists("key2".getBytes(UTF_8)); + assertThat(exists).isFalse(); + } + + @Test + public void keyExistsColumnFamily() throws RocksDBException { + byte[] key1 = "keyBBCF0".getBytes(UTF_8); + byte[] key2 = "keyBBCF1".getBytes(UTF_8); + db.put(columnFamilyHandleList.get(0), key1, "valueBBCF0".getBytes(UTF_8)); + db.put(columnFamilyHandleList.get(1), key2, "valueBBCF1".getBytes(UTF_8)); + + assertThat(db.keyExists(columnFamilyHandleList.get(0), key1)).isTrue(); + assertThat(db.keyExists(columnFamilyHandleList.get(0), key2)).isFalse(); + + assertThat(db.keyExists(columnFamilyHandleList.get(1), key1)).isFalse(); + assertThat(db.keyExists(columnFamilyHandleList.get(1), key2)).isTrue(); + } + + @Test + public void keyExistsColumnFamilyReadOptions() throws RocksDBException { + try (final ReadOptions 
readOptions = new ReadOptions()) { + byte[] key1 = "keyBBCF0".getBytes(UTF_8); + byte[] key2 = "keyBBCF1".getBytes(UTF_8); + db.put(columnFamilyHandleList.get(0), key1, "valueBBCF0".getBytes(UTF_8)); + db.put(columnFamilyHandleList.get(1), key2, "valueBBCF1".getBytes(UTF_8)); + + assertThat(db.keyExists(columnFamilyHandleList.get(0), readOptions, key1)).isTrue(); + assertThat(db.keyExists(columnFamilyHandleList.get(0), readOptions, key2)).isFalse(); + + assertThat(db.keyExists(columnFamilyHandleList.get(1), readOptions, key1)).isFalse(); + assertThat(db.keyExists(columnFamilyHandleList.get(1), readOptions, key2)).isTrue(); + } + } + + @Test + public void keyExistsReadOptions() throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + boolean exists = db.keyExists(readOptions, "key".getBytes(UTF_8)); + assertThat(exists).isTrue(); + exists = db.keyExists("key2".getBytes(UTF_8)); + assertThat(exists).isFalse(); + } + } + + @Test + public void keyExistsAfterDelete() throws RocksDBException { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + boolean exists = db.keyExists(null, null, "key".getBytes(UTF_8), 0, 3); + assertThat(exists).isTrue(); + db.delete("key".getBytes(UTF_8)); + exists = db.keyExists(null, null, "key".getBytes(UTF_8), 0, 3); + assertThat(exists).isFalse(); + } + + @Test + public void keyExistsArrayIndexOutOfBoundsException() throws RocksDBException { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + exceptionRule.expect(IndexOutOfBoundsException.class); + db.keyExists(null, null, "key".getBytes(UTF_8), 0, 5); + } + + @Test() + public void keyExistsArrayIndexOutOfBoundsExceptionWrongOffset() throws RocksDBException { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + exceptionRule.expect(IndexOutOfBoundsException.class); + db.keyExists(null, null, "key".getBytes(UTF_8), 6, 2); + } + + @Test + public void keyExistsDirectByteBuffer() throws RocksDBException { + byte[] key = "key".getBytes(UTF_8); + + db.put(key, "value".getBytes(UTF_8)); + ByteBuffer buff = ByteBuffer.allocateDirect(key.length); + buff.put(key); + buff.flip(); + boolean exists = db.keyExists(buff); + assertThat(exists).isTrue(); + } + + @Test + public void keyExistsDirectByteBufferReadOptions() throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + byte[] key = "key".getBytes(UTF_8); + + db.put(key, "value".getBytes(UTF_8)); + ByteBuffer buff = ByteBuffer.allocateDirect(key.length); + buff.put(key); + buff.flip(); + + boolean exists = db.keyExists(buff); + assertThat(exists).isTrue(); + } + } + + @Test + public void keyExistsDirectByteBufferAfterDelete() throws RocksDBException { + byte[] key = "key".getBytes(UTF_8); + + db.put(key, "value".getBytes(UTF_8)); + ByteBuffer buff = ByteBuffer.allocateDirect(key.length); + buff.put(key); + buff.flip(); + boolean exists = db.keyExists(buff); + assertThat(exists).isTrue(); + db.delete(key); + exists = db.keyExists(buff); + assertThat(exists).isFalse(); + } + + @Test + public void keyExistsDirectByteBufferColumnFamily() throws RocksDBException { + byte[] key1 = "keyBBCF0".getBytes(UTF_8); + byte[] key2 = "keyBBCF1".getBytes(UTF_8); + db.put(columnFamilyHandleList.get(0), key1, "valueBBCF0".getBytes(UTF_8)); + db.put(columnFamilyHandleList.get(1), key2, "valueBBCF1".getBytes(UTF_8)); + + ByteBuffer key1Buff = ByteBuffer.allocateDirect(key1.length); + key1Buff.put(key1); + key1Buff.flip(); + + ByteBuffer key2Buff = 
ByteBuffer.allocateDirect(key2.length); + key2Buff.put(key2); + key2Buff.flip(); + + assertThat(db.keyExists(columnFamilyHandleList.get(0), key1Buff)).isTrue(); + assertThat(db.keyExists(columnFamilyHandleList.get(0), key2Buff)).isFalse(); + + assertThat(db.keyExists(columnFamilyHandleList.get(1), key1Buff)).isFalse(); + assertThat(db.keyExists(columnFamilyHandleList.get(1), key2Buff)).isTrue(); + } + + @Test + public void keyExistsDirectByteBufferColumnFamilyReadOptions() throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + byte[] key1 = "keyBBCF0".getBytes(UTF_8); + byte[] key2 = "keyBBCF1".getBytes(UTF_8); + db.put(columnFamilyHandleList.get(0), key1, "valueBBCF0".getBytes(UTF_8)); + db.put(columnFamilyHandleList.get(1), key2, "valueBBCF1".getBytes(UTF_8)); + + ByteBuffer key1Buff = ByteBuffer.allocateDirect(key1.length); + key1Buff.put(key1); + key1Buff.flip(); + + ByteBuffer key2Buff = ByteBuffer.allocateDirect(key2.length); + key2Buff.put(key2); + key2Buff.flip(); + + assertThat(db.keyExists(columnFamilyHandleList.get(0), readOptions, key1Buff)).isTrue(); + assertThat(db.keyExists(columnFamilyHandleList.get(0), readOptions, key2Buff)).isFalse(); + + assertThat(db.keyExists(columnFamilyHandleList.get(1), readOptions, key1Buff)).isFalse(); + assertThat(db.keyExists(columnFamilyHandleList.get(1), readOptions, key2Buff)).isTrue(); + } + } + + @Test + public void keyExistsDirectReadOptions() throws RocksDBException { + try (final ReadOptions readOptions = new ReadOptions()) { + byte[] key = "key1".getBytes(UTF_8); + db.put(key, "value".getBytes(UTF_8)); + ByteBuffer buff = ByteBuffer.allocateDirect(key.length); + buff.put(key); + buff.flip(); + boolean exists = db.keyExists(readOptions, key); + assertThat(exists).isTrue(); + buff.clear(); + + buff.put("key2".getBytes(UTF_8)); + buff.flip(); + exists = db.keyExists("key2".getBytes(UTF_8)); + assertThat(exists).isFalse(); + } + } +} From 55590012ae8157ca311ff38cba0af878810b775b Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Wed, 18 Oct 2023 12:51:50 -0700 Subject: [PATCH 218/386] Add RocksJava tests to CMake (#11756) Summary: Refactor CMake build to allow run java tests via CMake, including Windows. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11756 Reviewed By: hx235 Differential Revision: D50370739 Pulled By: akankshamahajan15 fbshipit-source-id: ae05cc08a0f9bb2a0a4f1ece02c523fb465bb817 --- .circleci/config.yml | 7 + java/CMakeLists.txt | 295 +++++++++++++++++++++++++++++++++++++---- java/rocksjni/portal.h | 4 +- 3 files changed, 277 insertions(+), 29 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 10fe22c6f..e6bd030e9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -78,6 +78,11 @@ commands: shell: powershell.exe command: | build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16 + - run: + name: "Test RocksJava" + command: | + cd build\java + & $Env:CTEST_BIN -C Debug -j 16 pre-steps-macos: steps: - pre-steps @@ -566,6 +571,7 @@ jobs: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Program Files/CMake CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe + CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib @@ -581,6 +587,7 @@ jobs: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Program Files/CMake CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe + CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 4126ebe29..45f1a6c94 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -1,5 +1,12 @@ cmake_minimum_required(VERSION 3.4) +set(JAVA_JUNIT_VERSION "4.13.1") +set(JAVA_HAMCR_VERSION "2.2") +set(JAVA_MOCKITO_VERSION "1.10.19") +set(JAVA_CGLIB_VERSION "3.3.0") +set(JAVA_ASSERTJ_VERSION "2.9.0") + + if(${CMAKE_VERSION} VERSION_LESS "3.11.4") message("Please consider switching to CMake 3.11.4 or newer") endif() @@ -279,20 +286,219 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java src/main/java/org/rocksdb/util/SizeUnit.java src/main/java/org/rocksdb/UInt64AddOperator.java -) - -set(JAVA_TEST_CLASSES - src/test/java/org/rocksdb/BackupEngineTest.java - src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java src/test/java/org/rocksdb/NativeComparatorWrapperTest.java - src/test/java/org/rocksdb/PlatformRandomHelper.java src/test/java/org/rocksdb/RocksDBExceptionTest.java - src/test/java/org/rocksdb/RocksNativeLibraryResource.java - src/test/java/org/rocksdb/SnapshotTest.java + src/test/java/org/rocksdb/test/TestableEventListener.java src/test/java/org/rocksdb/WriteBatchTest.java + src/test/java/org/rocksdb/RocksNativeLibraryResource.java src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java src/test/java/org/rocksdb/util/WriteBatchGetter.java +) + +set(JAVA_TEST_CLASSES + src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java + src/test/java/org/rocksdb/EventListenerTest.java + src/test/java/org/rocksdb/CompactionOptionsTest.java + src/test/java/org/rocksdb/PlatformRandomHelper.java + src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java + 
src/test/java/org/rocksdb/MutableDBOptionsTest.java + src/test/java/org/rocksdb/WriteOptionsTest.java + src/test/java/org/rocksdb/SstPartitionerTest.java + src/test/java/org/rocksdb/RocksMemEnvTest.java + src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java + src/test/java/org/rocksdb/ClockCacheTest.java + src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java + src/test/java/org/rocksdb/SnapshotTest.java + src/test/java/org/rocksdb/CompactionJobStatsTest.java + src/test/java/org/rocksdb/MemTableTest.java + src/test/java/org/rocksdb/CompactionFilterFactoryTest.java + src/test/java/org/rocksdb/DefaultEnvTest.java + src/test/java/org/rocksdb/DBOptionsTest.java + src/test/java/org/rocksdb/RocksIteratorTest.java + src/test/java/org/rocksdb/SliceTest.java + src/test/java/org/rocksdb/MultiGetTest.java + src/test/java/org/rocksdb/ComparatorOptionsTest.java + src/test/java/org/rocksdb/NativeLibraryLoaderTest.java + src/test/java/org/rocksdb/StatisticsTest.java + src/test/java/org/rocksdb/WALRecoveryModeTest.java + src/test/java/org/rocksdb/TransactionLogIteratorTest.java + src/test/java/org/rocksdb/ReadOptionsTest.java + src/test/java/org/rocksdb/SecondaryDBTest.java + src/test/java/org/rocksdb/KeyMayExistTest.java + src/test/java/org/rocksdb/BlobOptionsTest.java + src/test/java/org/rocksdb/InfoLogLevelTest.java + src/test/java/org/rocksdb/CompactionPriorityTest.java + src/test/java/org/rocksdb/FlushOptionsTest.java + src/test/java/org/rocksdb/VerifyChecksumsTest.java + src/test/java/org/rocksdb/MultiColumnRegressionTest.java + src/test/java/org/rocksdb/FlushTest.java + src/test/java/org/rocksdb/PutMultiplePartsTest.java + src/test/java/org/rocksdb/StatisticsCollectorTest.java + src/test/java/org/rocksdb/LRUCacheTest.java + src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java + src/test/java/org/rocksdb/TransactionTest.java + src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java + src/test/java/org/rocksdb/BackupEngineOptionsTest.java + src/test/java/org/rocksdb/CheckPointTest.java + src/test/java/org/rocksdb/PlainTableConfigTest.java + src/test/java/org/rocksdb/TransactionDBOptionsTest.java + src/test/java/org/rocksdb/ReadOnlyTest.java + src/test/java/org/rocksdb/EnvOptionsTest.java + src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java + src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java + src/test/java/org/rocksdb/test/TestableEventListener.java + src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java src/test/java/org/rocksdb/test/TestableEventListener.java + src/test/java/org/rocksdb/test/RocksJunitRunner.java + src/test/java/org/rocksdb/LoggerTest.java + src/test/java/org/rocksdb/FilterTest.java + src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java + src/test/java/org/rocksdb/util/IntComparatorTest.java + src/test/java/org/rocksdb/util/JNIComparatorTest.java + src/test/java/org/rocksdb/util/ByteBufferAllocator.java + src/test/java/org/rocksdb/util/SizeUnitTest.java + src/test/java/org/rocksdb/util/BytewiseComparatorTest.java + src/test/java/org/rocksdb/util/EnvironmentTest.java + src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java + src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java + src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java + src/test/java/org/rocksdb/util/TestUtil.java + src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java + src/test/java/org/rocksdb/Types.java + src/test/java/org/rocksdb/MixedOptionsTest.java + 
src/test/java/org/rocksdb/CompactRangeOptionsTest.java + src/test/java/org/rocksdb/SstFileWriterTest.java + src/test/java/org/rocksdb/WalFilterTest.java + src/test/java/org/rocksdb/AbstractTransactionTest.java + src/test/java/org/rocksdb/MergeTest.java + src/test/java/org/rocksdb/OptionsTest.java + src/test/java/org/rocksdb/WriteBatchThreadedTest.java + src/test/java/org/rocksdb/MultiGetManyKeysTest.java + src/test/java/org/rocksdb/TimedEnvTest.java + src/test/java/org/rocksdb/CompactionStopStyleTest.java + src/test/java/org/rocksdb/CompactionJobInfoTest.java + src/test/java/org/rocksdb/BlockBasedTableConfigTest.java + src/test/java/org/rocksdb/BuiltinComparatorTest.java + src/test/java/org/rocksdb/RateLimiterTest.java + src/test/java/org/rocksdb/TransactionOptionsTest.java + src/test/java/org/rocksdb/WriteBatchWithIndexTest.java + src/test/java/org/rocksdb/WriteBatchHandlerTest.java + src/test/java/org/rocksdb/OptimisticTransactionDBTest.java + src/test/java/org/rocksdb/OptionsUtilTest.java + src/test/java/org/rocksdb/OptimisticTransactionTest.java + src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java + src/test/java/org/rocksdb/CompressionOptionsTest.java + src/test/java/org/rocksdb/ColumnFamilyTest.java + src/test/java/org/rocksdb/SstFileReaderTest.java + src/test/java/org/rocksdb/TransactionDBTest.java + src/test/java/org/rocksdb/RocksDBTest.java + src/test/java/org/rocksdb/MutableOptionsGetSetTest.java + src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java + src/test/java/org/rocksdb/SstFileManagerTest.java + src/test/java/org/rocksdb/BackupEngineTest.java + src/test/java/org/rocksdb/DirectSliceTest.java + src/test/java/org/rocksdb/StatsCallbackMock.java + src/test/java/org/rocksdb/CompressionTypesTest.java + src/test/java/org/rocksdb/MemoryUtilTest.java + src/test/java/org/rocksdb/TableFilterTest.java + src/test/java/org/rocksdb/TtlDBTest.java +) + +set(JAVA_TEST_RUNNING_CLASSES + org.rocksdb.ConcurrentTaskLimiterTest + org.rocksdb.EventListenerTest + org.rocksdb.CompactionOptionsTest + org.rocksdb.IngestExternalFileOptionsTest + org.rocksdb.MutableDBOptionsTest + org.rocksdb.WriteOptionsTest + org.rocksdb.SstPartitionerTest + org.rocksdb.RocksMemEnvTest + org.rocksdb.CompactionOptionsUniversalTest + org.rocksdb.ClockCacheTest + # org.rocksdb.BytewiseComparatorRegressionTest + org.rocksdb.SnapshotTest + org.rocksdb.CompactionJobStatsTest + org.rocksdb.MemTableTest + org.rocksdb.CompactionFilterFactoryTest + # org.rocksdb.DefaultEnvTest + org.rocksdb.DBOptionsTest + org.rocksdb.WriteBatchTest + org.rocksdb.RocksIteratorTest + org.rocksdb.SliceTest + org.rocksdb.MultiGetTest + org.rocksdb.ComparatorOptionsTest + # org.rocksdb.NativeLibraryLoaderTest + org.rocksdb.StatisticsTest + org.rocksdb.WALRecoveryModeTest + org.rocksdb.TransactionLogIteratorTest + org.rocksdb.ReadOptionsTest + org.rocksdb.SecondaryDBTest + org.rocksdb.KeyMayExistTest + org.rocksdb.BlobOptionsTest + org.rocksdb.InfoLogLevelTest + org.rocksdb.CompactionPriorityTest + org.rocksdb.FlushOptionsTest + org.rocksdb.VerifyChecksumsTest + org.rocksdb.MultiColumnRegressionTest + org.rocksdb.FlushTest + org.rocksdb.PutMultiplePartsTest + org.rocksdb.StatisticsCollectorTest + org.rocksdb.LRUCacheTest + org.rocksdb.ColumnFamilyOptionsTest + org.rocksdb.TransactionTest + org.rocksdb.CompactionOptionsFIFOTest + org.rocksdb.BackupEngineOptionsTest + org.rocksdb.CheckPointTest + org.rocksdb.PlainTableConfigTest + # org.rocksdb.TransactionDBOptionsTest + org.rocksdb.ReadOnlyTest + org.rocksdb.EnvOptionsTest + 
org.rocksdb.LoggerTest + org.rocksdb.FilterTest + # org.rocksdb.ByteBufferUnsupportedOperationTest + # org.rocksdb.util.IntComparatorTest + # org.rocksdb.util.JNIComparatorTest + org.rocksdb.util.SizeUnitTest + # org.rocksdb.util.BytewiseComparatorTest + org.rocksdb.util.EnvironmentTest + # org.rocksdb.util.BytewiseComparatorIntTest + # org.rocksdb.util.ReverseBytewiseComparatorIntTest + org.rocksdb.MixedOptionsTest + org.rocksdb.CompactRangeOptionsTest + # org.rocksdb.SstFileWriterTest + org.rocksdb.WalFilterTest + # org.rocksdb.AbstractTransactionTest + org.rocksdb.MergeTest + org.rocksdb.OptionsTest + org.rocksdb.WriteBatchThreadedTest + org.rocksdb.MultiGetManyKeysTest + org.rocksdb.TimedEnvTest + org.rocksdb.CompactionStopStyleTest + org.rocksdb.CompactionJobInfoTest + org.rocksdb.BlockBasedTableConfigTest + org.rocksdb.BuiltinComparatorTest + org.rocksdb.RateLimiterTest + # org.rocksdb.TransactionOptionsTest + org.rocksdb.WriteBatchWithIndexTest + org.rocksdb.WriteBatchHandlerTest + org.rocksdb.OptimisticTransactionDBTest + org.rocksdb.OptionsUtilTest + org.rocksdb.OptimisticTransactionTest + org.rocksdb.MutableColumnFamilyOptionsTest + org.rocksdb.CompressionOptionsTest + org.rocksdb.ColumnFamilyTest + org.rocksdb.SstFileReaderTest + org.rocksdb.TransactionDBTest + org.rocksdb.RocksDBTest + org.rocksdb.MutableOptionsGetSetTest + # org.rocksdb.OptimisticTransactionOptionsTest + org.rocksdb.SstFileManagerTest + org.rocksdb.BackupEngineTest + org.rocksdb.DirectSliceTest + org.rocksdb.CompressionTypesTest + org.rocksdb.MemoryUtilTest + org.rocksdb.TableFilterTest + org.rocksdb.TtlDBTest ) include(FindJava) @@ -304,12 +510,20 @@ include_directories(${PROJECT_SOURCE_DIR}/java) set(JAVA_TEST_LIBDIR ${PROJECT_SOURCE_DIR}/java/test-libs) set(JAVA_TMP_JAR ${JAVA_TEST_LIBDIR}/tmp.jar) -set(JAVA_JUNIT_JAR ${JAVA_TEST_LIBDIR}/junit-4.12.jar) -set(JAVA_HAMCR_JAR ${JAVA_TEST_LIBDIR}/hamcrest-core-1.3.jar) -set(JAVA_MOCKITO_JAR ${JAVA_TEST_LIBDIR}/mockito-all-1.10.19.jar) -set(JAVA_CGLIB_JAR ${JAVA_TEST_LIBDIR}/cglib-2.2.2.jar) -set(JAVA_ASSERTJ_JAR ${JAVA_TEST_LIBDIR}/assertj-core-1.7.1.jar) +set(JAVA_JUNIT_JAR ${JAVA_TEST_LIBDIR}/junit-${JAVA_JUNIT_VERSION}.jar) +set(JAVA_HAMCR_JAR ${JAVA_TEST_LIBDIR}/hamcrest-${JAVA_HAMCR_VERSION}.jar) +set(JAVA_MOCKITO_JAR ${JAVA_TEST_LIBDIR}/mockito-all-${JAVA_MOCKITO_VERSION}.jar) +set(JAVA_CGLIB_JAR ${JAVA_TEST_LIBDIR}/cglib-${JAVA_CGLIB_VERSION}.jar) +set(JAVA_ASSERTJ_JAR ${JAVA_TEST_LIBDIR}/assertj-core-${JAVA_ASSERTJ_VERSION}.jar) set(JAVA_TESTCLASSPATH ${JAVA_JUNIT_JAR} ${JAVA_HAMCR_JAR} ${JAVA_MOCKITO_JAR} ${JAVA_CGLIB_JAR} ${JAVA_ASSERTJ_JAR}) +message("CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") +message("MINGW: ${MINGW}") + +if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + set(JAVA_RUN_TESTCLASSPATH ${JAVA_JUNIT_JAR}$${JAVA_HAMCR_JAR}$${JAVA_MOCKITO_JAR}$${JAVA_CGLIB_JAR}$${JAVA_ASSERTJ_JAR}) +else() + set(JAVA_RUN_TESTCLASSPATH ${JAVA_JUNIT_JAR}:${JAVA_HAMCR_JAR}:${JAVA_MOCKITO_JAR}:${JAVA_CGLIB_JAR}:${JAVA_ASSERTJ_JAR}) +endif() set(JNI_OUTPUT_DIR ${PROJECT_SOURCE_DIR}/java/include) file(MAKE_DIRECTORY ${JNI_OUTPUT_DIR}) @@ -327,26 +541,32 @@ elseif(${CMAKE_VERSION} VERSION_LESS "3.11.4") message("Using an old CMAKE (${CMAKE_VERSION}) - JNI headers generated in separate step") add_jar( rocksdbjni_classes - SOURCES - ${JAVA_MAIN_CLASSES} - ${JAVA_TEST_CLASSES} - INCLUDE_JARS ${JAVA_TESTCLASSPATH} + SOURCES ${JAVA_MAIN_CLASSES} ) else () # Java 1.8 or newer prepare the JAR... 
message("Preparing Jar for JDK ${Java_VERSION_STRING}") + message("JAVA_TESTCLASSPATH=${JAVA_TESTCLASSPATH}") add_jar( rocksdbjni_classes - SOURCES - ${JAVA_MAIN_CLASSES} - ${JAVA_TEST_CLASSES} - INCLUDE_JARS ${JAVA_TESTCLASSPATH} + SOURCES ${JAVA_MAIN_CLASSES} + INCLUDE_JARS ${ROCKSDBJNI_CLASSES_JAR_FILE} ${JAVA_TESTCLASSPATH} GENERATE_NATIVE_HEADERS rocksdbjni_headers DESTINATION ${JNI_OUTPUT_DIR} ) endif() +get_target_property(ROCKSDBJNI_CLASSES_JAR_FILE rocksdbjni_classes JAR_FILE) +add_jar( + rocksdbjni_test_classes + SOURCES + ${JAVA_MAIN_CLASSES} + ${JAVA_TEST_CLASSES} + INCLUDE_JARS ${ROCKSDBJNI_CLASSES_JAR_FILE} ${JAVA_TESTCLASSPATH} + GENERATE_NATIVE_HEADERS rocksdbjni_test_headers DESTINATION ${JNI_OUTPUT_DIR} +) + if(NOT EXISTS ${PROJECT_SOURCE_DIR}/java/classes) file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/java/classes) endif() @@ -365,7 +585,7 @@ endif() if(NOT EXISTS ${JAVA_JUNIT_JAR}) message("Downloading ${JAVA_JUNIT_JAR}") - file(DOWNLOAD ${DEPS_URL}/junit-4.12.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + file(DOWNLOAD ${DEPS_URL}/junit-${JAVA_JUNIT_VERSION}.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) @@ -375,7 +595,7 @@ if(NOT EXISTS ${JAVA_JUNIT_JAR}) endif() if(NOT EXISTS ${JAVA_HAMCR_JAR}) message("Downloading ${JAVA_HAMCR_JAR}") - file(DOWNLOAD ${DEPS_URL}/hamcrest-core-1.3.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + file(DOWNLOAD ${DEPS_URL}/hamcrest-${JAVA_HAMCR_VERSION}.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) @@ -385,7 +605,7 @@ if(NOT EXISTS ${JAVA_HAMCR_JAR}) endif() if(NOT EXISTS ${JAVA_MOCKITO_JAR}) message("Downloading ${JAVA_MOCKITO_JAR}") - file(DOWNLOAD ${DEPS_URL}/mockito-all-1.10.19.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + file(DOWNLOAD ${DEPS_URL}/mockito-all-${JAVA_MOCKITO_VERSION}.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) @@ -395,7 +615,7 @@ if(NOT EXISTS ${JAVA_MOCKITO_JAR}) endif() if(NOT EXISTS ${JAVA_CGLIB_JAR}) message("Downloading ${JAVA_CGLIB_JAR}") - file(DOWNLOAD ${DEPS_URL}/cglib-2.2.2.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + file(DOWNLOAD ${DEPS_URL}/cglib-${JAVA_CGLIB_VERSION}.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) @@ -405,7 +625,7 @@ if(NOT EXISTS ${JAVA_CGLIB_JAR}) endif() if(NOT EXISTS ${JAVA_ASSERTJ_JAR}) message("Downloading ${JAVA_ASSERTJ_JAR}") - file(DOWNLOAD ${DEPS_URL}/assertj-core-1.7.1.jar ${JAVA_TMP_JAR} STATUS downloadStatus) + file(DOWNLOAD ${DEPS_URL}/assertj-core-${JAVA_ASSERTJ_VERSION}.jar ${JAVA_TMP_JAR} STATUS downloadStatus) list(GET downloadStatus 0 error_code) list(GET downloadStatus 1 error_message) if(NOT error_code EQUAL 0) @@ -534,7 +754,12 @@ if(NOT MSVC) endif() set(ROCKSDBJNI_STATIC_LIB rocksdbjni${ARTIFACT_SUFFIX}) -add_library(${ROCKSDBJNI_STATIC_LIB} ${JNI_NATIVE_SOURCES}) +if(MINGW) + # Build mingw only as staic library. 
+ add_library(${ROCKSDBJNI_STATIC_LIB} ${JNI_NATIVE_SOURCES}) +else() + add_library(${ROCKSDBJNI_STATIC_LIB} SHARED ${JNI_NATIVE_SOURCES}) +endif() add_dependencies(${ROCKSDBJNI_STATIC_LIB} rocksdbjni_headers) target_link_libraries(${ROCKSDBJNI_STATIC_LIB} ${ROCKSDB_STATIC_LIB} ${ROCKSDB_LIB}) @@ -551,3 +776,19 @@ if(NOT MINGW) COMPILE_PDB_NAME ${ROCKSDBJNI_STATIC_LIB}.pdb ) endif() + +enable_testing() +get_target_property(ROCKSDBJNI_CLASSES_TEST_JAR_FILE rocksdbjni_test_classes JAR_FILE) +foreach (CLAZZ ${JAVA_TEST_RUNNING_CLASSES}) + if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") + add_test( + NAME jtest_${CLAZZ} + COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java/${CMAKE_BUILD_TYPE} -classpath ${JAVA_RUN_TESTCLASSPATH}$${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.rocksdb.test.RocksJunitRunner ${CLAZZ} + ) + else() + add_test( + NAME jtest_${CLAZZ} + COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java -classpath ${JAVA_RUN_TESTCLASSPATH}:${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.rocksdb.test.RocksJunitRunner ${CLAZZ} + ) + endif() +endforeach(CLAZZ) \ No newline at end of file diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index eba181203..15e71bf1e 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -8898,11 +8898,11 @@ class BlockBasedTableOptionsJni table_factory_options->data_block_hash_table_util_ratio, ChecksumTypeJni::toJavaChecksumType(table_factory_options->checksum), table_factory_options->no_block_cache, - static_cast(table_factory_options->block_size), + static_cast(table_factory_options->block_size), table_factory_options->block_size_deviation, table_factory_options->block_restart_interval, table_factory_options->index_block_restart_interval, - static_cast(table_factory_options->metadata_block_size), + static_cast(table_factory_options->metadata_block_size), table_factory_options->partition_filters, table_factory_options->optimize_filters_for_memory, table_factory_options->use_delta_encoding, From 0836a2b26dfbbe30c15e8cebf47771917d55e760 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 18 Oct 2023 18:00:07 -0700 Subject: [PATCH 219/386] New tickers on deletion compactions grouped by reasons (#11957) Summary: Context/Summary: as titled Pull Request resolved: https://github.com/facebook/rocksdb/pull/11957 Test Plan: piggyback on existing tests; fixed a failed test due to adding new stats Reviewed By: ajkr, cbi42 Differential Revision: D50294310 Pulled By: hx235 fbshipit-source-id: d99b97ebac41efc1bdeaf9ca7a1debd2927d54cd --- db/compaction/compaction_picker_fifo.cc | 3 +++ db/db_impl/db_impl.h | 1 + db/db_impl/db_impl_compaction_flush.cc | 24 +++++++++++++++++++ db/db_options_test.cc | 15 ++++++++++++ include/rocksdb/statistics.h | 4 ++++ java/rocksjni/portal.h | 10 ++++++++ .../src/main/java/org/rocksdb/TickerType.java | 4 ++++ monitoring/statistics.cc | 2 ++ monitoring/stats_history_test.cc | 22 +++++++++++------ .../new_features/fifo_drop_file_new_stats.md | 1 + 10 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 unreleased_history/new_features/fifo_drop_file_new_stats.md diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 9aa24302e..505297770 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -17,6 +17,9 @@ #include "logging/log_buffer.h" #include "logging/logging.h" #include "options/options_helper.h" +#include 
"rocksdb/listener.h" +#include "rocksdb/statistics.h" +#include "rocksdb/status.h" #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 10e8af97e..d8365c0d0 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2248,6 +2248,7 @@ class DBImpl : public DB { bool ShouldntRunManualCompaction(ManualCompactionState* m); bool HaveManualCompaction(ColumnFamilyData* cfd); bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1); + void UpdateDeletionCompactionStats(const std::unique_ptr& c); void BuildCompactionJobInfo(const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index bdf4f3894..5af305d31 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -3704,6 +3704,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n", c->column_family_data()->GetName().c_str(), c->num_input_files(0)); + if (status.ok() && io_s.ok()) { + UpdateDeletionCompactionStats(c); + } *made_progress = true; TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction", c->column_family_data()); @@ -4082,6 +4085,27 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) { return false; } +void DBImpl::UpdateDeletionCompactionStats( + const std::unique_ptr& c) { + if (c == nullptr) { + return; + } + + CompactionReason reason = c->compaction_reason(); + + switch (reason) { + case CompactionReason::kFIFOMaxSize: + RecordTick(stats_, FIFO_MAX_SIZE_COMPACTIONS); + break; + case CompactionReason::kFIFOTtl: + RecordTick(stats_, FIFO_TTL_COMPACTIONS); + break; + default: + assert(false); + break; + } +} + void DBImpl::BuildCompactionJobInfo( const ColumnFamilyData* cfd, Compaction* c, const Status& st, const CompactionJobStats& compaction_job_stats, const int job_id, diff --git a/db/db_options_test.cc b/db/db_options_test.cc index 7e77ac55e..f52982bbc 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -959,6 +959,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { Options options; options.env = CurrentOptions().env; options.compaction_style = kCompactionStyleFIFO; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); options.write_buffer_size = 10 << 10; // 10KB options.arena_block_size = 4096; options.compression = kNoCompression; @@ -992,6 +993,9 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(NumTableFilesAtLevel(0), 10); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_TTL_COMPACTIONS), 0); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_MAX_SIZE_COMPACTIONS), 0); + // Set ttl to 1 minute. So all files should get deleted. 
ASSERT_OK(dbfull()->SetOptions({{"ttl", "60"}})); ASSERT_EQ(dbfull()->GetOptions().ttl, 60); @@ -999,6 +1003,10 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_GT(options.statistics->getTickerCount(FIFO_TTL_COMPACTIONS), 0); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_MAX_SIZE_COMPACTIONS), 0); + ASSERT_OK(options.statistics->Reset()); + // NOTE: Presumed unnecessary and removed: resetting mock time in env // Test dynamically changing compaction_options_fifo.max_table_files_size @@ -1022,6 +1030,9 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr)); ASSERT_EQ(NumTableFilesAtLevel(0), 10); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_MAX_SIZE_COMPACTIONS), 0); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_TTL_COMPACTIONS), 0); + // Set max_table_files_size to 12 KB. So only 1 file should remain now. ASSERT_OK(dbfull()->SetOptions( {{"compaction_options_fifo", "{max_table_files_size=12288;}"}})); @@ -1031,6 +1042,10 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_OK(dbfull()->TEST_WaitForCompact()); ASSERT_EQ(NumTableFilesAtLevel(0), 1); + ASSERT_GT(options.statistics->getTickerCount(FIFO_MAX_SIZE_COMPACTIONS), 0); + ASSERT_EQ(options.statistics->getTickerCount(FIFO_TTL_COMPACTIONS), 0); + ASSERT_OK(options.statistics->Reset()); + // Test dynamically changing compaction_options_fifo.allow_compaction options.compaction_options_fifo.max_table_files_size = 500 << 10; // 500KB options.ttl = 0; diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index dcaf54278..9d81e435f 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -518,6 +518,10 @@ enum Tickers : uint32_t { // ReadOptions.auto_readahead_size is set. READAHEAD_TRIMMED, + // Number of FIFO compactions that drop files based on different reasons + FIFO_MAX_SIZE_COMPACTIONS, + FIFO_TTL_COMPACTIONS, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 15e71bf1e..5f3d65ac5 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5165,6 +5165,10 @@ class TickerTypeJni { return -0x3C; case ROCKSDB_NAMESPACE::Tickers::READAHEAD_TRIMMED: return -0x3D; + case ROCKSDB_NAMESPACE::Tickers::FIFO_MAX_SIZE_COMPACTIONS: + return -0x3E; + case ROCKSDB_NAMESPACE::Tickers::FIFO_TTL_COMPACTIONS: + return -0x3F; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep @@ -5528,6 +5532,12 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT; case -0x3C: return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_MISMATCH_COUNT; + case -0x3D: + return ROCKSDB_NAMESPACE::Tickers::READAHEAD_TRIMMED; + case -0x3E: + return ROCKSDB_NAMESPACE::Tickers::FIFO_MAX_SIZE_COMPACTIONS; + case -0x3F: + return ROCKSDB_NAMESPACE::Tickers::FIFO_TTL_COMPACTIONS; case 0x5F: // 0x5F was the max value in the initial copy of tickers to Java. 
// Since these values are exposed directly to Java clients, we keep diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index ac4cc9213..a718dfa15 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -766,6 +766,10 @@ public enum TickerType { READAHEAD_TRIMMED((byte) -0x3D), + FIFO_MAX_SIZE_COMPACTIONS((byte) -0x3E), + + FIFO_TTL_COMPACTIONS((byte) -0x3F), + TICKER_ENUM_MAX((byte) 0x5F); private final byte value; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index d3c0dc08f..5aede1df1 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -259,6 +259,8 @@ const std::vector> TickersNameMap = { {BYTES_DECOMPRESSED_FROM, "rocksdb.bytes.decompressed.from"}, {BYTES_DECOMPRESSED_TO, "rocksdb.bytes.decompressed.to"}, {READAHEAD_TRIMMED, "rocksdb.readahead.trimmed"}, + {FIFO_MAX_SIZE_COMPACTIONS, "rocksdb.fifo.max.size.compactions"}, + {FIFO_TTL_COMPACTIONS, "rocksdb.fifo.ttl.compactions"}, }; const std::vector> HistogramsNameMap = { diff --git a/monitoring/stats_history_test.cc b/monitoring/stats_history_test.cc index b5b17aa78..37db0cfe1 100644 --- a/monitoring/stats_history_test.cc +++ b/monitoring/stats_history_test.cc @@ -185,6 +185,8 @@ TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) { TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { constexpr int kPeriodSec = 1; + constexpr int kEstimatedOneSliceSize = 16000; + Options options; options.create_if_missing = true; options.statistics = CreateDBStatistics(); @@ -244,10 +246,12 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { } size_t stats_history_size = dbfull()->TEST_EstimateInMemoryStatsHistorySize(); ASSERT_GE(slice_count, kIterations - 1); - ASSERT_GE(stats_history_size, 15000); - // capping memory cost at 15000 bytes since one slice is around 10000~15000 - ASSERT_OK(dbfull()->SetDBOptions({{"stats_history_buffer_size", "15000"}})); - ASSERT_EQ(15000, dbfull()->GetDBOptions().stats_history_buffer_size); + ASSERT_GE(stats_history_size, kEstimatedOneSliceSize); + // capping memory cost to roughly one slice's size + ASSERT_OK(dbfull()->SetDBOptions( + {{"stats_history_buffer_size", std::to_string(kEstimatedOneSliceSize)}})); + ASSERT_EQ(kEstimatedOneSliceSize, + dbfull()->GetDBOptions().stats_history_buffer_size); // Wait for stats persist to finish for (int i = 0; i < kIterations; ++i) { @@ -267,9 +271,13 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) { } size_t stats_history_size_reopen = dbfull()->TEST_EstimateInMemoryStatsHistorySize(); - // only one slice can fit under the new stats_history_buffer_size - ASSERT_LT(slice_count, 2); - ASSERT_TRUE(stats_history_size_reopen < 15000 && + + // Only one slice can fit under the new stats_history_buffer_size + // + // If `slice_count == 0` when new statistics are added, consider increasing + // `kEstimatedOneSliceSize` + ASSERT_EQ(slice_count, 1); + ASSERT_TRUE(stats_history_size_reopen < 16000 && stats_history_size_reopen > 0); ASSERT_TRUE(stats_count_reopen < stats_count && stats_count_reopen > 0); Close(); diff --git a/unreleased_history/new_features/fifo_drop_file_new_stats.md b/unreleased_history/new_features/fifo_drop_file_new_stats.md new file mode 100644 index 000000000..30134b2c2 --- /dev/null +++ b/unreleased_history/new_features/fifo_drop_file_new_stats.md @@ -0,0 +1 @@ +Added new tickers `rocksdb.fifo.{max.size|ttl}.compactions` to count FIFO compactions that drop files for different reasons From 
2e514e4b4a9b78c9ded5b9e0387f019c5ad2bf12 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Thu, 19 Oct 2023 10:30:54 -0700 Subject: [PATCH 220/386] Fix copyright header (#11986) Summary: Add missing copyright header Pull Request resolved: https://github.com/facebook/rocksdb/pull/11986 Test Plan: N/A Reviewed By: hx235 Differential Revision: D50461904 Pulled By: jaykorean fbshipit-source-id: b1b2704890f4a0bb3c9b464b01468e85a5365a8e --- java/src/test/java/org/rocksdb/KeyExistsTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/java/src/test/java/org/rocksdb/KeyExistsTest.java b/java/src/test/java/org/rocksdb/KeyExistsTest.java index 07e9a61f8..1ee9bdce2 100644 --- a/java/src/test/java/org/rocksdb/KeyExistsTest.java +++ b/java/src/test/java/org/rocksdb/KeyExistsTest.java @@ -1,3 +1,7 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). package org.rocksdb; import static java.nio.charset.StandardCharsets.UTF_8; From ef0c3f08fae318d1b1e4159e0437579a36c6cfd8 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 19 Oct 2023 14:51:22 -0700 Subject: [PATCH 221/386] Fix rare destructor bug in AutoHCC (#11988) Summary: and some other small enhancements/fixes: * The main bug fixed is that in some rare cases, the "published" table size might be smaller than the actual table size. This is a transient state that can happen with concurrent growth that is normally fixed after enough insertions, but if the cache is destroyed soon enough after growth, it could fail to fully destroy some entries and cause assertion failures. We can fix this by detecting the true table size in the destructor. * Change the "too many iterations" debug threshold from 512 to 768. We might have hit at least one false positive failure. (Failed despite legitimate operation.) * Added some stronger assertions in some places to aid in debugging. * Use COERCE_CONTEXT_SWITCH to make behavior of Grow less predictable in terms of thread interleaving. (Might add in more places.) This was useful in reproducing the destructor bug. * Fix some comments with typos or that were based on earlier revisions of the code. 
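For illustration only (not part of this commit): a minimal, self-contained sketch of the "detect the true table size by probing past the published size" idea from the first bullet. `Slot`, `FindUsedEnd`, and the zero-valued `kUnusedMarker` here are stand-ins rather than RocksDB APIs; the actual fix lives in `AutoHyperClockTable::~AutoHyperClockTable()` in the diff below.

```
#include <array>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Stand-in for the real cache slot: a head pointer that stays zero until the
// slot is initialized by a Grow operation.
constexpr uint64_t kUnusedMarker = 0;

struct Slot {
  std::atomic<uint64_t> head_next_with_shift{kUnusedMarker};
};

// The destructor-time idea: the published size can lag the true number of
// initialized slots after a concurrent Grow, so probe forward until the first
// never-used slot and treat that as the real end of the table.
size_t FindUsedEnd(const Slot* slots, size_t capacity, size_t published_size) {
  size_t used_end = published_size;
  while (used_end < capacity &&
         slots[used_end].head_next_with_shift.load(std::memory_order_acquire) !=
             kUnusedMarker) {
    ++used_end;
  }
  // Everything at or beyond used_end must still be untouched.
  for (size_t i = used_end; i < capacity; ++i) {
    assert(slots[i].head_next_with_shift.load() == kUnusedMarker);
  }
  return used_end;
}

int main() {
  std::array<Slot, 8> slots;
  // Simulate a Grow that initialized one slot beyond the published size.
  slots[0].head_next_with_shift = 1;
  slots[1].head_next_with_shift = 1;
  slots[2].head_next_with_shift = 1;
  const size_t published_size = 2;
  assert(FindUsedEnd(slots.data(), slots.size(), published_size) == 3);
  return 0;
}
```

The invariant the sketch asserts (slots at or beyond the probed end are untouched) is the same one the new debug-build loop in the destructor checks.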
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11988 Test Plan: Variants of this bug-finding command: ``` USE_CLANG=1 COMPILE_WITH_ASAN=1 COMPILE_WITH_UBSAN=1 COERCE_CONTEXT_SWITCH=1 DEBUG_LEVEL=2 make -j32 cache_bench && while ROCKSDB_DEBUG=1 ./cache_bench -cache_type=auto_hyper_clock_cache -histograms=0 -cache_size=80000000 -threads=32 -populate_cache=0 -ops_per_thread=1000 -num_shard_bits=0; do :; done ``` Reviewed By: jowlyzhang Differential Revision: D50470318 Pulled By: pdillinger fbshipit-source-id: d407a8bb0b6d2ddc598a954c319a1640136f12f2 --- cache/clock_cache.cc | 84 ++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index b70d225bb..a6d41985a 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -1790,6 +1790,8 @@ inline bool MatchAndRef(const UniqueId64x2* hashed_key, const ClockHandle& h, } } +// Assumes a chain rewrite lock prevents concurrent modification of +// these chain pointers void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, size_t& frontier, uint64_t stop_before_or_new_tail, int old_shift, int new_shift) { @@ -1805,7 +1807,6 @@ void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, if (next_with_shift == stop_before_or_new_tail) { // Stopping at entry with pointer matching "stop before" assert(!HandleImpl::IsEnd(next_with_shift)); - // We need to keep a reference to it also to keep it stable. return; } if (HandleImpl::IsEnd(next_with_shift)) { @@ -1817,8 +1818,7 @@ void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, frontier = SIZE_MAX; return; } - // Next is another entry to process, so upgrade and unref and advance - // frontier + // Next is another entry to process, so upgrade and advance frontier arr[frontier].chain_next_with_shift.fetch_add(1U, std::memory_order_acq_rel); assert(GetShiftFromNextWithShift(next_with_shift + 1) == new_shift); @@ -2015,14 +2015,28 @@ AutoHyperClockTable::AutoHyperClockTable( } AutoHyperClockTable::~AutoHyperClockTable() { - // Assumes there are no references or active operations on any slot/element - // in the table. - size_t end = GetTableSize(); + // As usual, destructor assumes there are no references or active operations + // on any slot/element in the table. + + // It's possible that there were not enough Insert() after final concurrent + // Grow to ensure length_info_ (published GetTableSize()) is fully up to + // date. Probe for first unused slot to ensure we see the whole structure. + size_t used_end = GetTableSize(); + while (used_end < array_.Count() && + array_[used_end].head_next_with_shift.load() != + HandleImpl::kUnusedMarker) { + used_end++; + } #ifndef NDEBUG - std::vector<bool> was_populated(end); - std::vector<bool> was_pointed_to(end); + for (size_t i = used_end; i < array_.Count(); i++) { + assert(array_[i].head_next_with_shift.load() == 0); + assert(array_[i].chain_next_with_shift.load() == 0); + assert(array_[i].meta.load() == 0); + } + std::vector<bool> was_populated(used_end); + std::vector<bool> was_pointed_to(used_end); #endif - for (size_t i = 0; i < end; i++) { + for (size_t i = 0; i < used_end; i++) { HandleImpl& h = array_[i]; switch (h.meta >> ClockHandle::kStateShift) { case ClockHandle::kStateEmpty: @@ -2061,7 +2075,7 @@ AutoHyperClockTable::~AutoHyperClockTable() { // This check is not perfect, but should detect most reasonable cases // of abandoned or floating entries, etc. (A floating cycle would not // be reported as bad.) 
- for (size_t i = 0; i < end; i++) { + for (size_t i = 0; i < used_end; i++) { if (was_populated[i]) { assert(was_pointed_to[i]); } else { @@ -2070,8 +2084,9 @@ AutoHyperClockTable::~AutoHyperClockTable() { } #endif + // Metadata charging only follows the published table size assert(usage_.load() == 0 || - usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl)); + usage_.load() == GetTableSize() * sizeof(HandleImpl)); assert(occupancy_ == 0); } @@ -2099,7 +2114,7 @@ void AutoHyperClockTable::StartInsert(InsertState& state) { // and a larger limit is used to break cycles should they occur in production. #define CHECK_TOO_MANY_ITERATIONS(i) \ { \ - assert(i < 512); \ + assert(i < 768); \ if (UNLIKELY(i >= 4096)) { \ std::terminate(); \ } \ @@ -2160,12 +2175,19 @@ bool AutoHyperClockTable::Grow(InsertState& state) { bool own = array_[grow_home].head_next_with_shift.compare_exchange_strong( expected_zero, empty_head, std::memory_order_acq_rel); if (own) { + assert(array_[grow_home].meta.load(std::memory_order_acquire) == 0); break; } else { // Taken by another thread. Try next slot. assert(expected_zero != 0); } } +#ifdef COERCE_CONTEXT_SWITCH + // This is useful in reproducing concurrency issues in Grow() + while (Random::GetTLSInstance()->OneIn(2)) { + std::this_thread::yield(); + } +#endif // Basically, to implement https://en.wikipedia.org/wiki/Linear_hashing // entries that belong in a new chain starting at grow_home will be // split off from the chain starting at old_home, which is computed here. @@ -2518,7 +2540,7 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, // BHome --------------------New------------> [A1] -Old-> ... // And we need to upgrade as much as we can on the "first" chain // (the one eventually pointing to the other's frontier). This will - // also finish off any case in which one of the targer chains will be empty. + // also finish off any case in which one of the target chains will be empty. if (chain_frontier_first >= 0) { size_t& first_frontier = chain_frontier_first == 0 ? /*&*/ zero_chain_frontier @@ -2638,7 +2660,9 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, int home_shift = GetShiftFromNextWithShift(next_with_shift); (void)home; (void)home_shift; - HandleImpl* h = &arr[GetNextFromNextWithShift(next_with_shift)]; + size_t next = GetNextFromNextWithShift(next_with_shift); + assert(next < array_.Count()); + HandleImpl* h = &arr[next]; HandleImpl* prev_to_keep = nullptr; #ifndef NDEBUG uint64_t prev_to_keep_next_with_shift = 0; @@ -2669,8 +2693,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, // Entries for eviction become purgeable purgeable = true; assert((h->meta.load(std::memory_order_acquire) >> - ClockHandle::kStateShift) & - ClockHandle::kStateOccupiedBit); + ClockHandle::kStateShift) == ClockHandle::kStateConstruction); } } else { (void)op_data; @@ -2682,8 +2705,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, if (purgeable) { assert((h->meta.load(std::memory_order_acquire) >> - ClockHandle::kStateShift) & - ClockHandle::kStateOccupiedBit); + ClockHandle::kStateShift) == ClockHandle::kStateConstruction); pending_purge = true; } else if (pending_purge) { if (prev_to_keep) { @@ -2703,9 +2725,12 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, // Can simply restart (GetNewHead() already updated from CAS failure). 
next_with_shift = rewrite_lock.GetNewHead(); assert(!HandleImpl::IsEnd(next_with_shift)); - h = &arr[GetNextFromNextWithShift(next_with_shift)]; + next = GetNextFromNextWithShift(next_with_shift); + assert(next < array_.Count()); + h = &arr[next]; pending_purge = false; assert(prev_to_keep == nullptr); + assert(GetShiftFromNextWithShift(next_with_shift) == home_shift); continue; } pending_purge = false; @@ -2733,7 +2758,9 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, if (HandleImpl::IsEnd(next_with_shift)) { h = nullptr; } else { - h = &arr[GetNextFromNextWithShift(next_with_shift)]; + next = GetNextFromNextWithShift(next_with_shift); + assert(next < array_.Count()); + h = &arr[next]; assert(h != prev_to_keep); } } @@ -3237,21 +3264,8 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // reinserted it into the same chain, causing us to cycle back in the // same chain and potentially see some entries again if we keep walking. // Newly-inserted entries are inserted before older ones, so we are at - // least guaranteed not to miss anything. - // * For kIsLookup, this is ok, as it's just a transient, slight hiccup - // in performance. - // * For kIsRemove, we are careful in overwriting the next pointer. The - // replacement value comes from the next pointer on an entry that we - // exclusively own. If that entry is still connected to the chain, its - // next must be valid for the chain. If it's not still connected to the - // chain (e.g. to unblock another thread Grow op), we will either not - // find the entry to remove on the chain or the CAS attempt to replace - // the appropriate next will fail, in which case we'll try again to find - // the removal target on the chain. - // * For kIsClockUpdateChain, we essentially have a special case of - // kIsRemove, as we only need to remove entries where we have taken - // ownership of one for eviction. In rare cases, we might - // double-clock-update some entries (ok as long as it's rare). + // least guaranteed not to miss anything. Here in Lookup, it's just a + // transient, slight hiccup in performance. if (full_match_or_unknown) { // Full match. From d7567d5eee5a0210376ce25475ae95b88b0a9c14 Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Fri, 20 Oct 2023 10:38:27 -0700 Subject: [PATCH 222/386] Update libs for RocksJava Static build (#9304) Summary: Updates ZStd and Snappy to the latest versions. 
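Not part of this commit: one way to sanity-check a dependency bump like this is to ask the bundled libraries themselves which versions actually got linked. A minimal sketch, assuming the zstd, lz4, and zlib development headers are on the include path and the binary links those libraries (snappy is omitted because it does not expose a comparable version-string call):

```
#include <cstdio>

#include <lz4.h>
#include <zlib.h>
#include <zstd.h>

int main() {
  // Each library reports the version it was built and linked with.
  std::printf("zstd: %s\n", ZSTD_versionString());
  std::printf("lz4:  %s\n", LZ4_versionString());
  std::printf("zlib: %s\n", zlibVersion());
  return 0;
}
```

Built with something like `-lzstd -llz4 -lz`, the printed strings should match the `*_VER` values in the Makefile diff below once the new archives are fetched.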
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9304 Reviewed By: ajkr Differential Revision: D33176708 Pulled By: cbi42 fbshipit-source-id: eb50db50557c433e19fcc7c2874329d1d6cba93f --- Makefile | 12 ++++++------ build_tools/build_detect_platform | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index ff140b1eb..904983e8b 100644 --- a/Makefile +++ b/Makefile @@ -2094,8 +2094,8 @@ ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-sources.jar SHA256_CMD = sha256sum -ZLIB_VER ?= 1.2.13 -ZLIB_SHA256 ?= b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30 +ZLIB_VER ?= 1.3 +ZLIB_SHA256 ?= ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e ZLIB_DOWNLOAD_BASE ?= http://zlib.net BZIP2_VER ?= 1.0.8 BZIP2_SHA256 ?= ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269 @@ -2103,11 +2103,11 @@ BZIP2_DOWNLOAD_BASE ?= http://sourceware.org/pub/bzip2 SNAPPY_VER ?= 1.1.8 SNAPPY_SHA256 ?= 16b677f07832a612b0836178db7f374e414f94657c138e6993cbfc5dcc58651f SNAPPY_DOWNLOAD_BASE ?= https://github.com/google/snappy/archive -LZ4_VER ?= 1.9.3 -LZ4_SHA256 ?= 030644df4611007ff7dc962d981f390361e6c97a34e5cbc393ddfbe019ffe2c1 +LZ4_VER ?= 1.9.4 +LZ4_SHA256 ?= 0b0e3aa07c8c063ddf40b082bdf7e37a1562bda40a0ff5272957f3e987e0e54b LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive -ZSTD_VER ?= 1.4.9 -ZSTD_SHA256 ?= acf714d98e3db7b876e5b540cbf6dee298f60eb3c0723104f6d3f065cd60d6a8 +ZSTD_VER ?= 1.5.5 +ZSTD_SHA256 ?= 9c4396cc829cfae319a6e2615202e82aad41372073482fce286fac78646d3ee4 ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index a9a49e23a..fd70a9835 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -674,7 +674,7 @@ else fi if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then - # For portability compile for macOS 10.14 or newer + # For portability compile for macOS 10.14 (2018) or newer COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.14" PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.14" # -mmacosx-version-min must come first here. From 543191f2eacadf14e3aa6ff9a08f85a8ad82da95 Mon Sep 17 00:00:00 2001 From: rogertyang Date: Fri, 20 Oct 2023 13:28:28 -0700 Subject: [PATCH 223/386] Add bounds checking to WBWIIteratorImpl and respect bounds of ReadOptions in Transaction (#11680) Summary: Fix https://github.com/facebook/rocksdb/issues/11607 Fix https://github.com/facebook/rocksdb/issues/11679 Fix https://github.com/facebook/rocksdb/issues/11606 Fix https://github.com/facebook/rocksdb/issues/2343 Add bounds checking to `WBWIIteratorImpl`, which will be reflected in `BaseDeltaIterator::delta_iterator_::Valid()`, just like `BaseDeltaIterator::base_iterator_::Valid()`. In this way, the two sub itertors become more aligned from `BaseDeltaIterator`'s perspective. Like `DBIter`, the added bounds checking caps in either bound when seeking and disvalidates the `WBWIIteratorImpl` iterator when the lower bound is past or the upper bound is reached. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11680 Test Plan: - A simple test added to write_batch_with_index_test.cc to exercise the bounds checking in `WBWIIteratorImpl`. - A sophisticated test added to transaction_test.cc to assert that `Transaction` with different write policies honor bounds in `ReadOptions`. 
It should be so as long as the `BaseDeltaIterator` is correctly coordinating the two sub iterators to perform iterating and bounds checking. Reviewed By: ajkr Differential Revision: D48125229 Pulled By: cbi42 fbshipit-source-id: c9acea52595aed1471a63d7ca6ef15d2a2af1367 --- ...eDeltaIterator_and_Write(Un)PreparedTxn.md | 1 + utilities/transactions/transaction_test.cc | 106 +++++++++++++++ utilities/transactions/write_prepared_txn.cc | 8 +- .../transactions/write_unprepared_txn.cc | 3 +- .../write_batch_with_index.cc | 18 ++- .../write_batch_with_index_internal.cc | 15 +-- .../write_batch_with_index_internal.h | 125 +++++++++++++++--- .../write_batch_with_index_test.cc | 98 ++++++++++++++ 8 files changed, 329 insertions(+), 45 deletions(-) create mode 100644 unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md diff --git a/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md b/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md new file mode 100644 index 000000000..237351f87 --- /dev/null +++ b/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md @@ -0,0 +1 @@ +Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn respect the upper bound and lower bound in ReadOption. See 11680. \ No newline at end of file diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 0d2e7ccda..d12626ca8 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -78,6 +78,112 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(false, true, WRITE_PREPARED, kUnorderedWrite, true))); #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) +TEST_P(TransactionTest, TestUpperBoundUponDeletion) { + // Reproduction from the original bug report, 11606 + // This test does writes without snapshot validation, and then tries to create + // iterator later, which is unsupported in write unprepared. 
+ if (txn_db_options.write_policy == WRITE_UNPREPARED) { + return; + } + + WriteOptions write_options; + ReadOptions read_options; + Status s; + + Transaction* txn = db->BeginTransaction(write_options); + ASSERT_TRUE(txn); + + // Write some keys in a txn + s = txn->Put("2", "2"); + ASSERT_OK(s); + + s = txn->Put("1", "1"); + ASSERT_OK(s); + + s = txn->Delete("2"); + ASSERT_OK(s); + + read_options.iterate_upper_bound = new Slice("2", 1); + Iterator* iter = txn->GetIterator(read_options); + ASSERT_OK(iter->status()); + iter->SeekToFirst(); + while (iter->Valid()) { + ASSERT_EQ("1", iter->key().ToString()); + iter->Next(); + } + delete iter; + delete txn; + delete read_options.iterate_upper_bound; +} + +TEST_P(TransactionTest, TestTxnRespectBoundsInReadOption) { + if (txn_db_options.write_policy == WRITE_UNPREPARED) { + return; + } + + WriteOptions write_options; + + { + std::unique_ptr txn(db->BeginTransaction(write_options)); + // writes that should be observed by base_iterator_ in BaseDeltaIterator + ASSERT_OK(txn->Put("a", "aa")); + ASSERT_OK(txn->Put("c", "cc")); + ASSERT_OK(txn->Put("e", "ee")); + ASSERT_OK(txn->Put("f", "ff")); + ASSERT_TRUE(txn->Commit().ok()); + } + + std::unique_ptr txn2(db->BeginTransaction(write_options)); + // writes that should be observed by delta_iterator_ in BaseDeltaIterator + ASSERT_OK(txn2->Put("b", "bb")); + ASSERT_OK(txn2->Put("c", "cc")); + ASSERT_OK(txn2->Put("f", "ff")); + + // delta_iterator_: b c f + // base_iterator_: a c e f + // + // given range [c, f) + // assert only {c, e} can be seen + + ReadOptions ro; + ro.iterate_lower_bound = new Slice("c"); + ro.iterate_upper_bound = new Slice("f"); + std::unique_ptr iter(txn2->GetIterator(ro)); + + iter->Seek(Slice("b")); + ASSERT_EQ("c", iter->key()); // lower bound capping + iter->Seek(Slice("f")); + ASSERT_FALSE(iter->Valid()); // out of bound + + iter->SeekForPrev(Slice("f")); + ASSERT_EQ("e", iter->key()); // upper bound capping + iter->SeekForPrev(Slice("b")); + ASSERT_FALSE(iter->Valid()); // out of bound + + // move to the lower bound + iter->SeekToFirst(); + ASSERT_EQ("c", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + // move to the upper bound + iter->SeekToLast(); + ASSERT_EQ("e", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + // reversely walk to the beginning + iter->SeekToLast(); + ASSERT_EQ("e", iter->key()); + iter->Prev(); + ASSERT_EQ("c", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; +} + TEST_P(TransactionTest, DoubleEmptyWrite) { WriteOptions write_options; write_options.sync = true; diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc index aa5091b95..58126a475 100644 --- a/utilities/transactions/write_prepared_txn.cc +++ b/utilities/transactions/write_prepared_txn.cc @@ -123,11 +123,7 @@ Status WritePreparedTxn::GetImpl(const ReadOptions& options, } Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options) { - // Make sure to get iterator from WritePrepareTxnDB, not the root db. 
- Iterator* db_iter = wpt_db_->NewIterator(options); - assert(db_iter); - - return write_batch_.NewIteratorWithBase(db_iter); + return GetIterator(options, wpt_db_->DefaultColumnFamily()); } Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options, @@ -136,7 +132,7 @@ Iterator* WritePreparedTxn::GetIterator(const ReadOptions& options, Iterator* db_iter = wpt_db_->NewIterator(options, column_family); assert(db_iter); - return write_batch_.NewIteratorWithBase(column_family, db_iter); + return write_batch_.NewIteratorWithBase(column_family, db_iter, &options); } Status WritePreparedTxn::PrepareInternal() { diff --git a/utilities/transactions/write_unprepared_txn.cc b/utilities/transactions/write_unprepared_txn.cc index 4c9c2a3dd..c30cf9e1f 100644 --- a/utilities/transactions/write_unprepared_txn.cc +++ b/utilities/transactions/write_unprepared_txn.cc @@ -1037,7 +1037,8 @@ Iterator* WriteUnpreparedTxn::GetIterator(const ReadOptions& options, Iterator* db_iter = wupt_db_->NewIterator(options, column_family, this); assert(db_iter); - auto iter = write_batch_.NewIteratorWithBase(column_family, db_iter); + auto iter = + write_batch_.NewIteratorWithBase(column_family, db_iter, &options); active_iterators_.push_back(iter); iter->RegisterCleanup(CleanupWriteUnpreparedWBWIIterator, this, iter); return iter; diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index b01f70a69..bbfc60f9b 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -299,12 +299,20 @@ WBWIIterator* WriteBatchWithIndex::NewIterator( Iterator* WriteBatchWithIndex::NewIteratorWithBase( ColumnFamilyHandle* column_family, Iterator* base_iterator, const ReadOptions* read_options) { - auto wbwiii = - new WBWIIteratorImpl(GetColumnFamilyID(column_family), &(rep->skip_list), - &rep->write_batch, &rep->comparator); + WBWIIteratorImpl* wbwiii; + if (read_options != nullptr) { + wbwiii = new WBWIIteratorImpl( + GetColumnFamilyID(column_family), &(rep->skip_list), &rep->write_batch, + &rep->comparator, read_options->iterate_lower_bound, + read_options->iterate_upper_bound); + } else { + wbwiii = new WBWIIteratorImpl(GetColumnFamilyID(column_family), + &(rep->skip_list), &rep->write_batch, + &rep->comparator); + } + return new BaseDeltaIterator(column_family, base_iterator, wbwiii, - GetColumnFamilyUserComparator(column_family), - read_options); + GetColumnFamilyUserComparator(column_family)); } Iterator* WriteBatchWithIndex::NewIteratorWithBase(Iterator* base_iterator) { diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index 11feb5a7e..bedd5934d 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -20,8 +20,7 @@ namespace ROCKSDB_NAMESPACE { BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, Iterator* base_iterator, WBWIIteratorImpl* delta_iterator, - const Comparator* comparator, - const ReadOptions* read_options) + const Comparator* comparator) : forward_(true), current_at_base_(true), equal_keys_(false), @@ -29,9 +28,7 @@ BaseDeltaIterator::BaseDeltaIterator(ColumnFamilyHandle* column_family, column_family_(column_family), base_iterator_(base_iterator), delta_iterator_(delta_iterator), - comparator_(comparator), - 
iterate_upper_bound_(read_options ? read_options->iterate_upper_bound - : nullptr) { + comparator_(comparator) { assert(base_iterator_); assert(delta_iterator_); assert(comparator_); @@ -332,14 +329,6 @@ void BaseDeltaIterator::UpdateCurrent() { // Finished return; } - if (iterate_upper_bound_) { - if (comparator_->CompareWithoutTimestamp( - delta_entry.key, /*a_has_ts=*/false, *iterate_upper_bound_, - /*b_has_ts=*/false) >= 0) { - // out of upper bound -> finished. - return; - } - } if (delta_result == WBWIIteratorImpl::kDeleted && merge_context_.GetNumOperands() == 0) { AdvanceDelta(); diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index d8bab54ed..c4135ad32 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -34,8 +34,7 @@ class BaseDeltaIterator : public Iterator { public: BaseDeltaIterator(ColumnFamilyHandle* column_family, Iterator* base_iterator, WBWIIteratorImpl* delta_iterator, - const Comparator* comparator, - const ReadOptions* read_options = nullptr); + const Comparator* comparator); ~BaseDeltaIterator() override {} @@ -72,7 +71,6 @@ class BaseDeltaIterator : public Iterator { std::unique_ptr base_iterator_; std::unique_ptr delta_iterator_; const Comparator* comparator_; // not owned - const Slice* iterate_upper_bound_; MergeContext merge_context_; std::string merge_result_; Slice value_; @@ -200,59 +198,107 @@ class WBWIIteratorImpl : public WBWIIterator { WBWIIteratorImpl(uint32_t column_family_id, WriteBatchEntrySkipList* skip_list, const ReadableWriteBatch* write_batch, - WriteBatchEntryComparator* comparator) + WriteBatchEntryComparator* comparator, + const Slice* iterate_lower_bound = nullptr, + const Slice* iterate_upper_bound = nullptr) : column_family_id_(column_family_id), skip_list_iter_(skip_list), write_batch_(write_batch), - comparator_(comparator) {} + comparator_(comparator), + iterate_lower_bound_(iterate_lower_bound), + iterate_upper_bound_(iterate_upper_bound) {} ~WBWIIteratorImpl() override {} bool Valid() const override { - if (!skip_list_iter_.Valid()) { - return false; - } - const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key(); - return (iter_entry != nullptr && - iter_entry->column_family == column_family_id_); + return !out_of_bound_ && ValidRegardlessOfBoundLimit(); } void SeekToFirst() override { - WriteBatchIndexEntry search_entry( - nullptr /* search_key */, column_family_id_, - true /* is_forward_direction */, true /* is_seek_to_first */); - skip_list_iter_.Seek(&search_entry); + if (iterate_lower_bound_ != nullptr) { + WriteBatchIndexEntry search_entry( + iterate_lower_bound_ /* search_key */, column_family_id_, + true /* is_forward_direction */, false /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); + } else { + WriteBatchIndexEntry search_entry( + nullptr /* search_key */, column_family_id_, + true /* is_forward_direction */, true /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); + } + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void SeekToLast() override { - WriteBatchIndexEntry search_entry( - nullptr /* search_key */, column_family_id_ + 1, - true /* is_forward_direction */, true /* is_seek_to_first */); + WriteBatchIndexEntry search_entry = + (iterate_upper_bound_ != nullptr) + ? 
WriteBatchIndexEntry( + iterate_upper_bound_ /* search_key */, column_family_id_, + true /* is_forward_direction */, false /* is_seek_to_first */) + : WriteBatchIndexEntry( + nullptr /* search_key */, column_family_id_ + 1, + true /* is_forward_direction */, true /* is_seek_to_first */); + skip_list_iter_.Seek(&search_entry); if (!skip_list_iter_.Valid()) { skip_list_iter_.SeekToLast(); } else { skip_list_iter_.Prev(); } + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void Seek(const Slice& key) override { + if (BeforeLowerBound(&key)) { // cap to prevent out of bound + SeekToFirst(); + return; + } + WriteBatchIndexEntry search_entry(&key, column_family_id_, true /* is_forward_direction */, false /* is_seek_to_first */); skip_list_iter_.Seek(&search_entry); + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } void SeekForPrev(const Slice& key) override { + if (AtOrAfterUpperBound(&key)) { // cap to prevent out of bound + SeekToLast(); + return; + } + WriteBatchIndexEntry search_entry(&key, column_family_id_, false /* is_forward_direction */, false /* is_seek_to_first */); skip_list_iter_.SeekForPrev(&search_entry); + + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } } - void Next() override { skip_list_iter_.Next(); } + void Next() override { + skip_list_iter_.Next(); + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } + } - void Prev() override { skip_list_iter_.Prev(); } + void Prev() override { + skip_list_iter_.Prev(); + if (ValidRegardlessOfBoundLimit()) { + out_of_bound_ = TestOutOfBound(); + } + } WriteEntry Entry() const override; @@ -293,6 +339,45 @@ class WBWIIteratorImpl : public WBWIIterator { WriteBatchEntrySkipList::Iterator skip_list_iter_; const ReadableWriteBatch* write_batch_; WriteBatchEntryComparator* comparator_; + const Slice* iterate_lower_bound_; + const Slice* iterate_upper_bound_; + bool out_of_bound_ = false; + + bool TestOutOfBound() const { + const Slice& curKey = Entry().key; + return AtOrAfterUpperBound(&curKey) || BeforeLowerBound(&curKey); + } + + bool ValidRegardlessOfBoundLimit() const { + if (!skip_list_iter_.Valid()) { + return false; + } + const WriteBatchIndexEntry* iter_entry = skip_list_iter_.key(); + return iter_entry != nullptr && + iter_entry->column_family == column_family_id_; + } + + bool AtOrAfterUpperBound(const Slice* k) const { + if (iterate_upper_bound_ == nullptr) { + return false; + } + + return comparator_->GetComparator(column_family_id_) + ->CompareWithoutTimestamp(*k, /*a_has_ts=*/false, + *iterate_upper_bound_, + /*b_has_ts=*/false) >= 0; + } + + bool BeforeLowerBound(const Slice* k) const { + if (iterate_lower_bound_ == nullptr) { + return false; + } + + return comparator_->GetComparator(column_family_id_) + ->CompareWithoutTimestamp(*k, /*a_has_ts=*/false, + *iterate_lower_bound_, + /*b_has_ts=*/false) < 0; + } }; class WriteBatchWithIndexInternal { diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index c69dd39a2..95333d8f4 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -1640,6 +1640,104 @@ TEST_P(WriteBatchWithIndexTest, TestNewIteratorWithBaseFromWbwi) { ASSERT_OK(iter->status()); } +TEST_P(WriteBatchWithIndexTest, TestBoundsCheckingInDeltaIterator) { + Status s = OpenDB(); + ASSERT_OK(s); + + KVMap empty_map; + + 
// writes that should be observed by BaseDeltaIterator::delta_iterator_ + ASSERT_OK(batch_->Put("a", "aa")); + ASSERT_OK(batch_->Put("b", "bb")); + ASSERT_OK(batch_->Put("c", "cc")); + + ReadOptions ro; + + auto check_only_b_is_visible = [&]() { + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), new KVIter(&empty_map), &ro)); + + // move to the lower bound + iter->SeekToFirst(); + ASSERT_EQ("b", iter->key()); + iter->Prev(); + ASSERT_FALSE(iter->Valid()); + + // move to the upper bound + iter->SeekToLast(); + ASSERT_EQ("b", iter->key()); + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + // test bounds checking in Seek and SeekForPrev + iter->Seek(Slice("a")); + ASSERT_EQ("b", iter->key()); + iter->Seek(Slice("b")); + ASSERT_EQ("b", iter->key()); + iter->Seek(Slice("c")); + ASSERT_FALSE(iter->Valid()); + + iter->SeekForPrev(Slice("c")); + ASSERT_EQ("b", iter->key()); + iter->SeekForPrev(Slice("b")); + ASSERT_EQ("b", iter->key()); + iter->SeekForPrev(Slice("a")); + ASSERT_FALSE(iter->Valid()); + + iter->SeekForPrev( + Slice("a.1")); // a non-existent key that is smaller than "b" + ASSERT_FALSE(iter->Valid()); + + iter->Seek(Slice("b.1")); // a non-existent key that is greater than "b" + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; + }; + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("c"); + check_only_b_is_visible(); + + ro.iterate_lower_bound = new Slice("a.1"); + ro.iterate_upper_bound = new Slice("c"); + check_only_b_is_visible(); + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("b.2"); + check_only_b_is_visible(); +} + +TEST_P(WriteBatchWithIndexTest, + TestBoundsCheckingInSeekToFirstAndLastOfDeltaIterator) { + Status s = OpenDB(); + ASSERT_OK(s); + KVMap empty_map; + // writes that should be observed by BaseDeltaIterator::delta_iterator_ + ASSERT_OK(batch_->Put("c", "cc")); + + ReadOptions ro; + auto check_nothing_visible = [&]() { + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), new KVIter(&empty_map), &ro)); + iter->SeekToFirst(); + ASSERT_FALSE(iter->Valid()); + iter->SeekToLast(); + ASSERT_FALSE(iter->Valid()); + + delete ro.iterate_lower_bound; + delete ro.iterate_upper_bound; + }; + + ro.iterate_lower_bound = new Slice("b"); + ro.iterate_upper_bound = new Slice("c"); + check_nothing_visible(); + + ro.iterate_lower_bound = new Slice("d"); + ro.iterate_upper_bound = new Slice("e"); + check_nothing_visible(); +} + TEST_P(WriteBatchWithIndexTest, SavePointTest) { ColumnFamilyHandleImplDummy cf1(1, BytewiseComparator()); KVMap empty_map; From 41550877460c4c5021c06406827382f219edb9f2 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 23 Oct 2023 09:20:59 -0700 Subject: [PATCH 224/386] Use manifest to persist pre-allocated seqnos (#11995) Summary: ... and other fixes for crash test after https://github.com/facebook/rocksdb/issues/11922. * When pre-allocating sequence numbers for establishing a time history, record that last sequence number in the manifest so that it is (most likely) restored on recovery even if no user writes were made or were recovered (e.g. no WAL). * When pre-allocating sequence numbers for establishing a time history, only do this for actually new DBs. * Remove the feature that ensures non-zero sequence number on creating the first column family with preserve/preclude option after initial DB::Open. 
Until fixed in a way compatible with the crash test, this creates a gap where some data written with active preserve/preclude option won't have a known associated time. Together, these ensure we don't upset the crash test by manipulating sequence numbers after initial DB creation (esp when re-opening with different options). (The crash test expects that the seqno after re-open corresponds to a known point in time from previous crash test operation, matching an expected DB state.) Follow-up work: * Re-fill the gap to ensure all data written under preserve/preclude settings have a known time estimate. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11995 Test Plan: Added to unit test SeqnoTimeTablePropTest.PrePopulateInDB Verified fixes two crash test scenarios: ## 1st reproducer First apply ``` diff --git a/db_stress_tool/expected_state.cc b/db_stress_tool/expected_state.cc index b483e154c..ef63b8d6c 100644 --- a/db_stress_tool/expected_state.cc +++ b/db_stress_tool/expected_state.cc @@ -333,6 +333,7 @@ Status FileExpectedStateManager::SaveAtAndAfter(DB* db) { s = NewFileTraceWriter(Env::Default(), soptions, trace_file_path, &trace_writer); } + if (getenv("CRASH")) assert(false); if (s.ok()) { TraceOptions trace_opts; trace_opts.filter |= kTraceFilterGet; ``` Then ``` mkdir -p /dev/shm/rocksdb_test/rocksdb_crashtest_expected mkdir -p /dev/shm/rocksdb_test/rocksdb_crashtest_whitebox rm -rf /dev/shm/rocksdb_test/rocksdb_crashtest_*/* CRASH=1 ./db_stress --db=/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --destroy_db_initially=1 --manual_wal_flush_one_in=1000000 --clear_column_family_one_in=0 --preserve_internal_time_seconds=36000 ./db_stress --db=/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --destroy_db_initially=0 --manual_wal_flush_one_in=1000000 --clear_column_family_one_in=0 --preserve_internal_time_seconds=0 ``` Without the fix you get ``` ... 
DB path: [/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox] (Re-)verified 34 unique IDs Error restoring historical expected values: Corruption: DB is older than any restorable expected state ``` ## 2nd reproducer First apply ``` diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 62ddead7b..f2654980f 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1126,6 +1126,7 @@ void StressTest::OperateDb(ThreadState* thread) { // OPERATION write TestPut(thread, write_opts, read_opts, rand_column_families, rand_keys, value); + if (getenv("CRASH")) assert(false); } else if (prob_op < del_bound) { assert(write_bound <= prob_op); // OPERATION delete ``` Then ``` rm -rf /dev/shm/rocksdb_test/rocksdb_crashtest_*/* CRASH=1 ./db_stress --db=/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --destroy_db_initially=1 --manual_wal_flush_one_in=1000000 --clear_column_family_one_in=0 --disable_wal=1 --reopen=0 --preserve_internal_time_seconds=0 ./db_stress --db=/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --destroy_db_initially=0 --manual_wal_flush_one_in=1000000 --clear_column_family_one_in=0 --disable_wal=1 --reopen=0 --preserve_internal_time_seconds=3600 ``` Without the fix you get ``` DB path: [/dev/shm/rocksdb_test/rocksdb_crashtest_whitebox] (Re-)verified 34 unique IDs db_stress: db_stress_tool/expected_state.cc:380: virtual rocksdb::{anonymous}::ExpectedStateTraceRecordHandler::~ ExpectedStateTraceRecordHandler(): Assertion `IsDone()' failed. ``` Reviewed By: jowlyzhang Differential Revision: D50533346 Pulled By: pdillinger fbshipit-source-id: 1056be45c5b9e537c8c601b28c4b27431a782477 --- db/db_impl/db_impl.cc | 49 ++++++++++++++++++++++++++++---------- db/db_impl/db_impl.h | 3 ++- db/db_impl/db_impl_open.cc | 5 ++-- db/seqno_time_test.cc | 24 +++++++++++++++++++ db/version_set.cc | 14 ----------- db/version_set.h | 3 --- 6 files changed, 65 insertions(+), 33 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index ab3a79e99..2d4456485 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -796,10 +796,8 @@ Status DBImpl::StartPeriodicTaskScheduler() { return s; } -Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { - if (!from_db_open) { - options_mutex_.AssertHeld(); - } +Status DBImpl::RegisterRecordSeqnoTimeWorker(bool is_new_db) { + options_mutex_.AssertHeld(); uint64_t min_preserve_seconds = std::numeric_limits::max(); uint64_t max_preserve_seconds = std::numeric_limits::min(); @@ -853,7 +851,17 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { // 2) In any DB, any data written after setting preserve/preclude options // must have a reasonable time estimate (so that we can accurately place // the data), which means at least one entry in seqno_to_time_mapping_. - if (from_db_open && GetLatestSequenceNumber() == 0) { + // + // FIXME: We don't currently guarantee that if the first column family with + // that setting is added or configured after initial DB::Open but before + // the first user Write. 
Fixing this causes complications with the crash + // test because if DB starts without preserve/preclude option, does some + // user writes but all those writes are lost in crash, then re-opens with + // preserve/preclude option, it sees seqno==1 which looks like one of the + // user writes was recovered, when actually it was not. + bool last_seqno_zero = GetLatestSequenceNumber() == 0; + assert(!is_new_db || last_seqno_zero); + if (is_new_db && last_seqno_zero) { // Pre-allocate seqnos and pre-populate historical mapping assert(mapping_was_empty); @@ -862,16 +870,31 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(bool from_db_open) { versions_->SetLastAllocatedSequence(kMax); versions_->SetLastPublishedSequence(kMax); versions_->SetLastSequence(kMax); + + // And record in manifest, to avoid going backwards in seqno on re-open + // (potentially with different options). Concurrency is simple because we + // are in DB::Open + { + InstrumentedMutexLock l(&mutex_); + VersionEdit edit; + edit.SetLastSequence(kMax); + s = versions_->LogAndApplyToDefaultColumnFamily( + {}, &edit, &mutex_, directories_.GetDbDir()); + if (!s.ok() && versions_->io_status().IsIOError()) { + s = error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); + } + } + // Pre-populate mappings for reserved sequence numbers. RecordSeqnoToTimeMapping(max_preserve_seconds); } else if (mapping_was_empty) { - // To ensure there is at least one mapping, we need a non-zero sequence - // number. Outside of DB::Open, we have to be careful. - versions_->EnsureNonZeroSequence(); - assert(GetLatestSequenceNumber() > 0); - - // Ensure at least one mapping (or log a warning) - RecordSeqnoToTimeMapping(/*populate_historical_seconds=*/0); + if (!last_seqno_zero) { + // Ensure at least one mapping (or log a warning) + RecordSeqnoToTimeMapping(/*populate_historical_seconds=*/0); + } else { + // FIXME (see limitation described above) + } } s = periodic_task_scheduler_.Register( @@ -6493,7 +6516,7 @@ void DBImpl::RecordSeqnoToTimeMapping(uint64_t populate_historical_seconds) { assert(unix_time > populate_historical_seconds); } } else { - assert(seqno > 0); + // FIXME: assert(seqno > 0); appended = seqno_to_time_mapping_.Append(seqno, unix_time); } } diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index d8365c0d0..621177ef1 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1395,6 +1395,7 @@ class DBImpl : public DB { autovector> edit_lists_; // files_to_delete_ contains sst files std::unordered_set files_to_delete_; + bool is_new_db_ = false; }; // Persist options to options file. Must be holding options_mutex_. @@ -2168,7 +2169,7 @@ class DBImpl : public DB { // Cancel scheduled periodic tasks Status CancelPeriodicTaskScheduler(); - Status RegisterRecordSeqnoTimeWorker(bool from_db_open); + Status RegisterRecordSeqnoTimeWorker(bool is_new_db); void PrintStatistics(); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index b38eddf1a..5fd4d8f59 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -418,7 +418,8 @@ Status DBImpl::Recover( uint64_t* recovered_seq, RecoveryContext* recovery_ctx) { mutex_.AssertHeld(); - bool is_new_db = false; + bool tmp_is_new_db = false; + bool& is_new_db = recovery_ctx ? 
recovery_ctx->is_new_db_ : tmp_is_new_db; assert(db_lock_ == nullptr); std::vector files_in_dbname; if (!read_only) { @@ -2247,7 +2248,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, s = impl->StartPeriodicTaskScheduler(); } if (s.ok()) { - s = impl->RegisterRecordSeqnoTimeWorker(/*from_db_open=*/true); + s = impl->RegisterRecordSeqnoTimeWorker(recovery_ctx.is_new_db_); } impl->options_mutex_.Unlock(); if (!s.ok()) { diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc index 64f9b53de..199c59c9b 100644 --- a/db/seqno_time_test.cc +++ b/db/seqno_time_test.cc @@ -843,6 +843,10 @@ TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) { // interfere with the seqno-to-time mapping getting a starting entry. ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); + } else { + // FIXME: currently, starting entry after CreateColumnFamily requires + // non-zero seqno + ASSERT_OK(Delete("blah")); } // Unfortunately, if we add a CF with preserve/preclude option after @@ -899,6 +903,9 @@ TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) { } sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); ASSERT_EQ(sttm.Size(), 0); + if (!with_write) { + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0); + } ASSERT_OK(ReadOnlyReopen(track_options)); if (with_write) { @@ -906,6 +913,16 @@ TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) { } sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); ASSERT_EQ(sttm.Size(), 0); + if (!with_write) { + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0); + + // And even if we re-open read-write, we do not get pre-population, + // because that's only for new DBs. + Reopen(track_options); + sttm = dbfull()->TEST_GetSeqnoToTimeMapping(); + ASSERT_EQ(sttm.Size(), 0); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0); + } } // #### DB#5: Destroy and open with preserve/preclude option #### @@ -987,6 +1004,13 @@ TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) { // Oldest tracking time maps to first pre-allocated seqno ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), 1); + // Even after no writes and DB re-open without tracking options, sequence + // numbers should not go backward into those that were pre-allocated. 
+ // (Future work: persist the mapping) + ReopenWithColumnFamilies({"default", "one"}, + List({base_options, base_options})); + ASSERT_EQ(latest_seqno, db_->GetLatestSequenceNumber()); + Close(); } diff --git a/db/version_set.cc b/db/version_set.cc index 95df9fb8d..482b4c90d 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -7234,20 +7234,6 @@ Status VersionSet::VerifyFileMetadata(const ReadOptions& read_options, return status; } -void VersionSet::EnsureNonZeroSequence() { - uint64_t expected = 0; - // Update each from 0->1, in order, or abort if any becomes non-zero in - // parallel - if (last_allocated_sequence_.compare_exchange_strong(expected, 1)) { - if (last_published_sequence_.compare_exchange_strong(expected, 1)) { - (void)last_sequence_.compare_exchange_strong(expected, 1); - } - } - assert(last_allocated_sequence_.load() > 0); - assert(last_published_sequence_.load() > 0); - assert(last_sequence_.load() > 0); -} - ReactiveVersionSet::ReactiveVersionSet( const std::string& dbname, const ImmutableDBOptions* _db_options, const FileOptions& _file_options, Cache* table_cache, diff --git a/db/version_set.h b/db/version_set.h index bfc63e351..55bce41e9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1344,9 +1344,6 @@ class VersionSet { last_allocated_sequence_.store(s, std::memory_order_seq_cst); } - // Allocate a dummy sequence number as needed to ensure last is non-zero. - void EnsureNonZeroSequence(); - // Note: memory_order_release must be sufficient uint64_t FetchAddLastAllocatedSequence(uint64_t s) { return last_allocated_sequence_.fetch_add(s, std::memory_order_seq_cst); From 4d9f9733b24cf7387b2beb2c00b2e7273e8412f3 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 23 Oct 2023 12:23:13 -0700 Subject: [PATCH 225/386] Disable AutoHCC in crash test (#12000) Summary: ... until I can reproduce and resolve assertion failures (mostly in PurgeImplLocked) seen in crash test. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12000 Test Plan: make blackbox_crash_test Reviewed By: hx235 Differential Revision: D50565984 Pulled By: pdillinger fbshipit-source-id: 5eea1638ff2683c41b4f65ee1ffc2398071911e7 --- tools/db_crashtest.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 9bdb5a386..c9374f59c 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -125,10 +125,9 @@ "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, "cache_type": lambda: random.choice( - ["lru_cache", "fixed_hyper_clock_cache", "auto_hyper_clock_cache", - "auto_hyper_clock_cache", "tiered_lru_cache", - "tiered_fixed_hyper_clock_cache", "tiered_auto_hyper_clock_cache", - "tiered_auto_hyper_clock_cache"] + ["lru_cache", "fixed_hyper_clock_cache", + # NOTE: auto_hyper_clock_cache disabled for now + "tiered_lru_cache", "tiered_fixed_hyper_clock_cache"] ), "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), From e81393e81eb760370abcdfd49f3508d54133cbbc Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 23 Oct 2023 14:42:44 -0700 Subject: [PATCH 226/386] Add some stats to observe the usefulness of scan prefetching (#11981) Summary: Add stats for better observability of scan prefetching. Its only implemented for sync scan right now. These stats can help inform future improvements in scan prefetching. 
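As a usage sketch (not part of this change; the DB path and surrounding boilerplate below are illustrative assumptions), the new tickers can be read through the existing `Statistics` interface after a scan-heavy workload, which is how the ratio of useful to prefetched bytes could be observed in practice:
```
// Illustrative sketch only: reads the scan-prefetch tickers added by this
// change. The DB path and setup are hypothetical; the ticker names and the
// Statistics API calls come from the public headers.
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/prefetch_stats_demo", &db);
  if (!s.ok()) {
    std::cerr << s.ToString() << std::endl;
    return 1;
  }

  // ... run iterator scans here ...

  uint64_t prefetched =
      options.statistics->getTickerCount(rocksdb::PREFETCH_BYTES);
  uint64_t useful =
      options.statistics->getTickerCount(rocksdb::PREFETCH_BYTES_USEFUL);
  uint64_t hits = options.statistics->getTickerCount(rocksdb::PREFETCH_HITS);

  // useful / prefetched approximates readahead efficiency; hits counts file
  // system reads avoided because the data was already in the prefetch buffer.
  std::cout << "prefetched=" << prefetched << " useful=" << useful
            << " hits=" << hits << std::endl;

  delete db;
  return 0;
}
```
A persistently low useful-to-prefetched ratio on a given workload would suggest the readahead heuristics are over-prefetching, which is the kind of signal these stats are meant to surface.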
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11981 Test Plan: Add a new unit test Reviewed By: akankshamahajan15 Differential Revision: D50516505 Pulled By: anand1976 fbshipit-source-id: cb1cc6cf02df8295930a49c62b11870020df3f97 --- file/file_prefetch_buffer.cc | 11 ++++++ file/file_prefetch_buffer.h | 1 + file/prefetch_test.cc | 39 +++++++++++++++++++ include/rocksdb/statistics.h | 9 +++++ java/rocksjni/portal.h | 12 ++++++ .../src/main/java/org/rocksdb/TickerType.java | 6 +++ monitoring/statistics.cc | 3 ++ table/block_based/block_based_table_reader.h | 13 ++++--- table/block_based/block_prefetcher.cc | 12 ++++-- table/block_based/partitioned_filter_block.cc | 3 +- table/block_based/partitioned_index_reader.cc | 3 +- 11 files changed, 100 insertions(+), 12 deletions(-) diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 70338d5b5..da4a1d0b9 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -98,6 +98,9 @@ Status FilePrefetchBuffer::Read(const IOOptions& opts, return s; } + if (usage_ == FilePrefetchBufferUsage::kUserScanPrefetch) { + RecordTick(stats_, PREFETCH_BYTES, read_len); + } // Update the buffer offset and size. bufs_[index].offset_ = rounddown_start; bufs_[index].buffer_.Size(static_cast(chunk_len) + result.size()); @@ -653,6 +656,11 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( if (for_compaction) { s = Prefetch(opts, reader, offset, std::max(n, readahead_size_)); } else { + if (IsOffsetInBuffer(offset, curr_)) { + RecordTick(stats_, PREFETCH_BYTES_USEFUL, + bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize() - + offset); + } if (implicit_auto_readahead_) { if (!IsEligibleForPrefetch(offset, n)) { // Ignore status as Prefetch is not called. @@ -676,6 +684,9 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( } else { return false; } + } else if (!for_compaction) { + RecordTick(stats_, PREFETCH_HITS); + RecordTick(stats_, PREFETCH_BYTES_USEFUL, n); } UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/); diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index 2be6b9f8a..d71b28ab8 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -56,6 +56,7 @@ struct BufferInfo { enum class FilePrefetchBufferUsage { kTableOpenPrefetchTail, + kUserScanPrefetch, kUnknown, }; diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 45cda33d6..84932440a 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -3453,6 +3453,45 @@ TEST_F(FilePrefetchBufferTest, IterateUpperBoundTest1) { ASSERT_EQ(result, async_result); } +TEST_F(FilePrefetchBufferTest, SyncReadaheadStats) { + std::string fname = "seek-with-block-cache-hit"; + Random rand(0); + std::string content = rand.RandomString(32768); + Write(fname, content); + + FileOptions opts; + std::unique_ptr r; + Read(fname, opts, &r); + + std::shared_ptr stats = CreateDBStatistics(); + FilePrefetchBuffer fpb(8192, 8192, true, false, false, 0, 0, 0, fs(), nullptr, + stats.get()); + Slice result; + // Simulate a seek of 4096 bytes at offset 0. 
Due to the readahead settings, + // it will do two reads of 4096+8192 and 8192 + Status s; + ASSERT_TRUE(fpb.TryReadFromCache(IOOptions(), r.get(), 0, 4096, &result, &s)); + ASSERT_EQ(s, Status::OK()); + ASSERT_EQ(stats->getTickerCount(PREFETCH_HITS), 0); + ASSERT_EQ(stats->getTickerCount(PREFETCH_BYTES_USEFUL), 0); + + // Simulate a block cache hit + fpb.UpdateReadPattern(4096, 4096, false); + // Now read some data that straddles the two prefetch buffers - offset 8192 to + // 16384 + ASSERT_TRUE( + fpb.TryReadFromCache(IOOptions(), r.get(), 8192, 8192, &result, &s)); + ASSERT_EQ(s, Status::OK()); + ASSERT_EQ(stats->getTickerCount(PREFETCH_HITS), 0); + ASSERT_EQ(stats->getTickerCount(PREFETCH_BYTES_USEFUL), 4096); + + ASSERT_TRUE( + fpb.TryReadFromCache(IOOptions(), r.get(), 12288, 4096, &result, &s)); + ASSERT_EQ(s, Status::OK()); + ASSERT_EQ(stats->getTickerCount(PREFETCH_HITS), 1); + ASSERT_EQ(stats->getTickerCount(PREFETCH_BYTES_USEFUL), 8192); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index 9d81e435f..ecddf5c7a 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -522,6 +522,15 @@ enum Tickers : uint32_t { FIFO_MAX_SIZE_COMPACTIONS, FIFO_TTL_COMPACTIONS, + // Number of bytes prefetched during user initiated scan + PREFETCH_BYTES, + + // Number of prefetched bytes that were actually useful + PREFETCH_BYTES_USEFUL, + + // Number of FS reads avoided due to scan prefetching + PREFETCH_HITS, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 5f3d65ac5..840956dae 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5169,6 +5169,12 @@ class TickerTypeJni { return -0x3E; case ROCKSDB_NAMESPACE::Tickers::FIFO_TTL_COMPACTIONS: return -0x3F; + case ROCKSDB_NAMESPACE::Tickers::PREFETCH_BYTES: + return -0x40; + case ROCKSDB_NAMESPACE::Tickers::PREFETCH_BYTES_USEFUL: + return -0x41; + case ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS: + return -0x42; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep @@ -5538,6 +5544,12 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::FIFO_MAX_SIZE_COMPACTIONS; case -0x3F: return ROCKSDB_NAMESPACE::Tickers::FIFO_TTL_COMPACTIONS; + case -0x40: + return ROCKSDB_NAMESPACE::Tickers::PREFETCH_BYTES; + case -0x41: + return ROCKSDB_NAMESPACE::Tickers::PREFETCH_BYTES_USEFUL; + case -0x42: + return ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS; case 0x5F: // 0x5F was the max value in the initial copy of tickers to Java. 
// Since these values are exposed directly to Java clients, we keep diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/rocksdb/TickerType.java index a718dfa15..f2ca42776 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/rocksdb/TickerType.java @@ -770,6 +770,12 @@ public enum TickerType { FIFO_TTL_COMPACTIONS((byte) -0x3F), + PREFETCH_BYTES((byte) -0x40), + + PREFETCH_BYTES_USEFUL((byte) -0x41), + + PREFETCH_HITS((byte) -0x42), + TICKER_ENUM_MAX((byte) 0x5F); private final byte value; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 5aede1df1..ebfd44300 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -261,6 +261,9 @@ const std::vector> TickersNameMap = { {READAHEAD_TRIMMED, "rocksdb.readahead.trimmed"}, {FIFO_MAX_SIZE_COMPACTIONS, "rocksdb.fifo.max.size.compactions"}, {FIFO_TTL_COMPACTIONS, "rocksdb.fifo.ttl.compactions"}, + {PREFETCH_BYTES, "rocksdb.prefetch.bytes"}, + {PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"}, + {PREFETCH_HITS, "rocksdb.prefetch.hits"}, }; const std::vector> HistogramsNameMap = { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index ed6af9b33..22361b505 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -697,14 +697,15 @@ struct BlockBasedTable::Rep { std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, uint64_t upper_bound_offset, - const std::function& readaheadsize_cb) - const { + const std::function& readaheadsize_cb, + FilePrefetchBufferUsage usage) const { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, implicit_auto_readahead, num_file_reads, num_file_reads_for_auto_readahead, upper_bound_offset, - ioptions.fs.get(), ioptions.clock, ioptions.stats, readaheadsize_cb)); + ioptions.fs.get(), ioptions.clock, ioptions.stats, readaheadsize_cb, + usage)); } void CreateFilePrefetchBufferIfNotExists( @@ -712,13 +713,13 @@ struct BlockBasedTable::Rep { std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, uint64_t upper_bound_offset, - const std::function& readaheadsize_cb) - const { + const std::function& readaheadsize_cb, + FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) const { if (!(*fpb)) { CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, implicit_auto_readahead, num_file_reads, num_file_reads_for_auto_readahead, - upper_bound_offset, readaheadsize_cb); + upper_bound_offset, readaheadsize_cb, usage); } } diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index db2d546f6..54848b785 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -58,7 +58,8 @@ void BlockPrefetcher::PrefetchIfNeeded( readahead_size, readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, upper_bound_offset_, - readaheadsize_cb); + readaheadsize_cb, + /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -83,7 +84,8 @@ void BlockPrefetcher::PrefetchIfNeeded( &prefetch_buffer_, /*implicit_auto_readahead=*/true, /*num_file_reads=*/0, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, 
readaheadsize_cb); + upper_bound_offset_, readaheadsize_cb, + /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -114,7 +116,8 @@ void BlockPrefetcher::PrefetchIfNeeded( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, readaheadsize_cb); + upper_bound_offset_, readaheadsize_cb, + /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -136,7 +139,8 @@ void BlockPrefetcher::PrefetchIfNeeded( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, readaheadsize_cb); + upper_bound_offset_, readaheadsize_cb, + /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 9c0a49660..c908db41d 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -498,7 +498,8 @@ Status PartitionedFilterBlockReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /* Implicit autoreadahead */, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr); + /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr, + /*usage=*/FilePrefetchBufferUsage::kUnknown); IOOptions opts; s = rep->file->PrepareIOOptions(ro, opts); diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index 9f3f339a1..f82590718 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -170,7 +170,8 @@ Status PartitionIndexReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /*Implicit auto readahead*/, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr); + /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr, + /*usage=*/FilePrefetchBufferUsage::kUnknown); IOOptions opts; { Status s = rep->file->PrepareIOOptions(ro, opts); From 519f2a41fb76e5644c63e4e588addb3b88b36580 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 23 Oct 2023 15:12:36 -0700 Subject: [PATCH 227/386] Add cache_bench to buck build (#11990) Summary: as title Pull Request resolved: https://github.com/facebook/rocksdb/pull/11990 Test Plan: buck build in fbcode Reviewed By: hx235 Differential Revision: D50502851 Pulled By: pdillinger fbshipit-source-id: b046e4d8b90f1496e5a134faf2b936dec10922de --- TARGETS | 2 ++ buckifier/buckify_rocksdb.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/TARGETS b/TARGETS index f56cceb40..c53ed156e 100644 --- a/TARGETS +++ b/TARGETS @@ -406,6 +406,8 @@ rocks_cpp_library_wrapper(name="rocksdb_stress_lib", srcs=[ cpp_binary_wrapper(name="db_stress", srcs=["db_stress_tool/db_stress.cc"], deps=[":rocksdb_stress_lib"], extra_preprocessor_flags=[], extra_bench_libs=False) +cpp_binary_wrapper(name="cache_bench", srcs=["cache/cache_bench.cc"], deps=[":rocksdb_cache_bench_tools_lib"], extra_preprocessor_flags=[], extra_bench_libs=False) + cpp_binary_wrapper(name="ribbon_bench", srcs=["microbench/ribbon_bench.cc"], deps=[], extra_preprocessor_flags=[], extra_bench_libs=True) cpp_binary_wrapper(name="db_basic_bench", srcs=["microbench/db_basic_bench.cc"], 
deps=[], extra_preprocessor_flags=[], extra_bench_libs=True) diff --git a/buckifier/buckify_rocksdb.py b/buckifier/buckify_rocksdb.py index a9e7b447d..0172acf9b 100755 --- a/buckifier/buckify_rocksdb.py +++ b/buckifier/buckify_rocksdb.py @@ -197,6 +197,10 @@ def generate_targets(repo_path, deps_map): TARGETS.add_binary( "db_stress", ["db_stress_tool/db_stress.cc"], [":rocksdb_stress_lib"] ) + # cache_bench binary + TARGETS.add_binary( + "cache_bench", ["cache/cache_bench.cc"], [":rocksdb_cache_bench_tools_lib"] + ) # bench binaries for src in src_mk.get("MICROBENCH_SOURCES", []): name = src.rsplit("/", 1)[1].split(".")[0] if "/" in src else src.split(".")[0] From 99b371b417bb76aaf8f1da56dc361e22e0c51182 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 24 Oct 2023 09:58:02 -0700 Subject: [PATCH 228/386] Skip subsequent trace writes after encountering trace write failure (#11996) Summary: **Context/Summary:** We ignore trace writing status e.g, https://github.com/facebook/rocksdb/blob/543191f2eacadf14e3aa6ff9a08f85a8ad82da95/db/db_impl/db_impl_write.cc#L221-L222 If a write into the trace file fails, subsequent trace write will continue onto the same file. This will trigger the assertion `assert(sync_without_flush_called_)` intended to catch write to a file that has previously seen error, added in https://github.com/facebook/rocksdb/pull/10489, https://github.com/facebook/rocksdb/pull/10555 Alternative (rejected) is to handle trace writing status at a higher level at e.g, https://github.com/facebook/rocksdb/blob/543191f2eacadf14e3aa6ff9a08f85a8ad82da95/db/db_impl/db_impl_write.cc#L221-L222. However, it makes sense to ignore such status considering tracing is not a critical but assistant component to db operation. And this alternative requires more code change. So it's better to handle the failure at a lower level as this PR Pull Request resolved: https://github.com/facebook/rocksdb/pull/11996 Test Plan: Add new UT failed before this PR and pass after Reviewed By: akankshamahajan15 Differential Revision: D50532467 Pulled By: hx235 fbshipit-source-id: f2032abafd94917adbf89a20841d15b448782a33 --- tools/trace_analyzer_test.cc | 44 +++++++++++++++++++ trace_replay/trace_replay.cc | 14 +++++- trace_replay/trace_replay.h | 1 + .../behavior_changes/skip_trace_write.md | 1 + 4 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 unreleased_history/behavior_changes/skip_trace_write.md diff --git a/tools/trace_analyzer_test.cc b/tools/trace_analyzer_test.cc index 81dc4f2cc..e7d090eb2 100644 --- a/tools/trace_analyzer_test.cc +++ b/tools/trace_analyzer_test.cc @@ -31,6 +31,8 @@ int main() { #include "test_util/testutil.h" #include "tools/trace_analyzer_tool.h" #include "trace_replay/trace_replay.h" +#include "utilities/fault_injection_env.h" +#include "utilities/trace/file_trace_reader_writer.h" namespace ROCKSDB_NAMESPACE { @@ -785,6 +787,48 @@ TEST_F(TraceAnalyzerTest, Iterator) { */ } +TEST_F(TraceAnalyzerTest, ExistsPreviousTraceWriteError) { + DB* db_ = nullptr; + Options options; + options.create_if_missing = true; + + std::unique_ptr fault_env( + new FaultInjectionTestEnv(env_)); + const std::string trace_path = + test_path_ + "/previous_trace_write_error_trace"; + std::unique_ptr trace_writer; + ASSERT_OK(NewFileTraceWriter(fault_env.get(), env_options_, trace_path, + &trace_writer)); + + ASSERT_OK(DB::Open(options, dbname_, &db_)); + ASSERT_OK(db_->StartTrace(TraceOptions(), std::move(trace_writer))); + + // Inject write error on the first trace write. 
+ // This trace write is made big enough to actually write to FS for error + // injection. + const std::string kBigKey(1000000, 'k'); + const std::string kBigValue(1000000, 'v'); + fault_env->SetFilesystemActive(false, Status::IOError("Injected")); + + ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue)); + + fault_env->SetFilesystemActive(true); + + // Without proper handling of the previous trace write error, + // this trace write will continue and crash the db (in DEBUG_LEVEL > 0) + // due to writing to the trace file that has seen error. + ASSERT_OK(db_->Put(WriteOptions(), kBigKey, kBigValue)); + + // Verify `EndTrace()` returns the previous write trace error if any + Status s = db_->EndTrace(); + ASSERT_TRUE(s.IsIncomplete()); + ASSERT_TRUE(s.ToString().find("Tracing has seen error") != std::string::npos); + ASSERT_TRUE(s.ToString().find("Injected") != std::string::npos); + + delete db_; + ASSERT_OK(DestroyDB(dbname_, options)); +} + // Test analyzing of multiget TEST_F(TraceAnalyzerTest, MultiGet) { std::string trace_path = test_path_ + "/trace"; diff --git a/trace_replay/trace_replay.cc b/trace_replay/trace_replay.cc index c681e374c..126a8e248 100644 --- a/trace_replay/trace_replay.cc +++ b/trace_replay/trace_replay.cc @@ -345,7 +345,8 @@ Tracer::Tracer(SystemClock* clock, const TraceOptions& trace_options, : clock_(clock), trace_options_(trace_options), trace_writer_(std::move(trace_writer)), - trace_request_count_(0) { + trace_request_count_(0), + trace_write_status_(Status::OK()) { // TODO: What if this fails? WriteHeader().PermitUncheckedError(); } @@ -612,9 +613,18 @@ Status Tracer::WriteFooter() { } Status Tracer::WriteTrace(const Trace& trace) { + if (!trace_write_status_.ok()) { + return Status::Incomplete("Tracing has seen error: %s", + trace_write_status_.ToString()); + } + assert(trace_write_status_.ok()); std::string encoded_trace; TracerHelper::EncodeTrace(trace, &encoded_trace); - return trace_writer_->Write(Slice(encoded_trace)); + Status s = trace_writer_->Write(Slice(encoded_trace)); + if (!s.ok()) { + trace_write_status_ = s; + } + return s; } Status Tracer::Close() { return WriteFooter(); } diff --git a/trace_replay/trace_replay.h b/trace_replay/trace_replay.h index 9aba5ceb7..55908dcb7 100644 --- a/trace_replay/trace_replay.h +++ b/trace_replay/trace_replay.h @@ -178,6 +178,7 @@ class Tracer { TraceOptions trace_options_; std::unique_ptr trace_writer_; uint64_t trace_request_count_; + Status trace_write_status_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/behavior_changes/skip_trace_write.md b/unreleased_history/behavior_changes/skip_trace_write.md new file mode 100644 index 000000000..55ee8edae --- /dev/null +++ b/unreleased_history/behavior_changes/skip_trace_write.md @@ -0,0 +1 @@ +If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error occured previously in its status message. From 917fd87513545aadfc3cf4091afd9c36b0464567 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan Date: Tue, 24 Oct 2023 11:46:18 -0700 Subject: [PATCH 229/386] Error out in case of std errors in blackbox test and export file in TARGETS Summary: - Right now in blackbox test we don't exit if there are std::error as we do in whitebox crash tests. As result those errors are swallowed. It only errors out if state is unexpected. 
One example that was noticed in blackbox crash test - ``` stderr has error message: ***Error restoring historical expected values: Corruption: DB is older than any restorable expected state*** Running db_stress with pid=30454: /packages/rocksdb_db_stress_internal_repo/rocks_db_stress .... ``` - This diff also provided support to export files - db_crashtest.py file to be used by different repo. Reviewed By: ajkr Differential Revision: D50564889 fbshipit-source-id: 7bafbbc6179dc79467ca2b680fe83afc7850616a --- TARGETS | 2 ++ buckifier/buckify_rocksdb.py | 1 + buckifier/targets_builder.py | 6 ++++++ buckifier/targets_cfg.py | 4 ++++ tools/db_crashtest.py | 22 ++++++++++++++++++++++ 5 files changed, 35 insertions(+) diff --git a/TARGETS b/TARGETS index c53ed156e..6f0efa400 100644 --- a/TARGETS +++ b/TARGETS @@ -5620,3 +5620,5 @@ cpp_unittest_wrapper(name="write_unprepared_transaction_test", deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) + +export_file(name = "tools/db_crashtest.py") diff --git a/buckifier/buckify_rocksdb.py b/buckifier/buckify_rocksdb.py index 0172acf9b..b56e1a82d 100755 --- a/buckifier/buckify_rocksdb.py +++ b/buckifier/buckify_rocksdb.py @@ -303,6 +303,7 @@ def generate_targets(repo_path, deps_map): deps=json.dumps(deps["extra_deps"] + [":rocksdb_test_lib"]), extra_compiler_flags=json.dumps(deps["extra_compiler_flags"]), ) + TARGETS.export_file("tools/db_crashtest.py") print(ColorString.info("Generated TARGETS Summary:")) print(ColorString.info("- %d libs" % TARGETS.total_lib)) diff --git a/buckifier/targets_builder.py b/buckifier/targets_builder.py index 94dbd3653..f5d727469 100644 --- a/buckifier/targets_builder.py +++ b/buckifier/targets_builder.py @@ -148,3 +148,9 @@ def register_test(self, test_name, src, deps, extra_compiler_flags): ).encode("utf-8") ) self.total_test = self.total_test + 1 + + def export_file(self, name): + with open(self.path, "a") as targets_file: + targets_file.write( + targets_cfg.export_file_template.format(name=name) + ) diff --git a/buckifier/targets_cfg.py b/buckifier/targets_cfg.py index 66dd173c2..ead6ac51a 100644 --- a/buckifier/targets_cfg.py +++ b/buckifier/targets_cfg.py @@ -37,3 +37,7 @@ fancy_bench_wrapper(suite_name="{name}", binary_to_bench_to_metric_list_map={bench_config}, slow={slow}, expected_runtime={expected_runtime}, sl_iterations={sl_iterations}, regression_threshold={regression_threshold}) """ + +export_file_template = """ +export_file(name = "{name}") +""" diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index c9374f59c..c09bc4d65 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -846,6 +846,17 @@ def blackbox_crash_main(args, unknown_args): print("stderr has error message:") print("***" + line + "***") + stderrdata = errs.lower() + errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") + print("#times error occurred in output is " + str(errorcount) + "\n") + + if errorcount > 0: + print("TEST FAILED. Output has 'error'!!!\n") + sys.exit(2) + if stderrdata.find("fail") >= 0: + print("TEST FAILED. 
Output has 'fail'!!!\n") + sys.exit(2) + time.sleep(1) # time to stabilize before the next run time.sleep(1) # time to stabilize before the next run @@ -869,6 +880,17 @@ def blackbox_crash_main(args, unknown_args): print("stderr has error message:") print("***" + line + "***") + stderrdata = errs.lower() + errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") + print("#times error occurred in output is " + str(errorcount) + "\n") + + if errorcount > 0: + print("TEST FAILED. Output has 'error'!!!\n") + sys.exit(2) + if stderrdata.find("fail") >= 0: + print("TEST FAILED. Output has 'fail'!!!\n") + sys.exit(2) + # we need to clean up after ourselves -- only do this on test success shutil.rmtree(dbname, True) From ab15d3356687ea8cfb1bd579a17c92877f26911a Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Tue, 24 Oct 2023 12:03:07 -0700 Subject: [PATCH 230/386] Update history, version and format testing for 8.8 (#12004) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12004 Reviewed By: cbi42 Differential Revision: D50586984 Pulled By: hx235 fbshipit-source-id: 1480a8c2757340ebf83510557104aaa0e437b3ae --- HISTORY.md | 25 +++++++++++++++++++ include/rocksdb/version.h | 2 +- tools/check_format_compatible.sh | 2 +- ...s_prefetch_on_zero_compaction_readahead.md | 1 - .../stopped_writes_wait_for_recovery.md | 1 - .../bug_fixes/auto_readahead.md | 1 - .../bug_fixes/fallback_only_unsupported.md | 1 - ...eDeltaIterator_and_Write(Un)PreparedTxn.md | 1 - .../max_successive_merges_wide_columns.md | 1 - unreleased_history/bug_fixes/options_race.md | 1 - .../sec_cache_reservation_underflow.md | 1 - .../new_features/attribute_group_multiget.md | 1 - .../new_features/fifo_drop_file_new_stats.md | 1 - .../new_features/offpeak_db_option.md | 1 - .../new_features/runtime_burst_bytes.md | 1 - .../options_files_on_open.md | 1 - ...il_if_options_file_error_default_change.md | 1 - 17 files changed, 27 insertions(+), 16 deletions(-) delete mode 100644 unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md delete mode 100644 unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md delete mode 100644 unreleased_history/bug_fixes/auto_readahead.md delete mode 100644 unreleased_history/bug_fixes/fallback_only_unsupported.md delete mode 100644 unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md delete mode 100644 unreleased_history/bug_fixes/max_successive_merges_wide_columns.md delete mode 100644 unreleased_history/bug_fixes/options_race.md delete mode 100644 unreleased_history/bug_fixes/sec_cache_reservation_underflow.md delete mode 100644 unreleased_history/new_features/attribute_group_multiget.md delete mode 100644 unreleased_history/new_features/fifo_drop_file_new_stats.md delete mode 100644 unreleased_history/new_features/offpeak_db_option.md delete mode 100644 unreleased_history/new_features/runtime_burst_bytes.md delete mode 100644 unreleased_history/performance_improvements/options_files_on_open.md delete mode 100644 unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md diff --git a/HISTORY.md b/HISTORY.md index a4f3b69da..b32a3dd7f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,31 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.8.0 (10/23/2023) +### New Features +* Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). 
Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. More APIs to support AttributeGroup will come soon, including GetEntity, PutEntity, and others. +* Added new tickers `rocksdb.fifo.{max.size|ttl}.compactions` to count FIFO compactions that drop files for different reasons +* Add an experimental offpeak duration awareness by setting `DBOptions::daily_offpeak_time_utc` in "HH:mm-HH:mm" format. This information will be used for resource optimization in the future +* Users can now change the max bytes granted in a single refill period (i.e, burst) during runtime by `SetSingleBurstBytes()` for RocksDB rate limiter + +### Public API Changes +* The default value of `DBOptions::fail_if_options_file_error` changed from `false` to `true`. Operations that set in-memory options (e.g., `DB::Open*()`, `DB::SetOptions()`, `DB::CreateColumnFamily*()`, and `DB::DropColumnFamily()`) but fail to persist the change will now return a non-OK `Status` by default. + +### Behavior Changes +* For non direct IO, eliminate the file system prefetching attempt for compaction read when `Options::compaction_readahead_size` is 0 +* During a write stop, writes now block on in-progress recovery attempts + +### Bug Fixes +* Fix a bug in auto_readahead_size where first_internal_key of index blocks wasn't copied properly resulting in corruption error when first_internal_key was used for comparison. +* Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching after file system's prefetching returns non-OK status other than `Status::NotSupported()` +* Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn respect the upper bound and lower bound in ReadOption. See 11680. +* Fixed the handling of wide-column base values in the `max_successive_merges` logic. +* Fixed a rare race bug involving a concurrent combination of Create/DropColumnFamily and/or Set(DB)Options that could lead to inconsistency between (a) the DB's reported options state, (b) the DB options in effect, and (c) the latest persisted OPTIONS file. +* Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio. + +### Performance Improvements +* Improved the I/O efficiency of DB::Open a new DB with `create_missing_column_families=true` and many column families. + ## 8.7.0 (09/22/2023) ### New Features * Added an experimental new "automatic" variant of HyperClockCache that does not require a prior estimate of the average size of cache entries. This variant is activated when HyperClockCacheOptions::estimated\_entry\_charge = 0 and has essentially the same concurrency benefits as the existing HyperClockCache. diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index cecbb7c7b..139058155 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -12,7 +12,7 @@ // NOTE: in 'main' development branch, this should be the *next* // minor or major version number planned for release. #define ROCKSDB_MAJOR 8 -#define ROCKSDB_MINOR 8 +#define ROCKSDB_MINOR 9 #define ROCKSDB_PATCH 0 // Do not use these. 
We made the mistake of declaring macros starting with diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index 586668096..e4771706f 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb" "8.7.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb" "8.7.fb" "8.8.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment diff --git a/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md b/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md deleted file mode 100644 index e09f693ef..000000000 --- a/unreleased_history/behavior_changes/no_fs_prefetch_on_zero_compaction_readahead.md +++ /dev/null @@ -1 +0,0 @@ -For non direct IO, eliminate the file system prefetching attempt for compaction read when `Options::compaction_readahead_size` is 0 diff --git a/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md b/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md deleted file mode 100644 index 2c44d5572..000000000 --- a/unreleased_history/behavior_changes/stopped_writes_wait_for_recovery.md +++ /dev/null @@ -1 +0,0 @@ -* During a write stop, writes now block on in-progress recovery attempts diff --git a/unreleased_history/bug_fixes/auto_readahead.md b/unreleased_history/bug_fixes/auto_readahead.md deleted file mode 100644 index a6b009fde..000000000 --- a/unreleased_history/bug_fixes/auto_readahead.md +++ /dev/null @@ -1 +0,0 @@ -Fix a bug in auto_readahead_size where first_internal_key of index blocks wasn't copied properly resulting in corruption error when first_internal_key was used for comparison. diff --git a/unreleased_history/bug_fixes/fallback_only_unsupported.md b/unreleased_history/bug_fixes/fallback_only_unsupported.md deleted file mode 100644 index feb02ce3b..000000000 --- a/unreleased_history/bug_fixes/fallback_only_unsupported.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching after file system's prefetching returns non-OK status other than `Status::NotSupported()` diff --git a/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md b/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md deleted file mode 100644 index 237351f87..000000000 --- a/unreleased_history/bug_fixes/fix_bounds_check_in_BaseDeltaIterator_and_Write(Un)PreparedTxn.md +++ /dev/null @@ -1 +0,0 @@ -Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn respect the upper bound and lower bound in ReadOption. See 11680. 
\ No newline at end of file diff --git a/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md b/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md deleted file mode 100644 index d24b6cf30..000000000 --- a/unreleased_history/bug_fixes/max_successive_merges_wide_columns.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the handling of wide-column base values in the `max_successive_merges` logic. diff --git a/unreleased_history/bug_fixes/options_race.md b/unreleased_history/bug_fixes/options_race.md deleted file mode 100644 index 42b89b8ea..000000000 --- a/unreleased_history/bug_fixes/options_race.md +++ /dev/null @@ -1 +0,0 @@ -* Fixed a rare race bug involving a concurrent combination of Create/DropColumnFamily and/or Set(DB)Options that could lead to inconsistency between (a) the DB's reported options state, (b) the DB options in effect, and (c) the latest persisted OPTIONS file. diff --git a/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md b/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md deleted file mode 100644 index 571100a3e..000000000 --- a/unreleased_history/bug_fixes/sec_cache_reservation_underflow.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio. diff --git a/unreleased_history/new_features/attribute_group_multiget.md b/unreleased_history/new_features/attribute_group_multiget.md deleted file mode 100644 index 4a7ae818f..000000000 --- a/unreleased_history/new_features/attribute_group_multiget.md +++ /dev/null @@ -1 +0,0 @@ -Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. More APIs to support AttributeGroup will come soon, including GetEntity, PutEntity, and others. diff --git a/unreleased_history/new_features/fifo_drop_file_new_stats.md b/unreleased_history/new_features/fifo_drop_file_new_stats.md deleted file mode 100644 index 30134b2c2..000000000 --- a/unreleased_history/new_features/fifo_drop_file_new_stats.md +++ /dev/null @@ -1 +0,0 @@ -Added new tickers `rocksdb.fifo.{max.size|ttl}.compactions` to count FIFO compactions that drop files for different reasons diff --git a/unreleased_history/new_features/offpeak_db_option.md b/unreleased_history/new_features/offpeak_db_option.md deleted file mode 100644 index bdc9b2a29..000000000 --- a/unreleased_history/new_features/offpeak_db_option.md +++ /dev/null @@ -1 +0,0 @@ -Add an experimental offpeak duration awareness by setting `DBOptions::daily_offpeak_time_utc` in "HH:mm-HH:mm" format. 
This information will be used for resource optimization in the future diff --git a/unreleased_history/new_features/runtime_burst_bytes.md b/unreleased_history/new_features/runtime_burst_bytes.md deleted file mode 100644 index 26f1a8c87..000000000 --- a/unreleased_history/new_features/runtime_burst_bytes.md +++ /dev/null @@ -1 +0,0 @@ -Users can now change the max bytes granted in a single refill period (i.e, burst) during runtime by `SetSingleBurstBytes()` for RocksDB rate limiter diff --git a/unreleased_history/performance_improvements/options_files_on_open.md b/unreleased_history/performance_improvements/options_files_on_open.md deleted file mode 100644 index e4ecf2949..000000000 --- a/unreleased_history/performance_improvements/options_files_on_open.md +++ /dev/null @@ -1 +0,0 @@ -Improved the I/O efficiency of DB::Open a new DB with `create_missing_column_families=true` and many column families. diff --git a/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md b/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md deleted file mode 100644 index 44e3bb507..000000000 --- a/unreleased_history/public_api_changes/fail_if_options_file_error_default_change.md +++ /dev/null @@ -1 +0,0 @@ -* The default value of `DBOptions::fail_if_options_file_error` changed from `false` to `true`. Operations that set in-memory options (e.g., `DB::Open*()`, `DB::SetOptions()`, `DB::CreateColumnFamily*()`, and `DB::DropColumnFamily()`) but fail to persist the change will now return a non-OK `Status` by default. From 0ff7665c953835da144621133ee1bc1f6250ebf0 Mon Sep 17 00:00:00 2001 From: Myth Date: Tue, 24 Oct 2023 14:41:46 -0700 Subject: [PATCH 231/386] Fix low priority write may cause crash when it is rate limited (#11932) Summary: Fixed https://github.com/facebook/rocksdb/issues/11902 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11932 Reviewed By: akankshamahajan15 Differential Revision: D50573356 Pulled By: hx235 fbshipit-source-id: adeb1abdc43b523b0357746055ce4a2eabde56a1 --- db/db_impl/db_impl_write.cc | 10 +++++++--- db/db_test2.cc | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index b3af38d21..77c1d55e7 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1956,9 +1956,13 @@ Status DBImpl::ThrottleLowPriWritesIfNeeded(const WriteOptions& write_options, // a chance to run. Now we guarantee we are still slowly making // progress. 
PERF_TIMER_GUARD(write_delay_time); - write_controller_.low_pri_rate_limiter()->Request( - my_batch->GetDataSize(), Env::IO_HIGH, nullptr /* stats */, - RateLimiter::OpType::kWrite); + auto data_size = my_batch->GetDataSize(); + while (data_size > 0) { + size_t allowed = write_controller_.low_pri_rate_limiter()->RequestToken( + data_size, 0 /* alignment */, Env::IO_HIGH, nullptr /* stats */, + RateLimiter::OpType::kWrite); + data_size -= allowed; + } } } return Status::OK(); diff --git a/db/db_test2.cc b/db/db_test2.cc index be25cf54e..fa9da48d2 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -3882,16 +3882,26 @@ TEST_F(DBTest2, LowPriWrite) { ASSERT_OK(Put("", "", wo)); ASSERT_EQ(1, rate_limit_count.load()); + wo.low_pri = true; + std::string big_value = std::string(1 * 1024 * 1024, 'x'); + ASSERT_OK(Put("", big_value, wo)); + ASSERT_LT(1, rate_limit_count.load()); + // Reset + rate_limit_count = 0; + wo.low_pri = false; + ASSERT_OK(Put("", big_value, wo)); + ASSERT_EQ(0, rate_limit_count.load()); + TEST_SYNC_POINT("DBTest.LowPriWrite:0"); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); ASSERT_OK(dbfull()->TEST_WaitForCompact()); wo.low_pri = true; ASSERT_OK(Put("", "", wo)); - ASSERT_EQ(1, rate_limit_count.load()); + ASSERT_EQ(0, rate_limit_count.load()); wo.low_pri = false; ASSERT_OK(Put("", "", wo)); - ASSERT_EQ(1, rate_limit_count.load()); + ASSERT_EQ(0, rate_limit_count.load()); } TEST_F(DBTest2, RateLimitedCompactionReads) { From dc87847e65449ef1cb6f787c5d753cbe8562bff1 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 24 Oct 2023 16:20:37 -0700 Subject: [PATCH 232/386] Fix windows build errors (rdtsc and fnptr) (#12008) Summary: Combining best parts of https://github.com/facebook/rocksdb/issues/11794 and https://github.com/facebook/rocksdb/issues/11766, fixing the CircleCI config in the latter. I was going to amend the latter but wasn't granted access. Ideally this would be ported back to 8.4 branch and crc32c part into 8.3 branch. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12008 Test Plan: CI Reviewed By: hx235 Differential Revision: D50616172 Pulled By: pdillinger fbshipit-source-id: fa7f778bc281e881a140522e774f480c6d1e5f48 --- .circleci/config.yml | 24 ++++++++++++++++++++++-- env/unique_id_gen.cc | 1 + util/crc32c.cc | 8 +++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e6bd030e9..711e471c1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -69,7 +69,7 @@ commands: command: | mkdir build cd build - & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 .. + & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE="$Env:CMAKE_PORTABLE" -DSNAPPY=1 -DJNI=1 .. cd .. 
echo "Building with VS version: $Env:CMAKE_GENERATOR" msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64 @@ -563,6 +563,23 @@ jobs: no_output_timeout: 100m - post-steps + build-windows-vs2022-avx2: + executor: + name: win/server-2022 + size: 2xlarge + environment: + THIRDPARTY_HOME: C:/Users/circleci/thirdparty + CMAKE_HOME: C:/Program Files/CMake + CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe + CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe + SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 + SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build + SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib + CMAKE_GENERATOR: Visual Studio 17 2022 + CMAKE_PORTABLE: AVX2 + steps: + - windows-build-steps + build-windows-vs2022: executor: name: win/server-2022 @@ -576,6 +593,7 @@ jobs: SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib CMAKE_GENERATOR: Visual Studio 17 2022 + CMAKE_PORTABLE: 1 steps: - windows-build-steps @@ -592,6 +610,7 @@ jobs: SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib CMAKE_GENERATOR: Visual Studio 16 2019 + CMAKE_PORTABLE: 1 steps: - windows-build-steps @@ -613,7 +632,7 @@ jobs: - post-steps build-linux-java-pmd: - machine: + machine: image: ubuntu-2004:202111-02 resource_class: large environment: @@ -887,6 +906,7 @@ workflows: - build-linux-mini-crashtest jobs-windows: jobs: + - build-windows-vs2022-avx2 - build-windows-vs2022 - build-windows-vs2019 - build-cmake-mingw diff --git a/env/unique_id_gen.cc b/env/unique_id_gen.cc index 7d221d374..8d9db8695 100644 --- a/env/unique_id_gen.cc +++ b/env/unique_id_gen.cc @@ -21,6 +21,7 @@ #ifdef __SSE4_2__ #ifdef _WIN32 #include +#define _rdtsc() __rdtsc() #else #include #endif diff --git a/util/crc32c.cc b/util/crc32c.cc index d4cd78b52..9e97045f4 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -1117,7 +1117,13 @@ static inline Function Choose_Extend() { } #elif defined(__SSE4_2__) && defined(__PCLMUL__) && !defined NO_THREEWAY_CRC32C // NOTE: runtime detection no longer supported on x86 - (void)ExtendImpl; // suppress unused warning +#ifdef _MSC_VER +#pragma warning(disable: 4551) +#endif + (void)ExtendImpl; // suppress unused warning +#ifdef _MSC_VER +#pragma warning(default: 4551) +#endif return crc32c_3way; #else return ExtendImpl; From f2c9075d16d17a14598246cfc6e620d60135849d Mon Sep 17 00:00:00 2001 From: qiuchengxuan Date: Wed, 25 Oct 2023 09:16:24 -0700 Subject: [PATCH 233/386] Fix dead loop with kSkipAnyCorruptedRecords mode selected in some cases (#11955) (#11979) Summary: With fragmented record span across multiple blocks, if any following blocks corrupted with arbitary data, and intepreted log number less than the current log number, program will fall into infinite loop due to not skipping buffer leading bytes Pull Request resolved: https://github.com/facebook/rocksdb/pull/11979 Test Plan: existing unit tests Reviewed By: ajkr Differential Revision: D50604408 Pulled By: jowlyzhang fbshipit-source-id: e50a0c7e7c3d293fb9d5afec0a3eb4a1835b7a3b --- db/log_reader.cc | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/db/log_reader.cc b/db/log_reader.cc index 
5ec262dcd..4e470616f 100644 --- a/db/log_reader.cc +++ b/db/log_reader.cc @@ -469,12 +469,14 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size, const unsigned int type = header[6]; const uint32_t length = a | (b << 8); int header_size = kHeaderSize; - if ((type >= kRecyclableFullType && type <= kRecyclableLastType) || - type == kRecyclableUserDefinedTimestampSizeType) { + const bool is_recyclable_type = + ((type >= kRecyclableFullType && type <= kRecyclableLastType) || + type == kRecyclableUserDefinedTimestampSizeType); + if (is_recyclable_type) { + header_size = kRecyclableHeaderSize; if (end_of_buffer_offset_ - buffer_.size() == 0) { recycled_ = true; } - header_size = kRecyclableHeaderSize; // We need enough for the larger header if (buffer_.size() < static_cast(kRecyclableHeaderSize)) { int r = kEof; @@ -483,11 +485,8 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size, } continue; } - const uint32_t log_num = DecodeFixed32(header + 7); - if (log_num != log_number_) { - return kOldRecord; - } } + if (header_size + length > buffer_.size()) { assert(buffer_.size() >= static_cast(header_size)); *drop_size = buffer_.size(); @@ -499,6 +498,14 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size, return kBadRecordLen; } + if (is_recyclable_type) { + const uint32_t log_num = DecodeFixed32(header + 7); + if (log_num != log_number_) { + buffer_.remove_prefix(header_size + length); + return kOldRecord; + } + } + if (type == kZeroType && length == 0) { // Skip zero length record without reporting any drops since // such records are produced by the mmap based writing code in From 0f141352d8de2f743d222a6f2ff493a31dd2838c Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 25 Oct 2023 11:59:09 -0700 Subject: [PATCH 234/386] Fix race between flush error recovery and db destruction (#12002) Summary: **Context:** DB destruction will wait for ongoing error recovery through `EndAutoRecovery()` and join the recovery thread: https://github.com/facebook/rocksdb/blob/519f2a41fb76e5644c63e4e588addb3b88b36580/db/db_impl/db_impl.cc#L525 -> https://github.com/facebook/rocksdb/blob/519f2a41fb76e5644c63e4e588addb3b88b36580/db/error_handler.cc#L250 -> https://github.com/facebook/rocksdb/blob/519f2a41fb76e5644c63e4e588addb3b88b36580/db/error_handler.cc#L808-L823 However, due to a race between flush error recovery and db destruction, recovery can actually start after such wait during the db shutdown. The consequence is that the recovery thread created as part of this recovery will not be properly joined upon its destruction as part the db destruction. It then crashes the program as below. ``` std::terminate() std::default_delete::operator()(std::thread*) const std::unique_ptr>::~unique_ptr() rocksdb::ErrorHandler::~ErrorHandler() (rocksdb/db/error_handler.h:31) rocksdb::DBImpl::~DBImpl() (rocksdb/db/db_impl/db_impl.cc:725) rocksdb::DBImpl::~DBImpl() (rocksdb/db/db_impl/db_impl.cc:725) rocksdb::DBTestBase::Close() (rocksdb/db/db_test_util.cc:678) ``` **Summary:** This PR fixed it by considering whether EndAutoRecovery() has been called before creating such thread. This fix is similar to how we currently [handle](https://github.com/facebook/rocksdb/blob/519f2a41fb76e5644c63e4e588addb3b88b36580/db/error_handler.cc#L688-L694) such case inside the created recovery thread. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12002 Test Plan: A new UT repro-ed the crash before this fix and and pass after. 
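The synchronization pattern behind the fix can be distilled into a short standalone sketch. This is illustrative only, not RocksDB code; the class and member names are made up. The idea is that shutdown sets a flag under the same mutex that guards thread creation, so a recovery request that loses the race can no longer spawn a thread that nobody joins:

```
#include <mutex>
#include <thread>

class RecoveryDemo {
 public:
  ~RecoveryDemo() { EndAutoRecovery(); }

  void MaybeStartRecovery() {
    std::lock_guard<std::mutex> lock(mu_);
    if (end_recovery_ || thread_.joinable()) {
      return;  // Shutdown already requested, or a recovery thread exists.
    }
    thread_ = std::thread([this] { /* ... attempt recovery ... */ });
  }

  void EndAutoRecovery() {
    std::thread old_thread;
    {
      std::lock_guard<std::mutex> lock(mu_);
      end_recovery_ = true;  // Blocks any later MaybeStartRecovery().
      old_thread = std::move(thread_);
    }
    if (old_thread.joinable()) {
      old_thread.join();  // Join outside the lock.
    }
  }

 private:
  std::mutex mu_;
  bool end_recovery_ = false;
  std::thread thread_;
};
```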
Reviewed By: ajkr Differential Revision: D50586191 Pulled By: hx235 fbshipit-source-id: b372f6d7a94eadee4b9283b826cc5fb81779a093 --- db/error_handler.cc | 8 +++ db/error_handler_fs_test.cc | 56 +++++++++++++++++++ .../flush_recovery_db_destructor_race.md | 1 + 3 files changed, 65 insertions(+) create mode 100644 unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md diff --git a/db/error_handler.cc b/db/error_handler.cc index 95b9a0fe6..21c3a686f 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -645,6 +645,13 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( } else if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) { // Auto resume BG error is not enabled, directly return bg_error_. return bg_error_; + } else if (end_recovery_) { + // Can temporarily release db mutex + EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_, + Status::ShutdownInProgress(), + db_mutex_); + db_mutex_->AssertHeld(); + return bg_error_; } if (bg_error_stats_ != nullptr) { RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT); @@ -819,6 +826,7 @@ void ErrorHandler::EndAutoRecovery() { old_recovery_thread->join(); db_mutex_->Lock(); } + TEST_SYNC_POINT("PostEndAutoRecovery"); return; } diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index bbff8c7fe..56a0f57aa 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -7,12 +7,15 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include + #include "db/db_test_util.h" #include "file/sst_file_manager_impl.h" #include "port/stack_trace.h" #include "rocksdb/io_status.h" #include "rocksdb/sst_file_manager.h" #include "test_util/sync_point.h" +#include "test_util/testharness.h" #include "util/random.h" #include "utilities/fault_injection_env.h" #include "utilities/fault_injection_fs.h" @@ -2473,6 +2476,59 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAbortRecovery) { Destroy(options); } +TEST_F(DBErrorHandlingFSTest, FlushErrorRecoveryRaceWithDBDestruction) { + Options options = GetDefaultOptions(); + options.env = fault_env_.get(); + options.create_if_missing = true; + std::shared_ptr listener = + std::make_shared(); + options.listeners.emplace_back(listener); + DestroyAndReopen(options); + ASSERT_OK(Put("k1", "val")); + + // Inject retryable flush error + bool error_set = false; + SyncPoint::GetInstance()->SetCallBack( + "BuildTable:BeforeOutputValidation", [&](void*) { + if (error_set) { + return; + } + IOStatus st = IOStatus::IOError("Injected"); + st.SetRetryable(true); + fault_fs_->SetFilesystemActive(false, st); + error_set = true; + }); + + port::Thread db_close_thread; + SyncPoint::GetInstance()->SetCallBack( + "BuildTable:BeforeDeleteFile", [&](void*) { + // Clear retryable flush error injection + fault_fs_->SetFilesystemActive(true); + + // Coerce race between ending auto recovery in db destruction and flush + // error recovery + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"PostEndAutoRecovery", "FlushJob::WriteLevel0Table"}}); + db_close_thread = port::Thread([&] { Close(); }); + }); + SyncPoint::GetInstance()->EnableProcessing(); + + Status s = Flush(); + ASSERT_NOK(s); + + int placeholder = 1; + listener->WaitForRecovery(placeholder); + ASSERT_TRUE(listener->new_bg_error().IsShutdownInProgress()); + + // Prior to the fix, the db close will crash due to the recovery thread for + // flush error 
is not joined by the time of destruction. + db_close_thread.join(); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + Destroy(options); +} + TEST_F(DBErrorHandlingFSTest, FlushReadError) { std::shared_ptr listener = std::make_shared(); diff --git a/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md b/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md new file mode 100644 index 000000000..76cc3c721 --- /dev/null +++ b/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md @@ -0,0 +1 @@ +Fix a race between flush error recovery and db destruction that can lead to db crashing. From 8ee009f0d806e068f274c63701c2271cb56d1293 Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Wed, 25 Oct 2023 15:43:05 -0700 Subject: [PATCH 235/386] Downgrade windows 2019 build to older image. (#12014) Summary: This should fix failed java windows build https://github.com/facebook/rocksdb/issues/12013 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12014 Reviewed By: ajkr Differential Revision: D50664503 Pulled By: akankshamahajan15 fbshipit-source-id: 3466ce42d3cae3f8e0beba88a18744d647a32a2c --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 711e471c1..952aa5409 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -600,6 +600,7 @@ jobs: build-windows-vs2019: executor: name: win/server-2019 + version: 2023.08.1 size: 2xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty From 52be8f54f2a5570f6cac98a1cf0340ca1b98ac32 Mon Sep 17 00:00:00 2001 From: anand76 Date: Wed, 25 Oct 2023 16:54:50 -0700 Subject: [PATCH 236/386] Add APIs to query secondary cache capacity and usage for TieredCache (#12011) Summary: In `TieredCache`, the underlying compressed secondary cache is hidden from the user. So we need a way to query the capacity, as well as the portion of cache reservation charged to the compressed secondary cache. 
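For readers of this patch, a minimal usage sketch of the two new accessors follows. It is not part of the change; the reporting function and how the `cache` object was constructed (e.g., with a compressed secondary cache attached through the tiered cache options) are assumptions for illustration. Only the `GetSecondaryCacheCapacity()` and `GetSecondaryCachePinnedUsage()` calls come from this patch:

```
#include <cstddef>
#include <cstdio>
#include <memory>

#include "rocksdb/advanced_cache.h"

// Prints the configured secondary cache capacity and the portion of cache
// reservations charged to it. Per this patch, the base Cache default returns
// NotSupported(), while sharded caches without a secondary tier report zero.
void ReportSecondaryCacheStats(const std::shared_ptr<rocksdb::Cache>& cache) {
  size_t sec_capacity = 0;
  size_t sec_pinned = 0;
  rocksdb::Status s = cache->GetSecondaryCacheCapacity(sec_capacity);
  if (s.ok()) {
    s = cache->GetSecondaryCachePinnedUsage(sec_pinned);
  }
  if (s.ok()) {
    std::fprintf(stderr, "secondary capacity=%zu, pinned usage=%zu\n",
                 sec_capacity, sec_pinned);
  }
}
```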
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12011 Test Plan: Update the unit tests Reviewed By: akankshamahajan15 Differential Revision: D50651943 Pulled By: anand1976 fbshipit-source-id: 06d1cb5edb75a790c919bce718e2ff65f5908220 --- cache/compressed_secondary_cache_test.cc | 7 ++++++ cache/secondary_cache_adapter.cc | 23 +++++++++++++++++++ cache/secondary_cache_adapter.h | 6 ++++- cache/sharded_cache.cc | 10 ++++++++ cache/sharded_cache.h | 2 ++ include/rocksdb/advanced_cache.h | 8 +++++++ .../tiered_cache_capacity_and_usage.md | 1 + 7 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index 71702b29f..84633941d 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -1276,6 +1276,13 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); ASSERT_EQ(sec_capacity, (30 << 20)); + ASSERT_OK(tiered_cache->GetSecondaryCacheCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 30 << 20); + size_t sec_usage; + ASSERT_OK(tiered_cache->GetSecondaryCachePinnedUsage(sec_usage)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_usage, 3 << 20, + GetPercent(3 << 20, 1)); + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.39)); EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (45 << 20), GetPercent(45 << 20, 1)); diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index 70571f5b2..84b4437e8 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -489,6 +489,29 @@ void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { } } +Status CacheWithSecondaryAdapter::GetSecondaryCacheCapacity( + size_t& size) const { + return secondary_cache_->GetCapacity(size); +} + +Status CacheWithSecondaryAdapter::GetSecondaryCachePinnedUsage( + size_t& size) const { + Status s; + if (distribute_cache_res_) { + MutexLock m(&mutex_); + size_t capacity = 0; + s = secondary_cache_->GetCapacity(capacity); + if (s.ok()) { + size = capacity - pri_cache_res_->GetTotalMemoryUsed(); + } else { + size = 0; + } + } else { + size = 0; + } + return s; +} + // Update the secondary/primary allocation ratio (remember, the primary // capacity is the total memory budget when distribute_cache_res_ is true). 
// When the ratio changes, we may accumulate some error in the calculations diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 0d5f2d6ea..6b06d0829 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -47,6 +47,10 @@ class CacheWithSecondaryAdapter : public CacheWrapper { void SetCapacity(size_t capacity) override; + Status GetSecondaryCacheCapacity(size_t& size) const override; + + Status GetSecondaryCachePinnedUsage(size_t& size) const override; + Status UpdateCacheReservationRatio(double ratio); Status UpdateAdmissionPolicy(TieredAdmissionPolicy adm_policy); @@ -81,7 +85,7 @@ class CacheWithSecondaryAdapter : public CacheWrapper { // Fraction of a cache memory reservation to be assigned to the secondary // cache std::atomic sec_cache_res_ratio_; - port::Mutex mutex_; + mutable port::Mutex mutex_; #ifndef NDEBUG bool ratio_changed_ = false; #endif diff --git a/cache/sharded_cache.cc b/cache/sharded_cache.cc index 322b59226..b270df751 100644 --- a/cache/sharded_cache.cc +++ b/cache/sharded_cache.cc @@ -83,6 +83,16 @@ size_t ShardedCacheBase::GetCapacity() const { return capacity_; } +Status ShardedCacheBase::GetSecondaryCacheCapacity(size_t& size) const { + size = 0; + return Status::OK(); +} + +Status ShardedCacheBase::GetSecondaryCachePinnedUsage(size_t& size) const { + size = 0; + return Status::OK(); +} + bool ShardedCacheBase::HasStrictCapacityLimit() const { MutexLock l(&config_mutex_); return strict_capacity_limit_; diff --git a/cache/sharded_cache.h b/cache/sharded_cache.h index 39042137f..b7ef723a1 100644 --- a/cache/sharded_cache.h +++ b/cache/sharded_cache.h @@ -99,6 +99,8 @@ class ShardedCacheBase : public Cache { bool HasStrictCapacityLimit() const override; size_t GetCapacity() const override; + Status GetSecondaryCacheCapacity(size_t& size) const override; + Status GetSecondaryCachePinnedUsage(size_t& size) const override; using Cache::GetUsage; size_t GetUsage(Handle* handle) const override; diff --git a/include/rocksdb/advanced_cache.h b/include/rocksdb/advanced_cache.h index b5dcc3d49..77f1f5ce1 100644 --- a/include/rocksdb/advanced_cache.h +++ b/include/rocksdb/advanced_cache.h @@ -375,6 +375,14 @@ class Cache { // Returns the helper for the specified entry. virtual const CacheItemHelper* GetCacheItemHelper(Handle* handle) const = 0; + virtual Status GetSecondaryCacheCapacity(size_t& /*size*/) const { + return Status::NotSupported(); + } + + virtual Status GetSecondaryCachePinnedUsage(size_t& /*size*/) const { + return Status::NotSupported(); + } + // Call this on shutdown if you want to speed it up. Cache will disown // any underlying data and will not free it on delete. This call will leak // memory - call this only if you're shutting down the process. diff --git a/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md b/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md new file mode 100644 index 000000000..e281d069d --- /dev/null +++ b/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md @@ -0,0 +1 @@ +Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() to return the configured capacity, and cache reservation charged to the secondary cache. From 526f36b48381dd640a0426bd748dbc0bb5797c75 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 25 Oct 2023 17:48:21 -0700 Subject: [PATCH 237/386] Remove extra semicolon (#12017) Summary: As titled. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12017 Reviewed By: hx235 Differential Revision: D50670406 Pulled By: jowlyzhang fbshipit-source-id: 28b3acd930ee676d78ebb47144047ce233fc11c5 --- include/rocksdb/env.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 08f996658..63a161923 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1001,7 +1001,7 @@ class WritableFile { /* * Get the size of valid data in the file. */ - virtual uint64_t GetFileSize() { return 0; }; + virtual uint64_t GetFileSize() { return 0; } /* * Get and set the default pre-allocation block size for writes to From e230e4d248fe334fdb282eed372baa111778fdf2 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Fri, 27 Oct 2023 15:56:48 -0700 Subject: [PATCH 238/386] Make OffpeakTimeInfo available in VersionSet (#12018) Summary: As mentioned in https://github.com/facebook/rocksdb/issues/11893, we are going to use the offpeak time information to pre-process TTL-based compactions. To do so, we need to access `daily_offpeak_time_utc` in `VersionStorageInfo::ComputeCompactionScore()` where we pick the files to compact. This PR is to make the offpeak time information available at the time of compaction-scoring. We are not changing any compaction scoring logic just yet. Will follow up in a separate PR. There were two ways to achieve what we want. 1. Make `MutableDBOptions` available in `ColumnFamilyData` and `ComputeCompactionScore()` take `MutableDBOptions` along with `ImmutableOptions` and `MutableCFOptions`. 2. Make `daily_offpeak_time_utc` and `IsNowOffpeak()` available in `VersionStorageInfo`. We chose the latter as it involves smaller changes. This change includes the following - Introduction of `OffpeakTimeInfo` and `IsNowOffpeak()` has been moved from `MutableDBOptions` - `OffpeakTimeInfo` added to `VersionSet` and it can be set during construction and by `ChangeOffpeakTimeInfo()` - During `SetDBOptions()`, if offpeak time info needs to change, it calls `MaybeScheduleFlushOrCompaction()` to re-compute compaction scores and process compactions as needed Pull Request resolved: https://github.com/facebook/rocksdb/pull/12018 Test Plan: - `DBOptionsTest::OffpeakTimes` changed to include checks for `MaybeScheduleFlushOrCompaction()` calls and `VersionSet`'s OffpeakTimeInfo value change during `SetDBOptions()`. - `VersionSetTest::OffpeakTimeInfoTest` added to test `ChangeOffpeakTimeInfo()`. 
`IsNowOffpeak()` tests moved from `DBOptionsTest::OffpeakTimes` Reviewed By: pdillinger Differential Revision: D50723881 Pulled By: jaykorean fbshipit-source-id: 3cff0291936f3729c0e9c7750834b9378fb435f6 --- CMakeLists.txt | 1 + TARGETS | 1 + db/compaction/compaction_job_test.cc | 13 +- db/compaction/compaction_picker_test.cc | 6 +- db/db_impl/db_impl.cc | 19 ++- db/db_impl/db_impl_compaction_flush.cc | 1 + db/db_options_test.cc | 121 +++++------------ db/db_wal_test.cc | 3 +- db/flush_job_test.cc | 10 +- db/import_column_family_job.cc | 5 +- db/memtable_list_test.cc | 4 +- db/repair.cc | 5 +- db/version_builder_test.cc | 56 ++++---- db/version_set.cc | 36 ++--- db/version_set.h | 21 ++- db/version_set_test.cc | 171 +++++++++++++++++------- db/version_util.h | 3 +- db/wal_manager_test.cc | 11 +- options/db_options.cc | 32 ----- options/db_options.h | 2 - options/offpeak_time_info.cc | 48 +++++++ options/offpeak_time_info.h | 22 +++ src.mk | 1 + tools/ldb_cmd.cc | 9 +- tools/ldb_cmd_test.cc | 3 +- 25 files changed, 355 insertions(+), 249 deletions(-) create mode 100644 options/offpeak_time_info.cc create mode 100644 options/offpeak_time_info.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b475a2224..95ecf7917 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -772,6 +772,7 @@ set(SOURCES options/configurable.cc options/customizable.cc options/db_options.cc + options/offpeak_time_info.cc options/options.cc options/options_helper.cc options/options_parser.cc diff --git a/TARGETS b/TARGETS index 6f0efa400..e8aaf325d 100644 --- a/TARGETS +++ b/TARGETS @@ -163,6 +163,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "options/configurable.cc", "options/customizable.cc", "options/db_options.cc", + "options/offpeak_time_info.cc", "options/options.cc", "options/options_helper.cc", "options/options_parser.cc", diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 8bf3132a1..eccd57701 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -215,7 +215,8 @@ class CompactionJobTestBase : public testing::Test { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "")), + /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "", + /*daily_offpeak_time_utc*/ "")), shutting_down_(false), mock_table_factory_(new mock::MockTableFactory()), error_handler_(nullptr, db_options_, &mutex_), @@ -540,11 +541,11 @@ class CompactionJobTestBase : public testing::Test { ASSERT_OK(s); db_options_.info_log = info_log; - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + versions_.reset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); compaction_job_stats_.Reset(); ASSERT_OK(SetIdentityFile(env_, dbname_)); diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 64326a95c..a5184c956 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -85,7 +85,8 @@ class CompactionPickerTestBase : public testing::Test { vstorage_.reset(new 
VersionStorageInfo( &icmp_, ucmp_, options_.num_levels, style, nullptr, false, EpochNumberRequirement::kMustPresent, ioptions_.clock, - options_.bottommost_file_compaction_delay)); + options_.bottommost_file_compaction_delay, + OffpeakTimeInfo(mutable_db_options_.daily_offpeak_time_utc))); vstorage_->PrepareForVersionAppend(ioptions_, mutable_cf_options_); } @@ -95,7 +96,8 @@ class CompactionPickerTestBase : public testing::Test { temp_vstorage_.reset(new VersionStorageInfo( &icmp_, ucmp_, options_.num_levels, ioptions_.compaction_style, vstorage_.get(), false, EpochNumberRequirement::kMustPresent, - ioptions_.clock, options_.bottommost_file_compaction_delay)); + ioptions_.clock, options_.bottommost_file_compaction_delay, + OffpeakTimeInfo(mutable_db_options_.daily_offpeak_time_utc))); } void DeleteVersionStorage() { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 2d4456485..4a6279148 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -276,10 +276,10 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, this->RecordSeqnoToTimeMapping(/*populate_historical_seconds=*/0); }); - versions_.reset(new VersionSet(dbname_, &immutable_db_options_, file_options_, - table_cache_.get(), write_buffer_manager_, - &write_controller_, &block_cache_tracer_, - io_tracer_, db_id_, db_session_id_)); + versions_.reset(new VersionSet( + dbname_, &immutable_db_options_, file_options_, table_cache_.get(), + write_buffer_manager_, &write_controller_, &block_cache_tracer_, + io_tracer_, db_id_, db_session_id_, options.daily_offpeak_time_utc)); column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); @@ -1328,17 +1328,24 @@ Status DBImpl::SetDBOptions( const bool max_compactions_increased = new_bg_job_limits.max_compactions > current_bg_job_limits.max_compactions; + const bool offpeak_time_changed = + versions_->offpeak_time_info().daily_offpeak_time_utc != + new_db_options.daily_offpeak_time_utc; - if (max_flushes_increased || max_compactions_increased) { + if (max_flushes_increased || max_compactions_increased || + offpeak_time_changed) { if (max_flushes_increased) { env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_flushes, Env::Priority::HIGH); } - if (max_compactions_increased) { env_->IncBackgroundThreadsIfNeeded(new_bg_job_limits.max_compactions, Env::Priority::LOW); } + if (offpeak_time_changed) { + versions_->ChangeOffpeakTimeInfo( + new_db_options.daily_offpeak_time_utc); + } MaybeScheduleFlushOrCompaction(); } diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 5af305d31..08812a35b 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -2783,6 +2783,7 @@ void DBImpl::EnableManualCompaction() { void DBImpl::MaybeScheduleFlushOrCompaction() { mutex_.AssertHeld(); + TEST_SYNC_POINT("DBImpl::MaybeScheduleFlushOrCompaction:Start"); if (!opened_successfully_) { // Compaction may introduce data race to DB open return; diff --git a/db/db_options_test.cc b/db/db_options_test.cc index f52982bbc..fa7f52d29 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1099,7 +1099,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionOptions) { ASSERT_EQ(fifo_temp_opt[1].age, 30000); } -TEST_F(DBOptionsTest, OffPeakTimes) { +TEST_F(DBOptionsTest, OffpeakTimes) { Options options; options.create_if_missing = true; Random rnd(test::RandomSeed()); @@ -1164,8 +1164,8 @@ TEST_F(DBOptionsTest, OffPeakTimes) { now_utc_minute * 60 + 
now_utc_second); Status s = DBImpl::TEST_ValidateOptions(options); ASSERT_OK(s); - auto db_options = MutableDBOptions(options); - ASSERT_EQ(expected, db_options.IsNowOffPeak(mock_clock.get())); + auto offpeak_info = OffpeakTimeInfo(options.daily_offpeak_time_utc); + ASSERT_EQ(expected, offpeak_info.IsNowOffpeak(mock_clock.get())); }; options.daily_offpeak_time_utc = ""; @@ -1194,100 +1194,53 @@ TEST_F(DBOptionsTest, OffPeakTimes) { verify_is_now_offpeak(true, 23, 59, 1); verify_is_now_offpeak(true, 23, 59, 59); - // Open the db and test by Get/SetDBOptions options.daily_offpeak_time_utc = ""; DestroyAndReopen(options); ASSERT_EQ("", dbfull()->GetDBOptions().daily_offpeak_time_utc); + + int may_schedule_compaction_called = 0; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::MaybeScheduleFlushOrCompaction:Start", + [&](void*) { may_schedule_compaction_called++; }); + SyncPoint::GetInstance()->EnableProcessing(); + + // Make sure calling SetDBOptions with invalid option does not set the value + // nor call MaybeScheduleFlushOrCompaction() for (std::string invalid_case : invalid_cases) { ASSERT_NOK( dbfull()->SetDBOptions({{"daily_offpeak_time_utc", invalid_case}})); + ASSERT_EQ( + "", + dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc); } + ASSERT_EQ(0, may_schedule_compaction_called); + + // Changing to new valid values should call MaybeScheduleFlushOrCompaction() + // and sets the offpeak_time_info in VersionSet + int expected_count = 0; for (std::string valid_case : valid_cases) { + if (dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc != + valid_case) { + expected_count++; + } ASSERT_OK(dbfull()->SetDBOptions({{"daily_offpeak_time_utc", valid_case}})); ASSERT_EQ(valid_case, dbfull()->GetDBOptions().daily_offpeak_time_utc); + ASSERT_EQ( + valid_case, + dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc); } - Close(); - - // Sets off-peak time from 11:30PM to 4:30AM next day. - // Starting at 1:30PM, use mock sleep to make time pass - // and see if IsNowOffPeak() returns correctly per time changes - int now_hour = 13; - int now_minute = 30; - options.daily_offpeak_time_utc = "23:30-04:30"; - auto mock_clock = std::make_shared(env_->GetSystemClock()); - auto mock_env = std::make_unique(env_, mock_clock); - // Add some extra random days to current time - int days = rnd.Uniform(100); - mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); - options.env = mock_env.get(); - - // Starting at 1:30PM. It's not off-peak - DestroyAndReopen(options); - ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Now it's at 4:30PM. Still not off-peak - mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Now it's at 11:30PM. It's off-peak - mock_clock->MockSleepForSeconds(7 * 3600); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Now it's at 2:30AM next day. It's still off-peak - mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Now it's at 4:30AM. It's still off-peak - mock_clock->MockSleepForSeconds(2 * 3600); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Sleep for one more minute. 
It's at 4:31AM It's no longer off-peak - mock_clock->MockSleepForSeconds(60); - ASSERT_FALSE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - Close(); + ASSERT_EQ(expected_count, may_schedule_compaction_called); - // Entire day offpeak - options.daily_offpeak_time_utc = "00:00-23:59"; - DestroyAndReopen(options); - // It doesn't matter what time it is. It should be just offpeak. - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Mock Sleep for 3 hours. It's still off-peak - mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Mock Sleep for 20 hours. It's still off-peak - mock_clock->MockSleepForSeconds(20 * 3600); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Mock Sleep for 59 minutes. It's still off-peak - mock_clock->MockSleepForSeconds(59 * 60); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Mock Sleep for 59 seconds. It's still off-peak - mock_clock->MockSleepForSeconds(59); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - - // Mock Sleep for 1 second (exactly 24h passed). It's still off-peak - mock_clock->MockSleepForSeconds(1); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); - // Another second for sanity check - mock_clock->MockSleepForSeconds(1); - ASSERT_TRUE(MutableDBOptions(dbfull()->GetDBOptions()) - .IsNowOffPeak(mock_clock.get())); + // Changing to the same value should not call MaybeScheduleFlushOrCompaction() + ASSERT_OK( + dbfull()->SetDBOptions({{"daily_offpeak_time_utc", "06:30-11:30"}})); + may_schedule_compaction_called = 0; + ASSERT_OK( + dbfull()->SetDBOptions({{"daily_offpeak_time_utc", "06:30-11:30"}})); + ASSERT_EQ(0, may_schedule_compaction_called); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); Close(); } diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index edc2ebf2e..7e7a89cdf 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1541,7 +1541,8 @@ class RecoveryTestHelper { test->dbname_, &db_options, file_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "")); + /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "", + options.daily_offpeak_time_utc)); wal_manager.reset( new WalManager(db_options, file_options, /*io_tracer=*/nullptr)); diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 0f7871709..9a626eac8 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -127,11 +127,11 @@ class FlushJobTestBase : public testing::Test { column_families.emplace_back(cf_name, cf_options_); } - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + versions_.reset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); EXPECT_OK(versions_->Recover(column_families, false)); } diff --git a/db/import_column_family_job.cc 
b/db/import_column_family_job.cc index 32bc4eead..3ad8b7b61 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -186,7 +186,8 @@ Status ImportColumnFamilyJob::Run() { cfd_->NumberLevels(), cfd_->ioptions()->compaction_style, nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks, EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock, - cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay); + cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay, + cfd_->current()->version_set()->offpeak_time_info()); Status s; for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) { @@ -429,4 +430,4 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( return status; } -} // namespace ROCKSDB_NAMESPACE \ No newline at end of file +} // namespace ROCKSDB_NAMESPACE diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 3203c7a00..6292f46e4 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -107,7 +107,7 @@ class MemTableListTest : public testing::Test { table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ ""); + /*db_session_id*/ "", /*daily_offpeak_time_utc*/ ""); std::vector cf_descs; cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); cf_descs.emplace_back("one", ColumnFamilyOptions()); @@ -158,7 +158,7 @@ class MemTableListTest : public testing::Test { table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ ""); + /*db_session_id*/ "", /*daily_offpeak_time_utc*/ ""); std::vector cf_descs; cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); cf_descs.emplace_back("one", ColumnFamilyOptions()); diff --git a/db/repair.cc b/db/repair.cc index e303eae64..dee5d6c7e 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -122,7 +122,7 @@ class Repairer { vset_(dbname_, &immutable_db_options_, file_options_, raw_table_cache_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id=*/"", db_session_id_), + /*db_id=*/"", db_session_id_, db_options.daily_offpeak_time_utc), next_file_number_(1), db_lock_(nullptr), closed_(false) { @@ -694,7 +694,8 @@ class Repairer { cfd->NumberLevels(), cfd->ioptions()->compaction_style, nullptr /* src_vstorage */, cfd->ioptions()->force_consistency_checks, EpochNumberRequirement::kMightMissing, cfd->ioptions()->clock, - /*bottommost_file_compaction_delay=*/0); + /*bottommost_file_compaction_delay=*/0, + cfd->current()->version_set()->offpeak_time_info()); Status s; VersionEdit dummy_edit; for (const auto* table : cf_id_and_tables.second) { diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index 34db9aba8..00b4a810a 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -38,7 +38,8 @@ class VersionBuilderTest : public testing::Test { mutable_cf_options_(options_), vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMustPresent, - ioptions_.clock, options_.bottommost_file_compaction_delay), + ioptions_.clock, options_.bottommost_file_compaction_delay, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)), file_num_(1) { mutable_cf_options_.RefreshDerivedOptions(ioptions_); size_being_compacted_.resize(options_.num_levels); @@ -202,7 +203,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { VersionStorageInfo 
new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -253,7 +255,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -308,7 +311,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -365,7 +369,8 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -388,7 +393,8 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); VersionEdit version_edit; version_edit.AddFile( @@ -557,7 +563,7 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -702,7 +708,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -747,7 +753,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -887,7 +893,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -961,7 +967,7 @@ TEST_F(VersionBuilderTest, 
ApplyBlobFileGarbageFileAdditionApplied) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1142,7 +1148,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1191,7 +1197,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { VersionStorageInfo newer_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &new_vstorage, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(second_builder.SaveTo(&newer_vstorage)); @@ -1278,7 +1284,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1382,7 +1388,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1422,7 +1428,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1464,7 +1470,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1514,7 +1520,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1678,7 +1684,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0); + 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1730,7 +1736,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, 
kCompactionStyleLevel, nullptr, true /* force_consistency_checks */, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -1741,7 +1748,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { VersionStorageInfo new_vstorage2( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, true /* force_consistency_checks */, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_NOK(version_builder2.Apply(&version_edit)); UnrefFilesInVersion(&new_vstorage); @@ -1780,7 +1788,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { VersionStorageInfo new_vstorage_1( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr /* src_vstorage */, true /* force_consistency_checks */, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder_1.Apply(&version_edit_1)); s = version_builder_1.SaveTo(&new_vstorage_1); @@ -1818,7 +1827,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { VersionStorageInfo new_vstorage_2( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr /* src_vstorage */, true /* force_consistency_checks */, - EpochNumberRequirement::kMightMissing, nullptr, 0); + EpochNumberRequirement::kMightMissing, nullptr, 0, + OffpeakTimeInfo(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder_2.Apply(&version_edit_2)); s = version_builder_2.SaveTo(&new_vstorage_2); diff --git a/db/version_set.cc b/db/version_set.cc index 482b4c90d..17c430575 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2124,7 +2124,8 @@ VersionStorageInfo::VersionStorageInfo( CompactionStyle compaction_style, VersionStorageInfo* ref_vstorage, bool _force_consistency_checks, EpochNumberRequirement epoch_number_requirement, SystemClock* clock, - uint32_t bottommost_file_compaction_delay) + uint32_t bottommost_file_compaction_delay, + OffpeakTimeInfo offpeak_time_info) : internal_comparator_(internal_comparator), user_comparator_(user_comparator), // cfd is nullptr if Version is dummy @@ -2156,7 +2157,8 @@ VersionStorageInfo::VersionStorageInfo( bottommost_file_compaction_delay_(bottommost_file_compaction_delay), finalized_(false), force_consistency_checks_(_force_consistency_checks), - epoch_number_requirement_(epoch_number_requirement) { + epoch_number_requirement_(epoch_number_requirement), + offpeak_time_info_(offpeak_time_info) { if (ref_vstorage != nullptr) { accumulated_file_size_ = ref_vstorage->accumulated_file_size_; accumulated_raw_key_size_ = ref_vstorage->accumulated_raw_key_size_; @@ -2200,9 +2202,9 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, cfd_ == nullptr ? false : cfd_->ioptions()->force_consistency_checks, epoch_number_requirement, cfd_ == nullptr ? nullptr : cfd_->ioptions()->clock, - cfd_ == nullptr - ? 0 - : mutable_cf_options.bottommost_file_compaction_delay), + cfd_ == nullptr ? 
0 + : mutable_cf_options.bottommost_file_compaction_delay, + vset->offpeak_time_info()), vset_(vset), next_(this), prev_(this), @@ -5043,15 +5045,13 @@ void AtomicGroupReadBuffer::Clear() { replay_buffer_.clear(); } -VersionSet::VersionSet(const std::string& dbname, - const ImmutableDBOptions* _db_options, - const FileOptions& storage_options, Cache* table_cache, - WriteBufferManager* write_buffer_manager, - WriteController* write_controller, - BlockCacheTracer* const block_cache_tracer, - const std::shared_ptr& io_tracer, - const std::string& db_id, - const std::string& db_session_id) +VersionSet::VersionSet( + const std::string& dbname, const ImmutableDBOptions* _db_options, + const FileOptions& storage_options, Cache* table_cache, + WriteBufferManager* write_buffer_manager, WriteController* write_controller, + BlockCacheTracer* const block_cache_tracer, + const std::shared_ptr& io_tracer, const std::string& db_id, + const std::string& db_session_id, const std::string& daily_offpeak_time_utc) : column_family_set_(new ColumnFamilySet( dbname, _db_options, storage_options, table_cache, write_buffer_manager, write_controller, block_cache_tracer, io_tracer, @@ -5076,7 +5076,8 @@ VersionSet::VersionSet(const std::string& dbname, file_options_(storage_options), block_cache_tracer_(block_cache_tracer), io_tracer_(io_tracer), - db_session_id_(db_session_id) {} + db_session_id_(db_session_id), + offpeak_time_info_(OffpeakTimeInfo(daily_offpeak_time_utc)) {} VersionSet::~VersionSet() { // we need to delete column_family_set_ because its destructor depends on @@ -6201,7 +6202,7 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, VersionSet versions(dbname, &db_options, file_options, tc.get(), &wb, &wc, nullptr /*BlockCacheTracer*/, nullptr /*IOTracer*/, /*db_id*/ "", - /*db_session_id*/ ""); + /*db_session_id*/ "", options->daily_offpeak_time_utc); Status status; std::vector dummy; @@ -7242,7 +7243,8 @@ ReactiveVersionSet::ReactiveVersionSet( : VersionSet(dbname, _db_options, _file_options, table_cache, write_buffer_manager, write_controller, /*block_cache_tracer=*/nullptr, io_tracer, /*db_id*/ "", - /*db_session_id*/ "") {} + /*db_session_id*/ "", + /*daily_offpeak_time_utc*/ "") {} ReactiveVersionSet::~ReactiveVersionSet() {} diff --git a/db/version_set.h b/db/version_set.h index 55bce41e9..a6bfc5aa6 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -53,6 +53,7 @@ #endif #include "monitoring/instrumented_mutex.h" #include "options/db_options.h" +#include "options/offpeak_time_info.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_checksum.h" @@ -134,7 +135,8 @@ class VersionStorageInfo { bool _force_consistency_checks, EpochNumberRequirement epoch_number_requirement, SystemClock* clock, - uint32_t bottommost_file_compaction_delay); + uint32_t bottommost_file_compaction_delay, + OffpeakTimeInfo offpeak_time_info); // No copying allowed VersionStorageInfo(const VersionStorageInfo&) = delete; void operator=(const VersionStorageInfo&) = delete; @@ -751,7 +753,8 @@ class VersionStorageInfo { // target sizes. uint64_t estimated_compaction_needed_bytes_; - // Used for computing bottommost files marked for compaction. + // Used for computing bottommost files marked for compaction and checking for + // offpeak time. 
SystemClock* clock_; uint32_t bottommost_file_compaction_delay_; @@ -763,6 +766,8 @@ class VersionStorageInfo { EpochNumberRequirement epoch_number_requirement_; + OffpeakTimeInfo offpeak_time_info_; + friend class Version; friend class VersionSet; }; @@ -1146,7 +1151,8 @@ class VersionSet { WriteController* write_controller, BlockCacheTracer* const block_cache_tracer, const std::shared_ptr& io_tracer, - const std::string& db_id, const std::string& db_session_id); + const std::string& db_id, const std::string& db_session_id, + const std::string& daily_offpeak_time_utc); // No copying allowed VersionSet(const VersionSet&) = delete; void operator=(const VersionSet&) = delete; @@ -1501,6 +1507,12 @@ class VersionSet { new_options.writable_file_max_buffer_size; } + // TODO - Consider updating together when file options change in SetDBOptions + const OffpeakTimeInfo& offpeak_time_info() { return offpeak_time_info_; } + void ChangeOffpeakTimeInfo(const std::string& daily_offpeak_time_utc) { + offpeak_time_info_.daily_offpeak_time_utc = daily_offpeak_time_utc; + } + const ImmutableDBOptions* db_options() const { return db_options_; } static uint64_t GetNumLiveVersions(Version* dummy_versions); @@ -1651,6 +1663,9 @@ class VersionSet { std::string db_session_id_; + // Off-peak time information used for compaction scoring + OffpeakTimeInfo offpeak_time_info_; + private: // REQUIRES db mutex at beginning. may release and re-acquire db mutex Status ProcessManifestWrites(std::deque& writers, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 2526e752f..43d4036a3 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -21,6 +21,7 @@ #include "table/block_based/block_based_table_factory.h" #include "table/mock_table.h" #include "table/unique_id_impl.h" +#include "test_util/mock_time_env.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/string_util.h" @@ -132,7 +133,8 @@ class VersionStorageInfoTestBase : public testing::Test { /*src_vstorage=*/nullptr, /*_force_consistency_checks=*/false, EpochNumberRequirement::kMustPresent, ioptions_.clock, - mutable_cf_options_.bottommost_file_compaction_delay) {} + mutable_cf_options_.bottommost_file_compaction_delay, + OffpeakTimeInfo()) {} ~VersionStorageInfoTestBase() override { for (int i = 0; i < vstorage_.num_levels(); ++i) { @@ -1199,11 +1201,11 @@ class VersionSetTestBase { immutable_options_.fs = fs_; immutable_options_.clock = env_->GetSystemClock().get(); - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + versions_.reset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); reactive_versions_ = std::make_shared( dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, nullptr); @@ -1303,11 +1305,11 @@ class VersionSetTestBase { } void ReopenDB() { - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + versions_.reset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + 
&write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); EXPECT_OK(versions_->Recover(column_families_, false)); } @@ -1815,11 +1817,11 @@ TEST_F(VersionSetTest, WalAddition) { // Recover a new VersionSet. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, /*read_only=*/false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -1882,11 +1884,11 @@ TEST_F(VersionSetTest, WalCloseWithoutSync) { // Recover a new VersionSet. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 2); @@ -1935,11 +1937,11 @@ TEST_F(VersionSetTest, WalDeletion) { // Recover a new VersionSet, only the non-closed WAL should show up. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -1973,11 +1975,11 @@ TEST_F(VersionSetTest, WalDeletion) { // Recover from the new MANIFEST, only the non-closed WAL should show up. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -2093,11 +2095,11 @@ TEST_F(VersionSetTest, DeleteWalsBeforeNonExistingWalNumber) { // Recover a new VersionSet, WAL0 is deleted, WAL1 is not. 
{ - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -2129,11 +2131,11 @@ TEST_F(VersionSetTest, DeleteAllWals) { // Recover a new VersionSet, all WALs are deleted. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 0); @@ -2171,11 +2173,11 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) { // Recover a new VersionSet, the min log number and the last WAL should be // kept. { - std::unique_ptr new_versions( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr new_versions(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); std::string db_id; ASSERT_OK( new_versions->Recover(column_families_, /*read_only=*/false, &db_id)); @@ -2190,6 +2192,73 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) { } } +TEST_F(VersionSetTest, OffpeakTimeInfoTest) { + Random rnd(test::RandomSeed()); + + // Sets off-peak time from 11:30PM to 4:30AM next day. + // Starting at 1:30PM, use mock sleep to make time pass + // and see if IsNowOffpeak() returns correctly per time changes + int now_hour = 13; + int now_minute = 30; + versions_->ChangeOffpeakTimeInfo("23:30-04:30"); + + auto mock_clock = std::make_shared(env_->GetSystemClock()); + // Add some extra random days to current time + int days = rnd.Uniform(100); + mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); + + // Starting at 1:30PM. It's not off-peak + ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Now it's at 4:30PM. Still not off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Now it's at 11:30PM. It's off-peak + mock_clock->MockSleepForSeconds(7 * 3600); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Now it's at 2:30AM next day. 
It's still off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Now it's at 4:30AM. It's still off-peak + mock_clock->MockSleepForSeconds(2 * 3600); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Sleep for one more minute. It's at 4:31AM It's no longer off-peak + mock_clock->MockSleepForSeconds(60); + ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Entire day offpeak + versions_->ChangeOffpeakTimeInfo("00:00-23:59"); + // It doesn't matter what time it is. It should be just offpeak. + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Mock Sleep for 3 hours. It's still off-peak + mock_clock->MockSleepForSeconds(3 * 3600); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Mock Sleep for 20 hours. It's still off-peak + mock_clock->MockSleepForSeconds(20 * 3600); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Mock Sleep for 59 minutes. It's still off-peak + mock_clock->MockSleepForSeconds(59 * 60); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Mock Sleep for 59 seconds. It's still off-peak + mock_clock->MockSleepForSeconds(59); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + + // Mock Sleep for 1 second (exactly 24h passed). It's still off-peak + mock_clock->MockSleepForSeconds(1); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + // Another second for sanity check + mock_clock->MockSleepForSeconds(1); + ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); +} + TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) { // Tests that compensated range deletion size is added to compensated file // size. 
@@ -2236,11 +2305,11 @@ class VersionSetWithTimestampTest : public VersionSetTest { } void VerifyFullHistoryTsLow(uint64_t expected_ts_low) { - std::unique_ptr vset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + std::unique_ptr vset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); ASSERT_OK(vset->Recover(column_families_, /*read_only=*/false, /*db_id=*/nullptr)); for (auto* cfd : *(vset->GetColumnFamilySet())) { diff --git a/db/version_util.h b/db/version_util.h index e39f25571..f6042fa03 100644 --- a/db/version_util.h +++ b/db/version_util.h @@ -25,7 +25,8 @@ class OfflineManifestWriter { options.table_cache_numshardbits)), versions_(db_path, &immutable_db_options_, sopt_, tc_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "") {} + /*db_id*/ "", /*db_session_id*/ "", + options.daily_offpeak_time_utc) {} Status Recover(const std::vector& column_families) { return versions_.Recover(column_families, /*read_only*/ false, diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc index 0144e1846..abd7cd7ef 100644 --- a/db/wal_manager_test.cc +++ b/db/wal_manager_test.cc @@ -50,11 +50,11 @@ class WalManagerTest : public testing::Test { db_options_.fs = env_->GetFileSystem(); db_options_.clock = env_->GetSystemClock().get(); - versions_.reset( - new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "")); + versions_.reset(new VersionSet( + dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); wal_manager_.reset( new WalManager(db_options_, env_options_, nullptr /*IOTracer*/)); @@ -333,4 +333,3 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - diff --git a/options/db_options.cc b/options/db_options.cc index b26d18e75..ca72404dd 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -1066,38 +1066,6 @@ void MutableDBOptions::Dump(Logger* log) const { daily_offpeak_time_utc.c_str()); } -bool MutableDBOptions::IsNowOffPeak(SystemClock* clock) const { - if (daily_offpeak_time_utc.empty()) { - return false; - } - int64_t now; - if (clock->GetCurrentTime(&now).ok()) { - constexpr int kSecondsPerDay = 86400; - constexpr int kSecondsPerMinute = 60; - int seconds_since_midnight_to_nearest_minute = - (static_cast(now % kSecondsPerDay) / kSecondsPerMinute) * - kSecondsPerMinute; - int start_time = 0, end_time = 0; - bool success = - TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time); - assert(success); - assert(start_time != end_time); - if (!success) { - // If the validation was done properly, we should never reach here - return false; - } - // if the offpeak duration spans overnight (i.e. 
23:30 - 4:30 next day) - if (start_time > end_time) { - return start_time <= seconds_since_midnight_to_nearest_minute || - seconds_since_midnight_to_nearest_minute <= end_time; - } else { - return start_time <= seconds_since_midnight_to_nearest_minute && - seconds_since_midnight_to_nearest_minute <= end_time; - } - } - return false; -} - Status GetMutableDBOptionsFromStrings( const MutableDBOptions& base_options, const std::unordered_map& options_map, diff --git a/options/db_options.h b/options/db_options.h index 85a4d949b..701a83feb 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -136,9 +136,7 @@ struct MutableDBOptions { bool strict_bytes_per_sync; size_t compaction_readahead_size; int max_background_flushes; - std::string daily_offpeak_time_utc; - bool IsNowOffPeak(SystemClock* clock) const; }; Status GetStringFromMutableDBOptions(const ConfigOptions& config_options, diff --git a/options/offpeak_time_info.cc b/options/offpeak_time_info.cc new file mode 100644 index 000000000..678d112f1 --- /dev/null +++ b/options/offpeak_time_info.cc @@ -0,0 +1,48 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "options/offpeak_time_info.h" + +#include "rocksdb/system_clock.h" +#include "util/string_util.h" + +namespace ROCKSDB_NAMESPACE { +OffpeakTimeInfo::OffpeakTimeInfo() : daily_offpeak_time_utc("") {} +OffpeakTimeInfo::OffpeakTimeInfo(const std::string& offpeak_time) + : daily_offpeak_time_utc(offpeak_time) {} + +bool OffpeakTimeInfo::IsNowOffpeak(SystemClock* clock) const { + if (daily_offpeak_time_utc.empty()) { + return false; + } + int64_t now; + if (clock->GetCurrentTime(&now).ok()) { + constexpr int kSecondsPerDay = 86400; + constexpr int kSecondsPerMinute = 60; + int seconds_since_midnight_to_nearest_minute = + (static_cast(now % kSecondsPerDay) / kSecondsPerMinute) * + kSecondsPerMinute; + int start_time = 0, end_time = 0; + bool success = + TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time); + assert(success); + assert(start_time != end_time); + if (!success) { + // If the validation was done properly, we should never reach here + return false; + } + // if the offpeak duration spans overnight (i.e. 23:30 - 4:30 next day) + if (start_time > end_time) { + return start_time <= seconds_since_midnight_to_nearest_minute || + seconds_since_midnight_to_nearest_minute <= end_time; + } else { + return start_time <= seconds_since_midnight_to_nearest_minute && + seconds_since_midnight_to_nearest_minute <= end_time; + } + } + return false; +} + +} // namespace ROCKSDB_NAMESPACE diff --git a/options/offpeak_time_info.h b/options/offpeak_time_info.h new file mode 100644 index 000000000..74b456d3c --- /dev/null +++ b/options/offpeak_time_info.h @@ -0,0 +1,22 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { +class SystemClock; + +struct OffpeakTimeInfo { + OffpeakTimeInfo(); + explicit OffpeakTimeInfo(const std::string& offpeak_time); + std::string daily_offpeak_time_utc; + bool IsNowOffpeak(SystemClock* clock) const; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/src.mk b/src.mk index a019205ae..160ffdb2e 100644 --- a/src.mk +++ b/src.mk @@ -156,6 +156,7 @@ LIB_SOURCES = \ options/configurable.cc \ options/customizable.cc \ options/db_options.cc \ + options/offpeak_time_info.cc \ options/options.cc \ options/options_helper.cc \ options/options_parser.cc \ diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 6edf0637f..b467ab6d3 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1364,7 +1364,8 @@ void DumpManifestFile(Options options, std::string file, bool verbose, bool hex, ImmutableDBOptions immutable_db_options(options); VersionSet versions(dbname, &immutable_db_options, sopt, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ ""); + /*db_id*/ "", /*db_session_id*/ "", + options.daily_offpeak_time_utc); Status s = versions.DumpManifest(options, file, verbose, hex, json, cf_descs); if (!s.ok()) { fprintf(stderr, "Error in processing file %s %s\n", file.c_str(), @@ -1507,7 +1508,8 @@ Status GetLiveFilesChecksumInfoFromVersionSet(Options options, ImmutableDBOptions immutable_db_options(options); VersionSet versions(dbname, &immutable_db_options, sopt, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ ""); + /*db_id*/ "", /*db_session_id*/ "", + options.daily_offpeak_time_utc); std::vector cf_name_list; s = versions.ListColumnFamilies(&cf_name_list, db_path, immutable_db_options.fs.get()); @@ -2328,7 +2330,8 @@ Status ReduceDBLevelsCommand::GetOldNumOfLevels(Options& opt, int* levels) { WriteBufferManager wb(opt.db_write_buffer_size); VersionSet versions(db_path_, &db_options, soptions, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ ""); + /*db_id*/ "", /*db_session_id*/ "", + opt.daily_offpeak_time_utc); std::vector dummy; ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName, ColumnFamilyOptions(opt)); diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index c5b4115d1..05d433dbe 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -207,7 +207,8 @@ class FileChecksumTestHelper { WriteBufferManager wb(options_.db_write_buffer_size); ImmutableDBOptions immutable_db_options(options_); VersionSet versions(dbname_, &immutable_db_options, sopt, tc.get(), &wb, - &wc, nullptr, nullptr, "", ""); + &wc, nullptr, nullptr, "", "", + options_.daily_offpeak_time_utc); std::vector cf_name_list; Status s; s = versions.ListColumnFamilies(&cf_name_list, dbname_, From 212b5bf826637c51e0d2bac3aaed9236599d4de4 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 27 Oct 2023 17:07:39 -0700 Subject: [PATCH 239/386] Deep-copy Options in restored db for stress test to avoid race with SetOptions() (#12015) Summary: **Context** DB open will persist the `Options` in memory to options file and verify the file right after the write. The verification is done by comparing the options from parsing the written options file against the `Options` object in memory. Upon inconsistency, corruption such as https://github.com/facebook/rocksdb/blob/main/options/options_parser.cc#L725 will be returned. 
This verification assumes the `Options` object in memory is not changed from before the write till the verification. This assumption can break during [opening the restored db in stress test](https://github.com/facebook/rocksdb/blob/0f141352d8de2f743d222a6f2ff493a31dd2838c/db_stress_tool/db_stress_test_base.cc#L1784-L1799). This [line](https://github.com/facebook/rocksdb/blob/0f141352d8de2f743d222a6f2ff493a31dd2838c/db_stress_tool/db_stress_test_base.cc#L1770) makes it shares some pointer options (e.g, `std::shared_ptr filter_policy`) with other threads (e.g, SetOptions()) in db stress. And since https://github.com/facebook/rocksdb/pull/11838, filter_policy's field `bloom_before_level ` has now been mutable by SetOptions(). Therefore we started to see stress test failure like below: ``` Failure in DB::Open in backup/restore with: IO error: DB::Open() failed --- Unable to persist Options file: IO error: Unable to persist options.: Corruption: [RocksDBOptionsParser]:failed the verification on BlockBasedTable::: filter_policy.id Verification failed: Backup/restore failed: IO error: DB::Open() failed --- Unable to persist Options file: IO error: Unable to persist options.: Corruption: [RocksDBOptionsParser]:failed the verification on BlockBasedTable::: filter_policy.id db_stress: db_stress_tool/db_stress_test_base.cc:479: void rocksdb::StressTest::ProcessStatus(rocksdb::SharedState*, std::string, rocksdb::Status) const: Assertion `false' failed. ``` **Summary** This PR uses "deep copy" of the `options_` by CreateXXXFromString() to avoid sharing pointer options. **Test plan** Run the below db stress command that failed before this PR and pass after ``` ./db_stress --column_families=1 --threads=2 --preserve_unverified_changes=0 --acquire_snapshot_one_in=10000 --adaptive_readahead=0 --allow_data_in_errors=True --async_io=0 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=0 --backup_max_size=104857600 --backup_one_in=10 --batch_protection_bytes_per_key=0 --block_protection_bytes_per_key=1 --block_size=16384 --bloom_before_level=2147483646 --bloom_bits=0 --bottommost_compression_type=disable --bottommost_file_compaction_delay=86400 --bytes_per_sync=0 --cache_index_and_filter_blocks=0 --cache_size=33554432 --cache_type=tiered_auto_hyper_clock_cache --charge_compression_dictionary_building_buffer=0 --charge_file_metadata=0 --charge_filter_construction=0 --charge_table_reader=1 --checkpoint_one_in=1000000 --checksum_type=kXXH3 --clear_column_family_one_in=0 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=2 --compaction_readahead_size=0 --compaction_ttl=0 --compressed_secondary_cache_ratio=0.3333333333333333 --compressed_secondary_cache_size=0 --compression_checksum=1 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=lz4 --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=1 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=8388608 --delpercent=4 --delrangepercent=1 --destroy_db_initially=1 --detect_filter_construct_corruption=0 --disable_wal=0 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=0 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=1 --file_checksum_impl=big --flush_one_in=1000000 --format_version=2 --get_current_wal_file_one_in=0 
--get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=14 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=524288 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=1 --manual_wal_flush_one_in=1000 --mark_for_compaction_one_file_in=0 --max_auto_readahead_size=0 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=2500 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16777216 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=4194304 --memtable_max_range_deletions=0 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=0 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=500000 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=0 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=2 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=3600 --progress_reports=0 --read_fault_one_in=1000 --readahead_size=0 --readpercent=50 --recycle_log_file_num=0 --reopen=0 --secondary_cache_fault_one_in=0 --secondary_cache_uri= --set_options_one_in=5 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=104857600 --sst_file_manager_bytes_per_truncate=1048576 --stats_dump_period_sec=600 --subcompactions=3 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=2 --unpartitioned_pinning=0 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=1 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=0 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --wal_compression=zstd --write_buffer_size=4194304 --write_dbid_to_manifest=1 --write_fault_one_in=0 --writepercent=35 ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12015 Reviewed By: pdillinger Differential Revision: D50666136 Pulled By: hx235 fbshipit-source-id: 804acc23aecb4eedfe5c44f732e86291f2420b2b --- db_stress_tool/db_stress_test_base.cc | 57 ++++++++++++++++++++++++--- db_stress_tool/db_stress_test_base.h | 2 + 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 62ddead7b..5263090b5 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -11,6 +11,7 @@ #include #include +#include "rocksdb/options.h" #include "util/compression.h" #ifdef GFLAGS #include "db_stress_tool/db_stress_common.h" @@ -18,6 +19,7 @@ #include "db_stress_tool/db_stress_driver.h" #include "db_stress_tool/db_stress_table_properties_collector.h" #include "db_stress_tool/db_stress_wide_merge_operator.h" +#include "options/options_parser.h" #include "rocksdb/convenience.h" #include 
"rocksdb/filter_policy.h" #include "rocksdb/secondary_cache.h" @@ -1765,13 +1767,16 @@ Status StressTest::TestBackupRestore( } DB* restored_db = nullptr; std::vector restored_cf_handles; + // Not yet implemented: opening restored BlobDB or TransactionDB + Options restore_options; + if (s.ok() && !FLAGS_use_txn && !FLAGS_use_blob_db) { + s = PrepareOptionsForRestoredDB(&restore_options); + if (!s.ok()) { + from = "PrepareRestoredDBOptions in backup/restore"; + } + } if (s.ok() && !FLAGS_use_txn && !FLAGS_use_blob_db) { - Options restore_options(options_); - restore_options.best_efforts_recovery = false; - restore_options.listeners.clear(); - // Avoid dangling/shared file descriptors, for reliable destroy - restore_options.sst_file_manager = nullptr; std::vector cf_descriptors; // TODO(ajkr): `column_family_names_` is not safe to access here when // `clear_column_family_one_in != 0`. But we can't easily switch to @@ -1889,6 +1894,48 @@ Status StressTest::TestBackupRestore( return s; } +Status StressTest::PrepareOptionsForRestoredDB(Options* options) { + assert(options); + // To avoid race with other threads' operations (e.g, SetOptions()) + // on the same pointer sub-option (e.g, `std::shared_ptr + // filter_policy`) while having the same settings as `options_`, we create a + // new Options object from `options_`'s string to deep copy these pointer + // sub-options + Status s; + ConfigOptions config_opts; + + std::string db_options_str; + s = GetStringFromDBOptions(config_opts, options_, &db_options_str); + if (!s.ok()) { + return s; + } + DBOptions db_options; + s = GetDBOptionsFromString(config_opts, Options(), db_options_str, + &db_options); + if (!s.ok()) { + return s; + } + + std::string cf_options_str; + s = GetStringFromColumnFamilyOptions(config_opts, options_, &cf_options_str); + if (!s.ok()) { + return s; + } + ColumnFamilyOptions cf_options; + s = GetColumnFamilyOptionsFromString(config_opts, Options(), cf_options_str, + &cf_options); + if (!s.ok()) { + return s; + } + + *options = Options(db_options, cf_options); + options->best_efforts_recovery = false; + options->listeners.clear(); + // Avoid dangling/shared file descriptors, for reliable destroy + options->sst_file_manager = nullptr; + + return Status::OK(); +} Status StressTest::TestApproximateSize( ThreadState* thread, uint64_t iteration, const std::vector& rand_column_families, diff --git a/db_stress_tool/db_stress_test_base.h b/db_stress_tool/db_stress_test_base.h index fad4926aa..424570b33 100644 --- a/db_stress_tool/db_stress_test_base.h +++ b/db_stress_tool/db_stress_test_base.h @@ -194,6 +194,8 @@ class StressTest { const std::vector& rand_column_families, const std::vector& rand_keys); + virtual Status PrepareOptionsForRestoredDB(Options* options); + virtual Status TestCheckpoint(ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys); From 60df39e5307ed947f411b8cc19aa29110425601e Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Sat, 28 Oct 2023 09:50:52 -0700 Subject: [PATCH 240/386] Rate limiting stale sst files' deletion during recovery (#12016) Summary: As titled. If SstFileManager is available, deleting stale sst files will be delegated to it so it can be rate limited. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12016 Reviewed By: hx235 Differential Revision: D50670482 Pulled By: jowlyzhang fbshipit-source-id: bde5b76ea1d98e67f6b4f08bfba3db48e46aab4e --- db/db_impl/db_impl.h | 5 ++-- db/db_impl/db_impl_files.cc | 2 +- db/db_impl/db_impl_open.cc | 7 +++-- db/db_sst_test.cc | 26 ++++++++++++++++--- ..._file_deletion_on_recovery_rate_limited.md | 1 + 5 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 621177ef1..6769d7f50 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1393,8 +1393,9 @@ class DBImpl : public DB { autovector cfds_; autovector mutable_cf_opts_; autovector> edit_lists_; - // files_to_delete_ contains sst files - std::unordered_set files_to_delete_; + // Stale SST files to delete found upon recovery. This stores a mapping from + // such a file's absolute path to its parent directory. + std::unordered_map files_to_delete_; bool is_new_db_ = false; }; diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 8f72e17f9..f1e8813f0 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -1003,7 +1003,7 @@ Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) { if (type == kTableFile && number >= next_file_number && recovery_ctx->files_to_delete_.find(normalized_fpath) == recovery_ctx->files_to_delete_.end()) { - recovery_ctx->files_to_delete_.emplace(normalized_fpath); + recovery_ctx->files_to_delete_.emplace(normalized_fpath, path); } } } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 5fd4d8f59..a67de6ff1 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -942,8 +942,11 @@ Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) { recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir()); if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) { mutex_.Unlock(); - for (const auto& fname : recovery_ctx.files_to_delete_) { - s = env_->DeleteFile(fname); + for (const auto& stale_sst_file : recovery_ctx.files_to_delete_) { + s = DeleteDBFile(&immutable_db_options_, stale_sst_file.first, + stale_sst_file.second, + /*force_bg=*/false, + /*force_fg=*/false); if (!s.ok()) { break; } diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index 4293e77a1..95ed405a2 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -937,12 +937,21 @@ INSTANTIATE_TEST_CASE_P(DBWALTestWithParam, DBWALTestWithParam, ::testing::Values(std::make_tuple("", true), std::make_tuple("_wal_dir", false))); -TEST_F(DBSSTTest, OpenDBWithExistingTrash) { +TEST_F(DBSSTTest, OpenDBWithExistingTrashAndObsoleteSstFile) { Options options = CurrentOptions(); - options.sst_file_manager.reset( NewSstFileManager(env_, nullptr, "", 1024 * 1024 /* 1 MB/sec */)); auto sfm = static_cast(options.sst_file_manager.get()); + // Set an extra high trash ratio to prevent immediate/non-rate limited + // deletions + sfm->SetDeleteRateBytesPerSecond(1024 * 1024); + sfm->delete_scheduler()->SetMaxTrashDBRatio(1000.0); + + int bg_delete_file = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::DeleteTrashFile:DeleteFile", + [&](void* /*arg*/) { bg_delete_file++; }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); Destroy(last_options_); @@ -951,13 +960,22 @@ TEST_F(DBSSTTest, OpenDBWithExistingTrash) { 
ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash")); ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash")); ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash")); - - // Reopen the DB and verify that it deletes existing trash files + // Manually add an obsolete sst file. Obsolete SST files are discovered and + // deleted upon recovery. + constexpr uint64_t kSstFileNumber = 100; + const std::string kObsoleteSstFile = + MakeTableFileName(dbname_, kSstFileNumber); + ASSERT_OK(WriteStringToFile(env_, "abc", kObsoleteSstFile)); + + // Reopen the DB and verify that it deletes existing trash files and obsolete + // SST files with rate limiting. Reopen(options); sfm->WaitForEmptyTrash(); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "001.sst.trash")); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "002.sst.trash")); ASSERT_NOK(env_->FileExists(dbname_ + "/" + "003.sst.trash")); + ASSERT_NOK(env_->FileExists(kObsoleteSstFile)); + ASSERT_EQ(bg_delete_file, 4); } // Create a DB with 2 db_paths, and generate multiple files in the 2 diff --git a/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md b/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md new file mode 100644 index 000000000..e57647ea9 --- /dev/null +++ b/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md @@ -0,0 +1 @@ +Deleting stale files upon recovery are delegated to SstFileManger if available so they can be rate limited. \ No newline at end of file From b3fd3838d4cccd0ab0f380436e48904feb259364 Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Mon, 30 Oct 2023 10:08:19 -0700 Subject: [PATCH 241/386] Remove build dependencies for java tests. (#12021) Summary: Final fix for https://github.com/facebook/rocksdb/issues/12013 - Reverting back changes on CirleCI explicit image declaration. - Removed CMake dependencies between java classed and java test classes. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12021 Reviewed By: akankshamahajan15 Differential Revision: D50745392 Pulled By: ajkr fbshipit-source-id: 6a7a1da1e7e4da8da72130c9272915974e10fffc --- .circleci/config.yml | 1 - java/CMakeLists.txt | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 952aa5409..711e471c1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -600,7 +600,6 @@ jobs: build-windows-vs2019: executor: name: win/server-2019 - version: 2023.08.1 size: 2xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 45f1a6c94..97a22eb41 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -557,13 +557,12 @@ else () endif() -get_target_property(ROCKSDBJNI_CLASSES_JAR_FILE rocksdbjni_classes JAR_FILE) add_jar( rocksdbjni_test_classes SOURCES ${JAVA_MAIN_CLASSES} ${JAVA_TEST_CLASSES} - INCLUDE_JARS ${ROCKSDBJNI_CLASSES_JAR_FILE} ${JAVA_TESTCLASSPATH} + INCLUDE_JARS ${JAVA_TESTCLASSPATH} GENERATE_NATIVE_HEADERS rocksdbjni_test_headers DESTINATION ${JNI_OUTPUT_DIR} ) From 76402c034e8fa75809df3ed89a5742f044e980e9 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Mon, 30 Oct 2023 10:47:04 -0700 Subject: [PATCH 242/386] Fix incorrect parameters order in env_basic_test.cc (#11997) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11997 Reviewed By: jaykorean Differential Revision: D50608182 Pulled By: ajkr fbshipit-source-id: d33cfdb5adfea91175c8fa21e8b80e22f728f6c6 --- env/env_basic_test.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/env/env_basic_test.cc b/env/env_basic_test.cc index 11b07509c..93bb2dba0 100644 --- a/env/env_basic_test.cc +++ b/env/env_basic_test.cc @@ -57,10 +57,10 @@ static Env* GetTestEnv() { static std::shared_ptr env_guard; static Env* custom_env = nullptr; if (custom_env == nullptr) { - const char* uri = getenv("TEST_ENV_URI"); - if (uri != nullptr) { - EXPECT_OK(Env::CreateFromUri(ConfigOptions(), uri, "", &custom_env, - &env_guard)); + const char* env_uri = getenv("TEST_ENV_URI"); + if (env_uri != nullptr) { + EXPECT_OK(Env::CreateFromUri(ConfigOptions(), env_uri, /*fs_uri=*/"", + &custom_env, &env_guard)); } } EXPECT_NE(custom_env, nullptr); @@ -71,10 +71,10 @@ static Env* GetTestFS() { static std::shared_ptr fs_env_guard; static Env* fs_env = nullptr; if (fs_env == nullptr) { - const char* uri = getenv("TEST_FS_URI"); - if (uri != nullptr) { - EXPECT_OK( - Env::CreateFromUri(ConfigOptions(), uri, "", &fs_env, &fs_env_guard)); + const char* fs_uri = getenv("TEST_FS_URI"); + if (fs_uri != nullptr) { + EXPECT_OK(Env::CreateFromUri(ConfigOptions(), /*env_uri=*/"", fs_uri, + &fs_env, &fs_env_guard)); } } EXPECT_NE(fs_env, nullptr); From 2818a74b95f0bab434dc65f0d271ac7a27c787a7 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Tue, 31 Oct 2023 07:39:41 -0700 Subject: [PATCH 243/386] Initialize merge operator explicitly in PrepareOptionsForRestoredDB() (#12033) Summary: We are seeing the following stress test failure: `Failure in DB::Get in backup/restore with: Invalid argument: merge_operator is not properly initialized. Verification failed: Backup/restore failed: Invalid argument: merge_operator is not properly initialized.`. The reason is likely that `GetColumnFamilyOptionsFromString()` does not set merge operator if it's a customized merge operator. Fixing it by initializing merge operator explicitly. 
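To illustrate the pattern, a simplified sketch (not the literal stress-test code; `DeepCopyCFOptions` and `merge_op` are made-up names): serializing `ColumnFamilyOptions` to a string and parsing it back deep-copies the plain fields, but a customized merge operator is not reconstructed from the string, so it must be assigned again on the copy, which is what `InitializeMergeOperator()` in the change below does.

```cpp
#include <memory>
#include <string>

#include "rocksdb/convenience.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/options.h"
#include "rocksdb/status.h"

using namespace ROCKSDB_NAMESPACE;

// `src` may share pointer sub-options with other threads; `merge_op` is the
// customized merge operator the deep copy should end up with.
Status DeepCopyCFOptions(const ColumnFamilyOptions& src,
                         const std::shared_ptr<MergeOperator>& merge_op,
                         ColumnFamilyOptions* out) {
  ConfigOptions config_opts;
  std::string cf_options_str;
  Status s =
      GetStringFromColumnFamilyOptions(config_opts, src, &cf_options_str);
  if (!s.ok()) {
    return s;
  }
  s = GetColumnFamilyOptionsFromString(config_opts, ColumnFamilyOptions(),
                                       cf_options_str, out);
  if (!s.ok()) {
    return s;
  }
  // The string round trip does not recreate a customized merge operator, so
  // set it explicitly on the copy.
  out->merge_operator = merge_op;
  return Status::OK();
}
```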
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12033 Test Plan: this repro gives the error consistently before this PR ``` ./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=0 --allow_concurrent_memtable_write=1 --allow_data_in_errors=True --async_io=0 --atomic_flush=1 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=1048576000000 --backup_one_in=50 --batch_protection_bytes_per_key=8 --block_protection_bytes_per_key=2 --block_size=16384 --bloom_before_level=2147483646 --bloom_bits=31.014388066505518 --bottommost_compression_type=lz4hc --bottommost_file_compaction_delay=0 --bytes_per_sync=0 --cache_index_and_filter_blocks=0 --cache_size=33554432 --cache_type=fixed_hyper_clock_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=1 --charge_filter_construction=0 --charge_table_reader=1 --checkpoint_one_in=1000000 --checksum_type=kxxHash --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=3 --compaction_readahead_size=0 --compaction_ttl=10 --compressed_secondary_cache_ratio=0.0 --compressed_secondary_cache_size=0 --compression_checksum=1 --compression_max_dict_buffer_bytes=4095 --compression_max_dict_bytes=16384 --compression_parallel_threads=1 --compression_type=none --compression_use_zstd_dict_trainer=1 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=0 --db=/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=0 --delpercent=4 --delrangepercent=1 --destroy_db_initially=1 --detect_filter_construct_corruption=0 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=0 --expected_values_dir=/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=0 --file_checksum_impl=xxh64 --flush_one_in=1000000 --format_version=2 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=10 --index_type=2 --ingest_external_file_one_in=0 --initial_auto_readahead_size=16384 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=1 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=10 --max_auto_readahead_size=524288 --max_background_compactions=1 --max_bytes_for_level_base=67108864 --max_key=100 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=1048576 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=8388608 --memtable_max_range_deletions=1000 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=2 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=2 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=0 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=16 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=0 --partition_filters=0 --partition_pinning=0 --pause_background_one_in=1000000 --periodic_compaction_seconds=0 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=1 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=0 --readahead_size=0 --readpercent=50 --recycle_log_file_num=0 --reopen=0 
--secondary_cache_fault_one_in=0 --set_options_one_in=0 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=1 --sync=0 --sync_fault_injection=1 --target_file_size_base=16777216 --target_file_size_multiplier=1 --test_batches_snapshots=0 --top_level_index_pinning=0 --unpartitioned_pinning=1 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=0 --use_merge=1 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=10 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=100000 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=524288 --wal_compression=zstd --write_buffer_size=33554432 --write_dbid_to_manifest=0 --write_fault_one_in=0 --writepercent=35 ``` Reviewed By: hx235 Differential Revision: D50825558 Pulled By: cbi42 fbshipit-source-id: 8468dc0444c112415a515af8291ef3abec8a42de --- db_stress_tool/db_stress_test_base.cc | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 5263090b5..1d8bdc428 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1894,6 +1894,18 @@ Status StressTest::TestBackupRestore( return s; } +void InitializeMergeOperator(Options& options) { + if (FLAGS_use_full_merge_v1) { + options.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); + } else { + if (FLAGS_use_put_entity_one_in > 0) { + options.merge_operator = std::make_shared(); + } else { + options.merge_operator = MergeOperators::CreatePutOperator(); + } + } +} + Status StressTest::PrepareOptionsForRestoredDB(Options* options) { assert(options); // To avoid race with other threads' operations (e.g, SetOptions()) @@ -1933,6 +1945,8 @@ Status StressTest::PrepareOptionsForRestoredDB(Options* options) { options->listeners.clear(); // Avoid dangling/shared file descriptors, for reliable destroy options->sst_file_manager = nullptr; + // GetColumnFamilyOptionsFromString does not create customized merge operator + InitializeMergeOperator(*options); return Status::OK(); } @@ -3418,15 +3432,8 @@ void InitializeOptionsFromFlags( options.memtable_factory.reset(new VectorRepFactory()); break; } - if (FLAGS_use_full_merge_v1) { - options.merge_operator = MergeOperators::CreateDeprecatedPutOperator(); - } else { - if (FLAGS_use_put_entity_one_in > 0) { - options.merge_operator = std::make_shared(); - } else { - options.merge_operator = MergeOperators::CreatePutOperator(); - } - } + + InitializeMergeOperator(options); if (FLAGS_enable_compaction_filter) { options.compaction_filter_factory = From e0c45c15a7af615435d1f346cf8b712e6e749f5b Mon Sep 17 00:00:00 2001 From: Adam Retter Date: Tue, 31 Oct 2023 12:23:34 -0700 Subject: [PATCH 244/386] Fix the ZStd checksum (#12005) Summary: Somehow we had the wrong checksum when validating the ZStd 1.5.5 download for RocksJava in the previous Pull Request - https://github.com/facebook/rocksdb/pull/9304. This PR fixes that. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12005 Reviewed By: jaykorean Differential Revision: D50840338 Pulled By: cbi42 fbshipit-source-id: 8a92779d3bef013d812eecb89aaaf33fc73991ec --- .circleci/config.yml | 7 +++++-- Makefile | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 711e471c1..aaeb8538e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -227,6 +227,9 @@ executors: # $ docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -it zjay437/rocksdb:0.5 bash # option `--cap-add=SYS_PTRACE --security-opt seccomp=unconfined` is used to enable gdb to attach an existing process - image: zjay437/rocksdb:0.6 + linux-java-docker: + docker: + - image: evolvedbinary/rocksjava:centos6_x64-be jobs: build-macos: @@ -653,7 +656,7 @@ jobs: - post-pmd-steps build-linux-java-static: - executor: linux-docker + executor: linux-java-docker resource_class: large steps: - pre-steps @@ -666,7 +669,7 @@ jobs: which javac && javac -version - run: name: "Build RocksDBJava Static Library" - command: make V=1 J=8 -j8 rocksdbjavastatic + command: scl enable devtoolset-7 'make V=1 J=8 -j8 rocksdbjavastatic' - post-steps build-macos-java: diff --git a/Makefile b/Makefile index 904983e8b..3f308c6dd 100644 --- a/Makefile +++ b/Makefile @@ -2107,7 +2107,7 @@ LZ4_VER ?= 1.9.4 LZ4_SHA256 ?= 0b0e3aa07c8c063ddf40b082bdf7e37a1562bda40a0ff5272957f3e987e0e54b LZ4_DOWNLOAD_BASE ?= https://github.com/lz4/lz4/archive ZSTD_VER ?= 1.5.5 -ZSTD_SHA256 ?= 9c4396cc829cfae319a6e2615202e82aad41372073482fce286fac78646d3ee4 +ZSTD_SHA256 ?= 98e9c3d949d1b924e28e01eccb7deed865eefebf25c2f21c702e5cd5b63b85e1 ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 From 0b057a7accbf3f7a123432277a8586e9236714ac Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 31 Oct 2023 16:10:48 -0700 Subject: [PATCH 245/386] Initialize comparator explicitly in PrepareOptionsForRestoredDB() (#12034) Summary: This is to fix below error seeing in stress test: ``` Failure in DB::Open in backup/restore with: Invalid argument: Cannot open a column family and disable user-defined timestamps feature if its existing persist_user_defined_timestamps flag is not false. ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12034 Reviewed By: cbi42 Differential Revision: D50860689 Pulled By: jowlyzhang fbshipit-source-id: ebc6cf0a75caa43d3d3bd58e3d5c2ac754cc637c --- db_stress_tool/db_stress_test_base.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 1d8bdc428..14814d51e 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1945,8 +1945,12 @@ Status StressTest::PrepareOptionsForRestoredDB(Options* options) { options->listeners.clear(); // Avoid dangling/shared file descriptors, for reliable destroy options->sst_file_manager = nullptr; - // GetColumnFamilyOptionsFromString does not create customized merge operator + // GetColumnFamilyOptionsFromString does not create customized merge operator, + // and comparator. 
InitializeMergeOperator(*options); + if (FLAGS_user_timestamp_size > 0) { + CheckAndSetOptionsForUserTimestamp(*options); + } return Status::OK(); } From 04225a2cfa41c89b88b1f93b7498dd2c225105cb Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Tue, 31 Oct 2023 16:13:36 -0700 Subject: [PATCH 246/386] Fix for RecoverFromRetryableBGIOError starting with recovery_in_prog_ false (#11991) Summary: cbi42 helped investigation and found a potential scenario where `RecoverFromRetryableBGIOError()` may start with `recovery_in_prog_ ` set as false. (and other booleans like `bg_error_` and `soft_error_no_bg_work_`) **Thread 1** - `StartRecoverFromRetryableBGIOError()`): (mutex held) sets `recovery_in_prog_ = true` **Thread 1's `recovery_thread_`** - (waits for mutex and acquires it) - `RecoverFromRetryableBGIOError()` -> `ResumeImpl()` -> `ClearBGError()`: sets `recovery_in_prog_ = false` - `ClearBGError()` -> `NotifyOnErrorRecoveryEnd()`: releases `mutex` **Thread 2** - `StartRecoverFromRetryableBGIOError()`): (mutex held) sets `recovery_in_prog_ = true` - Waits for Thread 1 (`recovery_thread_`) to finish **Thread 1's `recovery_thread_`** - re-lock mutex in `NotifyOnErrorRecoveryEnd()` - Still inside `RecoverFromRetryableBGIOError()`: sets `recovery_in_prog_ = false` - Done **Thread 2's `recovery_thread_`** - recovery thread started with `recovery_in_prog_` set as `false` # Fix - Remove double-clearing `bg_error_`, `recovery_in_prog_` and other fields after `ResumeImpl()` already returned `OK()`. - Minor typo and linter fixes in `DBErrorHandlingFSTest` Pull Request resolved: https://github.com/facebook/rocksdb/pull/11991 Test Plan: - `DBErrorHandlingFSTest::MultipleRecoveryThreads` added to reproduce the scenario. - Adding `assert(recovery_in_prog_);` at the start of `ErrorHandler::RecoverFromRetryableBGIOError()` fails the test without the fix and succeeds with the fix as expected. Reviewed By: cbi42 Differential Revision: D50506113 Pulled By: jaykorean fbshipit-source-id: 6dabe01e9ecd3fc50bbe9019587f2f4858bed9c6 --- db/error_handler.cc | 17 +-- db/error_handler_fs_test.cc | 282 +++++++++++++++++++++++------------- db/event_helpers.cc | 2 + 3 files changed, 191 insertions(+), 110 deletions(-) diff --git a/db/error_handler.cc b/db/error_handler.cc index 21c3a686f..6797371b9 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -556,6 +556,7 @@ Status ErrorHandler::ClearBGError() { // old_bg_error is only for notifying listeners, so may not be checked old_bg_error.PermitUncheckedError(); // Clear and check the recovery IO and BG error + is_db_stopped_.store(false, std::memory_order_release); bg_error_ = Status::OK(); recovery_error_ = IOStatus::OK(); bg_error_.PermitUncheckedError(); @@ -671,11 +672,14 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( // wait the previous recover thread to finish and create a new thread // to recover from the bg error. db_mutex_->Unlock(); + TEST_SYNC_POINT( + "StartRecoverFromRetryableBGIOError:BeforeWaitingForOtherThread"); old_recovery_thread->join(); + TEST_SYNC_POINT( + "StartRecoverFromRetryableBGIOError:AfterWaitingForOtherThread"); db_mutex_->Lock(); } - TEST_SYNC_POINT("StartRecoverFromRetryableBGIOError::in_progress"); recovery_thread_.reset( new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this)); @@ -689,6 +693,7 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError( // Automatic recover from Retryable BG IO error. Must be called after db // mutex is released. 
void ErrorHandler::RecoverFromRetryableBGIOError() { + assert(recovery_in_prog_); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart"); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeStart2"); InstrumentedMutexLock l(db_mutex_); @@ -754,22 +759,12 @@ void ErrorHandler::RecoverFromRetryableBGIOError() { // recover from the retryable IO error and no other BG errors. Clean // the bg_error and notify user. TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverSuccess"); - Status old_bg_error = bg_error_; - is_db_stopped_.store(false, std::memory_order_release); - bg_error_ = Status::OK(); - bg_error_.PermitUncheckedError(); - EventHelpers::NotifyOnErrorRecoveryEnd( - db_options_.listeners, old_bg_error, bg_error_, db_mutex_); if (bg_error_stats_ != nullptr) { RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT); RecordInHistogram(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count); } - recovery_in_prog_ = false; - if (soft_error_no_bg_work_) { - soft_error_no_bg_work_ = false; - } return; } else { // In this case: 1) recovery_error_ is more serious or not retryable diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index 56a0f57aa..56d2fcfc0 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -155,9 +155,9 @@ class ErrorHandlerFSListener : public EventListener { FaultInjectionTestFS* fault_fs_; }; -TEST_F(DBErrorHandlingFSTest, FLushWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -200,9 +200,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) { // All the NoSpace IOError will be handled as the regular BG Error no matter the // retryable flag is set of not. So the auto resume for retryable IO Error will // not be triggered. Also, it is mapped as hard error. 
-TEST_F(DBErrorHandlingFSTest, FLushWriteNoSpaceError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteNoSpaceError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -244,9 +244,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteNoSpaceError) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteRetryableError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -318,9 +318,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteFileScopeError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -402,9 +402,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWALWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWALWriteRetryableError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -446,9 +446,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWALWriteRetryableError) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWALAtomicWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWALAtomicWriteRetryableError) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -492,9 +492,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWALAtomicWriteRetryableError) { } // The flush error is injected before we finish the table build -TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWritNoWALRetryableError1) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -548,9 +548,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) { } // The retryable IO error is injected before we sync table -TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteNoWALRetryableError2) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -592,9 +592,9 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) { } // The retryable IO error is injected before we close the table file -TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_F(DBErrorHandlingFSTest, FlushWriteNoWALRetryableError3) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); 
options.create_if_missing = true; @@ -636,8 +636,8 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) { } TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -677,8 +677,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { } TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -720,8 +720,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) { } TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -766,8 +766,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) { } TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -811,8 +811,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) { } TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -863,8 +863,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); return; } - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -933,8 +933,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { } TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1005,8 +1005,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { } TEST_F(DBErrorHandlingFSTest, CompactionWriteError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1047,8 +1047,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteError) { } TEST_F(DBErrorHandlingFSTest, DISABLED_CompactionWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1094,8 +1094,8 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_CompactionWriteRetryableError) { } TEST_F(DBErrorHandlingFSTest, DISABLED_CompactionWriteFileScopeError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr 
listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1186,8 +1186,8 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) { ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); return; } - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1231,8 +1231,8 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) { } TEST_F(DBErrorHandlingFSTest, FailRecoverFlushError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1260,8 +1260,8 @@ TEST_F(DBErrorHandlingFSTest, WALWriteError) { ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); return; } - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1333,8 +1333,8 @@ TEST_F(DBErrorHandlingFSTest, WALWriteError) { } TEST_F(DBErrorHandlingFSTest, WALWriteRetryableError) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1428,8 +1428,8 @@ TEST_F(DBErrorHandlingFSTest, MultiCFWALWriteError) { ROCKSDB_GTEST_SKIP("Test requires non-mock environment"); return; } - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1760,10 +1760,10 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) { // to soft error and trigger auto resume. During auto resume, SwitchMemtable // is disabled to avoid small SST tables. Write can still be applied before // the bg error is cleaned unless the memtable is full. 
-TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) { +TEST_F(DBErrorHandlingFSTest, FlushWritNoWALRetryableErrorAutoRecover1) { // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1784,7 +1784,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) { ASSERT_OK(Put(Key(1), "val1", wo)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( {{"RecoverFromRetryableBGIOError:LoopOut", - "FLushWritNoWALRetryableeErrorAutoRecover1:1"}}); + "FlushWritNoWALRetryableeErrorAutoRecover1:1"}}); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeFinishBuildTable", [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); @@ -1793,7 +1793,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) { s = Flush(); ASSERT_EQ("val1", Get(Key(1))); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritNoWALRetryableeErrorAutoRecover1:1"); + TEST_SYNC_POINT("FlushWritNoWALRetryableeErrorAutoRecover1:1"); ASSERT_EQ("val1", Get(Key(1))); ASSERT_EQ("val1", Get(Key(1))); SyncPoint::GetInstance()->DisableProcessing(); @@ -1830,10 +1830,94 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) { +TEST_F(DBErrorHandlingFSTest, MultipleRecoveryThreads) { + // This test creates a scenario where second write's recovery can get started + // while mutex is released for a short period during + // NotifyOnErrorRecoveryEnd() from the first write's recovery. This is to make + // sure RecoverFromRetryableBGIOError() from the second write's recovery + // thread does not start with recovery_in_prog_ = false; + + std::shared_ptr listener = + std::make_shared(); + Options options = GetDefaultOptions(); + options.env = fault_env_.get(); + options.create_if_missing = true; + options.listeners.emplace_back(listener); + options.max_bgerror_resume_count = 100; + options.bgerror_resume_retry_interval = 1000000; // 1 second + options.statistics = CreateDBStatistics(); + + listener->EnableAutoRecovery(false); + DestroyAndReopen(options); + + IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); + error_msg.SetRetryable(true); + + WriteOptions wo = WriteOptions(); + wo.disableWAL = true; + fault_fs_->SetFilesystemActive(false, error_msg); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"NotifyOnErrorRecoveryEnd:MutexUnlocked:1", + "MultipleRecoveryThreads:1"}, + {"MultipleRecoveryThreads:2", + "NotifyOnErrorRecoveryEnd:MutexUnlocked:2"}, + {"StartRecoverFromRetryableBGIOError:BeforeWaitingForOtherThread", + "MultipleRecoveryThreads:3"}, + {"RecoverFromRetryableBGIOError:RecoverSuccess", + "MultipleRecoveryThreads:4"}, + {"MultipleRecoveryThreads:4", + "StartRecoverFromRetryableBGIOError:AfterWaitingForOtherThread"}}); + SyncPoint::GetInstance()->EnableProcessing(); + + // First write with read fault injected and recovery will start + { + ASSERT_OK(Put(Key(1), "val1", wo)); + Status s = Flush(); + ASSERT_NOK(s); + } + // Remove read fault injection so that first recovery can go through + fault_fs_->SetFilesystemActive(true); + + // At this point, first recovery is now at NotifyOnErrorRecoveryEnd. Mutex is + // released. 
+ TEST_SYNC_POINT("MultipleRecoveryThreads:1"); + + ROCKSDB_NAMESPACE::port::Thread second_write([&] { + // Second write with read fault injected + fault_fs_->SetFilesystemActive(false, error_msg); + ASSERT_OK(Put(Key(2), "val2", wo)); + Status s = Flush(); + ASSERT_NOK(s); + }); + // Second bg thread before waiting for the first thread's recovery thread + TEST_SYNC_POINT("MultipleRecoveryThreads:3"); + // First thread's recovery thread continues + TEST_SYNC_POINT("MultipleRecoveryThreads:2"); + // Wait for the first thread's recovery to finish + // (this sets recovery_in_prog_ = false) + // And second thread continues and starts recovery thread + TEST_SYNC_POINT("MultipleRecoveryThreads:4"); + second_write.join(); + // Remove error injection so that second thread recovery can go through + fault_fs_->SetFilesystemActive(true); + + // Set up sync point so that we can wait for the recovery thread to finish + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"RecoverFromRetryableBGIOError:RecoverSuccess", + "MultipleRecoveryThreads:6"}}); + + // Wait for the second thread's recovery to be done + TEST_SYNC_POINT("MultipleRecoveryThreads:6"); + + SyncPoint::GetInstance()->DisableProcessing(); + SyncPoint::GetInstance()->ClearAllCallBacks(); + Destroy(options); +} + +TEST_F(DBErrorHandlingFSTest, FlushWritNoWALRetryableErrorAutoRecover2) { // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1891,10 +1975,10 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) { // Auto resume fromt the flush retryable IO error. Activate the FS before the // first resume. Resume is successful -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) { +TEST_F(DBErrorHandlingFSTest, FlushWritRetryableErrorAutoRecover1) { // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1933,10 +2017,10 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) { // Auto resume fromt the flush retryable IO error and set the retry limit count. 
// Never activate the FS and auto resume should fail at the end -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { +TEST_F(DBErrorHandlingFSTest, FlushWritRetryableErrorAutoRecover2) { // Fail all the resume and let user to resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -1953,18 +2037,18 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { ASSERT_OK(Put(Key(1), "val1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"FLushWritRetryableeErrorAutoRecover2:0", + {{"FlushWritRetryableeErrorAutoRecover2:0", "RecoverFromRetryableBGIOError:BeforeStart"}, {"RecoverFromRetryableBGIOError:LoopOut", - "FLushWritRetryableeErrorAutoRecover2:1"}}); + "FlushWritRetryableeErrorAutoRecover2:1"}}); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeFinishBuildTable", [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:0"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:1"); + TEST_SYNC_POINT("FlushWritRetryableeErrorAutoRecover2:0"); + TEST_SYNC_POINT("FlushWritRetryableeErrorAutoRecover2:1"); fault_fs_->SetFilesystemActive(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); @@ -1986,8 +2070,8 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { // Fail the first resume and let the second resume be successful. TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) { // Fail the first resume and let the second resume be successful - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2039,8 +2123,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) { TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableErrorAutoRecover) { // Fail the first resume and let the second resume be successful - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2094,8 +2178,8 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableErrorAutoRecover) { TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableErrorAutoRecover) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2184,8 +2268,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableErrorAutoRecover) { // compaction, the FS is set to active and compaction is successful, so // the test will hit the CompactionJob::FinishCompactionOutputFile1 sync // point. 
- std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2238,8 +2322,8 @@ TEST_F(DBErrorHandlingFSTest, CompactionWriteRetryableErrorAutoRecover) { } TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2341,8 +2425,8 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) { TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover2) { // Fail the first recover and try second time. - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2442,10 +2526,10 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover2) { // Fail auto resume from a flush retryable error and verify that // OnErrorRecoveryEnd listener callback is called -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAbortRecovery) { +TEST_F(DBErrorHandlingFSTest, FlushWritRetryableErrorAbortRecovery) { // Activate the FS before the first resume - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2736,9 +2820,9 @@ TEST_F(DBErrorHandlingFSTest, CompactionReadRetryableErrorAutoRecover) { class DBErrorHandlingFencingTest : public DBErrorHandlingFSTest, public testing::WithParamInterface {}; -TEST_P(DBErrorHandlingFencingTest, FLushWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); +TEST_P(DBErrorHandlingFencingTest, FlushWriteFenced) { + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2765,8 +2849,8 @@ TEST_P(DBErrorHandlingFencingTest, FLushWriteFenced) { } TEST_P(DBErrorHandlingFencingTest, ManifestWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2800,8 +2884,8 @@ TEST_P(DBErrorHandlingFencingTest, ManifestWriteFenced) { } TEST_P(DBErrorHandlingFencingTest, CompactionWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; @@ -2841,8 +2925,8 @@ TEST_P(DBErrorHandlingFencingTest, CompactionWriteFenced) { } TEST_P(DBErrorHandlingFencingTest, WALWriteFenced) { - std::shared_ptr listener( - new ErrorHandlerFSListener()); + std::shared_ptr listener = + std::make_shared(); Options options = GetDefaultOptions(); options.env = fault_env_.get(); options.create_if_missing = true; diff --git a/db/event_helpers.cc b/db/event_helpers.cc index 700c5f22c..65f6a5a48 100644 --- a/db/event_helpers.cc +++ b/db/event_helpers.cc @@ -230,6 +230,8 @@ void EventHelpers::NotifyOnErrorRecoveryEnd( db_mutex->AssertHeld(); // release lock while notifying events db_mutex->Unlock(); + 
TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:1"); + TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:2"); for (auto& listener : listeners) { BackgroundErrorRecoveryInfo info; info.old_bg_error = old_bg_error; From 4b013dcbed2df84fde3901d7655b9b91c557454d Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 1 Nov 2023 12:04:11 -0700 Subject: [PATCH 247/386] Remove VersionEdit's friends pattern (#12024) Summary: Almost each of VersionEdit private member has its own getter and setter. Current code access them with a combination of directly accessing private members and via getter and setters. There is no obvious benefits to have this pattern except potential performance gains. I tried this simple benchmark for removing the friends pattern completely, and there is no obvious regression. So I think it would good to remove VersionEdit's friends completely. ```TEST_TMPDIR=/dev/shm/rocksdb1 ./db_bench -benchmarks=fillseq -memtablerep=vector -allow_concurrent_memtable_write=false -num_column_families=10 -num=50000000``` With change: fillseq : 2.994 micros/op 333980 ops/sec 149.710 seconds 50000000 operations; 36.9 MB/s fillseq : 3.033 micros/op 329656 ops/sec 151.673 seconds 50000000 operations; 36.5 MB/s fillseq : 2.991 micros/op 334369 ops/sec 149.535 seconds 50000000 operations; 37.0 MB/s Without change: fillseq : 3.015 micros/op 331715 ops/sec 150.732 seconds 50000000 operations; 36.7 MB/s fillseq : 3.044 micros/op 328553 ops/sec 152.182 seconds 50000000 operations; 36.3 MB/s fillseq : 3.091 micros/op 323520 ops/sec 154.550 seconds 50000000 operations; 35.8 MB/s Pull Request resolved: https://github.com/facebook/rocksdb/pull/12024 Reviewed By: pdillinger Differential Revision: D50806066 Pulled By: jowlyzhang fbshipit-source-id: 35d287ce638a38c30f243f85992e615b4c90eb27 --- db/version_edit.h | 15 ++-- db/version_edit_handler.cc | 151 ++++++++++++++++++------------------- db/version_edit_handler.h | 4 +- db/version_set.cc | 80 ++++++++++---------- 4 files changed, 123 insertions(+), 127 deletions(-) diff --git a/db/version_edit.h b/db/version_edit.h index 5d7687204..80792e496 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -618,6 +618,8 @@ class VersionEdit { } uint32_t GetColumnFamily() const { return column_family_; } + const std::string& GetColumnFamilyName() const { return column_family_name_; } + // set column family ID by calling SetColumnFamily() void AddColumnFamily(const std::string& name) { assert(!is_column_family_drop_); @@ -648,6 +650,9 @@ class VersionEdit { remaining_entries_ = remaining_entries; } bool IsInAtomicGroup() const { return is_in_atomic_group_; } + void SetRemainingEntries(uint32_t remaining_entries) { + remaining_entries_ = remaining_entries; + } uint32_t GetRemainingEntries() const { return remaining_entries_; } bool HasFullHistoryTsLow() const { return !full_history_ts_low_.empty(); } @@ -678,16 +683,6 @@ class VersionEdit { std::string DebugJSON(int edit_num, bool hex_key = false) const; private: - friend class ReactiveVersionSet; - friend class VersionEditHandlerBase; - friend class ListColumnFamiliesHandler; - friend class VersionEditHandler; - friend class VersionEditHandlerPointInTime; - friend class DumpManifestHandler; - friend class VersionSet; - friend class Version; - friend class AtomicGroupReadBuffer; - bool GetLevel(Slice* input, int* level, const char** msg); const char* DecodeNewFile4From(Slice* input); diff --git a/db/version_edit_handler.cc b/db/version_edit_handler.cc index 7f8e30390..90afc0938 100644 --- 
a/db/version_edit_handler.cc +++ b/db/version_edit_handler.cc @@ -43,7 +43,7 @@ void VersionEditHandlerBase::Iterate(log::Reader& reader, break; } ColumnFamilyData* cfd = nullptr; - if (edit.is_in_atomic_group_) { + if (edit.IsInAtomicGroup()) { if (read_buffer_.IsFull()) { for (auto& e : read_buffer_.replay_buffer()) { s = ApplyVersionEdit(e, &cfd); @@ -101,20 +101,18 @@ void VersionEditHandlerBase::Iterate(log::Reader& reader, Status ListColumnFamiliesHandler::ApplyVersionEdit( VersionEdit& edit, ColumnFamilyData** /*unused*/) { Status s; - if (edit.is_column_family_add_) { - if (column_family_names_.find(edit.column_family_) != - column_family_names_.end()) { + uint32_t cf_id = edit.GetColumnFamily(); + if (edit.IsColumnFamilyAdd()) { + if (column_family_names_.find(cf_id) != column_family_names_.end()) { s = Status::Corruption("Manifest adding the same column family twice"); } else { - column_family_names_.insert( - {edit.column_family_, edit.column_family_name_}); + column_family_names_.insert({cf_id, edit.GetColumnFamilyName()}); } - } else if (edit.is_column_family_drop_) { - if (column_family_names_.find(edit.column_family_) == - column_family_names_.end()) { + } else if (edit.IsColumnFamilyDrop()) { + if (column_family_names_.find(cf_id) == column_family_names_.end()) { s = Status::Corruption("Manifest - dropping non-existing column family"); } else { - column_family_names_.erase(edit.column_family_); + column_family_names_.erase(cf_id); } } return s; @@ -201,9 +199,9 @@ Status VersionEditHandler::Initialize() { Status VersionEditHandler::ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) { Status s; - if (edit.is_column_family_add_) { + if (edit.IsColumnFamilyAdd()) { s = OnColumnFamilyAdd(edit, cfd); - } else if (edit.is_column_family_drop_) { + } else if (edit.IsColumnFamilyDrop()) { s = OnColumnFamilyDrop(edit, cfd); } else if (edit.IsWalAddition()) { s = OnWalAddition(edit); @@ -227,22 +225,22 @@ Status VersionEditHandler::OnColumnFamilyAdd(VersionEdit& edit, assert(cfd != nullptr); *cfd = nullptr; + const std::string& cf_name = edit.GetColumnFamilyName(); Status s; if (cf_in_builders || cf_in_not_found) { s = Status::Corruption("MANIFEST adding the same column family twice: " + - edit.column_family_name_); + cf_name); } if (s.ok()) { - auto cf_options = name_to_options_.find(edit.column_family_name_); + auto cf_options = name_to_options_.find(cf_name); // implicitly add persistent_stats column family without requiring user // to specify ColumnFamilyData* tmp_cfd = nullptr; bool is_persistent_stats_column_family = - edit.column_family_name_.compare(kPersistentStatsColumnFamilyName) == 0; + cf_name.compare(kPersistentStatsColumnFamilyName) == 0; if (cf_options == name_to_options_.end() && !is_persistent_stats_column_family) { - column_families_not_found_.emplace(edit.column_family_, - edit.column_family_name_); + column_families_not_found_.emplace(edit.GetColumnFamily(), cf_name); } else { if (is_persistent_stats_column_family) { ColumnFamilyOptions cfo; @@ -270,7 +268,7 @@ Status VersionEditHandler::OnColumnFamilyDrop(VersionEdit& edit, if (cf_in_builders) { tmp_cfd = DestroyCfAndCleanup(edit); } else if (cf_in_not_found) { - column_families_not_found_.erase(edit.column_family_); + column_families_not_found_.erase(edit.GetColumnFamily()); } else { s = Status::Corruption("MANIFEST - dropping non-existing column family"); } @@ -305,10 +303,10 @@ Status VersionEditHandler::OnNonCfOperation(VersionEdit& edit, } ColumnFamilyData* tmp_cfd = nullptr; if (s.ok()) { - auto 
builder_iter = builders_.find(edit.column_family_); + auto builder_iter = builders_.find(edit.GetColumnFamily()); assert(builder_iter != builders_.end()); tmp_cfd = version_set_->GetColumnFamilySet()->GetColumnFamily( - edit.column_family_); + edit.GetColumnFamily()); assert(tmp_cfd != nullptr); // It's important to handle file boundaries before `MaybeCreateVersion` // because `VersionEditHandlerPointInTime::MaybeCreateVersion` does @@ -362,11 +360,12 @@ void VersionEditHandler::CheckColumnFamilyId(const VersionEdit& edit, // record. Once we encounter column family drop record, // we will delete the column family from // column_families_not_found. - bool in_not_found = column_families_not_found_.find(edit.column_family_) != + uint32_t cf_id = edit.GetColumnFamily(); + bool in_not_found = column_families_not_found_.find(cf_id) != column_families_not_found_.end(); // in builders means that user supplied that column family // option AND that we encountered column family add record - bool in_builders = builders_.find(edit.column_family_) != builders_.end(); + bool in_builders = builders_.find(cf_id) != builders_.end(); // They cannot both be true assert(!(in_not_found && in_builders)); *cf_in_not_found = in_not_found; @@ -378,17 +377,17 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader, assert(s != nullptr); if (!s->ok()) { // Do nothing here. - } else if (!version_edit_params_.has_log_number_ || - !version_edit_params_.has_next_file_number_ || - !version_edit_params_.has_last_sequence_) { + } else if (!version_edit_params_.HasLogNumber() || + !version_edit_params_.HasNextFile() || + !version_edit_params_.HasLastSequence()) { std::string msg("no "); - if (!version_edit_params_.has_log_number_) { + if (!version_edit_params_.HasLogNumber()) { msg.append("log_file_number, "); } - if (!version_edit_params_.has_next_file_number_) { + if (!version_edit_params_.HasNextFile()) { msg.append("next_file_number, "); } - if (!version_edit_params_.has_last_sequence_) { + if (!version_edit_params_.HasLastSequence()) { msg.append("last_sequence, "); } msg = msg.substr(0, msg.size() - 2); @@ -409,11 +408,11 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader, } if (s->ok()) { version_set_->GetColumnFamilySet()->UpdateMaxColumnFamily( - version_edit_params_.max_column_family_); + version_edit_params_.GetMaxColumnFamily()); version_set_->MarkMinLogNumberToKeep( - version_edit_params_.min_log_number_to_keep_); - version_set_->MarkFileNumberUsed(version_edit_params_.prev_log_number_); - version_set_->MarkFileNumberUsed(version_edit_params_.log_number_); + version_edit_params_.GetMinLogNumberToKeep()); + version_set_->MarkFileNumberUsed(version_edit_params_.GetPrevLogNumber()); + version_set_->MarkFileNumberUsed(version_edit_params_.GetLogNumber()); for (auto* cfd : *(version_set_->GetColumnFamilySet())) { if (cfd->IsDropped()) { continue; @@ -464,9 +463,9 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader, if (s->ok()) { version_set_->manifest_file_size_ = reader.GetReadOffset(); assert(version_set_->manifest_file_size_ > 0); - version_set_->next_file_number_.store( - version_edit_params_.next_file_number_ + 1); - SequenceNumber last_seq = version_edit_params_.last_sequence_; + version_set_->next_file_number_.store(version_edit_params_.GetNextFile() + + 1); + SequenceNumber last_seq = version_edit_params_.GetLastSequence(); assert(last_seq != kMaxSequenceNumber); if (last_seq != kMaxSequenceNumber && last_seq > 
version_set_->last_allocated_sequence_.load()) { @@ -488,46 +487,46 @@ void VersionEditHandler::CheckIterationResult(const log::Reader& reader, // sequence number zeroed through compaction. version_set_->descriptor_last_sequence_ = last_seq; } - version_set_->prev_log_number_ = version_edit_params_.prev_log_number_; + version_set_->prev_log_number_ = version_edit_params_.GetPrevLogNumber(); } } ColumnFamilyData* VersionEditHandler::CreateCfAndInit( const ColumnFamilyOptions& cf_options, const VersionEdit& edit) { + uint32_t cf_id = edit.GetColumnFamily(); ColumnFamilyData* cfd = version_set_->CreateColumnFamily(cf_options, read_options_, &edit); assert(cfd != nullptr); cfd->set_initialized(); - assert(builders_.find(edit.column_family_) == builders_.end()); - builders_.emplace(edit.column_family_, + assert(builders_.find(cf_id) == builders_.end()); + builders_.emplace(cf_id, VersionBuilderUPtr(new BaseReferencedVersionBuilder(cfd))); if (track_missing_files_) { - cf_to_missing_files_.emplace(edit.column_family_, - std::unordered_set()); - cf_to_missing_blob_files_high_.emplace(edit.column_family_, - kInvalidBlobFileNumber); + cf_to_missing_files_.emplace(cf_id, std::unordered_set()); + cf_to_missing_blob_files_high_.emplace(cf_id, kInvalidBlobFileNumber); } return cfd; } ColumnFamilyData* VersionEditHandler::DestroyCfAndCleanup( const VersionEdit& edit) { - auto builder_iter = builders_.find(edit.column_family_); + uint32_t cf_id = edit.GetColumnFamily(); + auto builder_iter = builders_.find(cf_id); assert(builder_iter != builders_.end()); builders_.erase(builder_iter); if (track_missing_files_) { - auto missing_files_iter = cf_to_missing_files_.find(edit.column_family_); + auto missing_files_iter = cf_to_missing_files_.find(cf_id); assert(missing_files_iter != cf_to_missing_files_.end()); cf_to_missing_files_.erase(missing_files_iter); auto missing_blob_files_high_iter = - cf_to_missing_blob_files_high_.find(edit.column_family_); + cf_to_missing_blob_files_high_.find(cf_id); assert(missing_blob_files_high_iter != cf_to_missing_blob_files_high_.end()); cf_to_missing_blob_files_high_.erase(missing_blob_files_high_iter); } ColumnFamilyData* ret = - version_set_->GetColumnFamilySet()->GetColumnFamily(edit.column_family_); + version_set_->GetColumnFamilySet()->GetColumnFamily(cf_id); assert(ret != nullptr); ret->SetDropped(); ret->UnrefAndTryDelete(); @@ -598,33 +597,33 @@ Status VersionEditHandler::LoadTables(ColumnFamilyData* cfd, Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, const VersionEdit& edit) { Status s; - if (edit.has_db_id_) { + if (edit.HasDbId()) { version_set_->db_id_ = edit.GetDbId(); - version_edit_params_.SetDBId(edit.db_id_); + version_edit_params_.SetDBId(edit.GetDbId()); } if (cfd != nullptr) { - if (edit.has_log_number_) { - if (cfd->GetLogNumber() > edit.log_number_) { + if (edit.HasLogNumber()) { + if (cfd->GetLogNumber() > edit.GetLogNumber()) { ROCKS_LOG_WARN( version_set_->db_options()->info_log, "MANIFEST corruption detected, but ignored - Log numbers in " "records NOT monotonically increasing"); } else { - cfd->SetLogNumber(edit.log_number_); - version_edit_params_.SetLogNumber(edit.log_number_); + cfd->SetLogNumber(edit.GetLogNumber()); + version_edit_params_.SetLogNumber(edit.GetLogNumber()); } } - if (edit.has_comparator_) { + if (edit.HasComparatorName()) { bool mark_sst_files_has_no_udt = false; // If `persist_user_defined_timestamps` flag is recorded in manifest, it // is guaranteed to be in the same VersionEdit as comparator. 
Otherwise, // it's not recorded and it should have default value true. s = ValidateUserDefinedTimestampsOptions( - cfd->user_comparator(), edit.comparator_, + cfd->user_comparator(), edit.GetComparatorName(), cfd->ioptions()->persist_user_defined_timestamps, - edit.persist_user_defined_timestamps_, &mark_sst_files_has_no_udt); + edit.GetPersistUserDefinedTimestamps(), &mark_sst_files_has_no_udt); if (!s.ok() && cf_to_cmp_names_) { - cf_to_cmp_names_->emplace(cfd->GetID(), edit.comparator_); + cf_to_cmp_names_->emplace(cfd->GetID(), edit.GetComparatorName()); } if (mark_sst_files_has_no_udt) { cfds_to_mark_no_udt_.insert(cfd->GetID()); @@ -637,29 +636,29 @@ Status VersionEditHandler::ExtractInfoFromVersionEdit(ColumnFamilyData* cfd, } if (s.ok()) { - if (edit.has_prev_log_number_) { - version_edit_params_.SetPrevLogNumber(edit.prev_log_number_); + if (edit.HasPrevLogNumber()) { + version_edit_params_.SetPrevLogNumber(edit.GetPrevLogNumber()); } - if (edit.has_next_file_number_) { - version_edit_params_.SetNextFile(edit.next_file_number_); + if (edit.HasNextFile()) { + version_edit_params_.SetNextFile(edit.GetNextFile()); } - if (edit.has_max_column_family_) { - version_edit_params_.SetMaxColumnFamily(edit.max_column_family_); + if (edit.HasMaxColumnFamily()) { + version_edit_params_.SetMaxColumnFamily(edit.GetMaxColumnFamily()); } - if (edit.has_min_log_number_to_keep_) { - version_edit_params_.min_log_number_to_keep_ = - std::max(version_edit_params_.min_log_number_to_keep_, - edit.min_log_number_to_keep_); + if (edit.HasMinLogNumberToKeep()) { + version_edit_params_.SetMinLogNumberToKeep( + std::max(version_edit_params_.GetMinLogNumberToKeep(), + edit.GetMinLogNumberToKeep())); } - if (edit.has_last_sequence_) { + if (edit.HasLastSequence()) { // `VersionEdit::last_sequence_`s are assumed to be non-decreasing. This // is legacy behavior that cannot change without breaking downgrade // compatibility. 
- assert(!version_edit_params_.has_last_sequence_ || - version_edit_params_.last_sequence_ <= edit.last_sequence_); - version_edit_params_.SetLastSequence(edit.last_sequence_); + assert(!version_edit_params_.HasLastSequence() || + version_edit_params_.GetLastSequence() <= edit.GetLastSequence()); + version_edit_params_.SetLastSequence(edit.GetLastSequence()); } - if (!version_edit_params_.has_prev_log_number_) { + if (!version_edit_params_.HasPrevLogNumber()) { version_edit_params_.SetPrevLogNumber(0); } } @@ -765,7 +764,7 @@ void VersionEditHandlerPointInTime::CheckIterationResult( ColumnFamilyData* VersionEditHandlerPointInTime::DestroyCfAndCleanup( const VersionEdit& edit) { ColumnFamilyData* cfd = VersionEditHandler::DestroyCfAndCleanup(edit); - auto v_iter = versions_.find(edit.column_family_); + auto v_iter = versions_.find(edit.GetColumnFamily()); if (v_iter != versions_.end()) { delete v_iter->second; versions_.erase(v_iter); @@ -777,7 +776,7 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion( const VersionEdit& edit, ColumnFamilyData* cfd, bool force_create_version) { assert(cfd != nullptr); if (!force_create_version) { - assert(edit.column_family_ == cfd->GetID()); + assert(edit.GetColumnFamily() == cfd->GetID()); } auto missing_files_iter = cf_to_missing_files_.find(cfd->GetID()); assert(missing_files_iter != cf_to_missing_files_.end()); @@ -860,9 +859,9 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion( const bool has_missing_files = !missing_files.empty() || has_missing_blob_files; - bool missing_info = !version_edit_params_.has_log_number_ || - !version_edit_params_.has_next_file_number_ || - !version_edit_params_.has_last_sequence_; + bool missing_info = !version_edit_params_.HasLogNumber() || + !version_edit_params_.HasNextFile() || + !version_edit_params_.HasLastSequence(); // Create version before apply edit. The version will represent the state // before applying the version edit. diff --git a/db/version_edit_handler.h b/db/version_edit_handler.h index dd55a4de9..af0817e4a 100644 --- a/db/version_edit_handler.h +++ b/db/version_edit_handler.h @@ -133,8 +133,8 @@ class VersionEditHandler : public VersionEditHandlerBase { bool HasMissingFiles() const; void GetDbId(std::string* db_id) const { - if (db_id && version_edit_params_.has_db_id_) { - *db_id = version_edit_params_.db_id_; + if (db_id && version_edit_params_.HasDbId()) { + *db_id = version_edit_params_.GetDbId(); } } diff --git a/db/version_set.cc b/db/version_set.cc index 17c430575..b66c9b8cd 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5002,15 +5002,15 @@ struct VersionSet::ManifestWriter { Status AtomicGroupReadBuffer::AddEdit(VersionEdit* edit) { assert(edit); - if (edit->is_in_atomic_group_) { + if (edit->IsInAtomicGroup()) { TEST_SYNC_POINT("AtomicGroupReadBuffer::AddEdit:AtomicGroup"); if (replay_buffer_.empty()) { - replay_buffer_.resize(edit->remaining_entries_ + 1); + replay_buffer_.resize(edit->GetRemainingEntries() + 1); TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:FirstInAtomicGroup", edit); } read_edits_in_atomic_group_++; - if (read_edits_in_atomic_group_ + edit->remaining_entries_ != + if (read_edits_in_atomic_group_ + edit->GetRemainingEntries() != static_cast(replay_buffer_.size())) { TEST_SYNC_POINT_CALLBACK( "AtomicGroupReadBuffer::AddEdit:IncorrectAtomicGroupSize", edit); @@ -5209,15 +5209,15 @@ Status VersionSet::ProcessManifestWrites( // don't update, then Recover can report corrupted atomic group because // the `remaining_entries_` do not match. 
if (!batch_edits.empty()) { - if (batch_edits.back()->is_in_atomic_group_ && - batch_edits.back()->remaining_entries_ > 0) { + if (batch_edits.back()->IsInAtomicGroup() && + batch_edits.back()->GetRemainingEntries() > 0) { assert(group_start < batch_edits.size()); const auto& edit_list = last_writer->edit_list; size_t k = 0; while (k < edit_list.size()) { - if (!edit_list[k]->is_in_atomic_group_) { + if (!edit_list[k]->IsInAtomicGroup()) { break; - } else if (edit_list[k]->remaining_entries_ == 0) { + } else if (edit_list[k]->GetRemainingEntries() == 0) { ++k; break; } @@ -5225,8 +5225,10 @@ Status VersionSet::ProcessManifestWrites( } for (auto i = group_start; i < batch_edits.size(); ++i) { assert(static_cast(k) <= - batch_edits.back()->remaining_entries_); - batch_edits[i]->remaining_entries_ -= static_cast(k); + batch_edits.back()->GetRemainingEntries()); + batch_edits[i]->SetRemainingEntries( + batch_edits[i]->GetRemainingEntries() - + static_cast(k)); } } } @@ -5269,10 +5271,10 @@ Status VersionSet::ProcessManifestWrites( assert(ucmp); std::optional edit_ts_sz = ucmp->timestamp_size(); for (const auto& e : last_writer->edit_list) { - if (e->is_in_atomic_group_) { - if (batch_edits.empty() || !batch_edits.back()->is_in_atomic_group_ || - (batch_edits.back()->is_in_atomic_group_ && - batch_edits.back()->remaining_entries_ == 0)) { + if (e->IsInAtomicGroup()) { + if (batch_edits.empty() || !batch_edits.back()->IsInAtomicGroup() || + (batch_edits.back()->IsInAtomicGroup() && + batch_edits.back()->GetRemainingEntries() == 0)) { group_start = batch_edits.size(); } } else if (group_start != std::numeric_limits::max()) { @@ -5311,7 +5313,7 @@ Status VersionSet::ProcessManifestWrites( // remaining_entries_. size_t k = 0; while (k < batch_edits.size()) { - while (k < batch_edits.size() && !batch_edits[k]->is_in_atomic_group_) { + while (k < batch_edits.size() && !batch_edits[k]->IsInAtomicGroup()) { ++k; } if (k == batch_edits.size()) { @@ -5319,19 +5321,19 @@ Status VersionSet::ProcessManifestWrites( } size_t i = k; while (i < batch_edits.size()) { - if (!batch_edits[i]->is_in_atomic_group_) { + if (!batch_edits[i]->IsInAtomicGroup()) { break; } - assert(i - k + batch_edits[i]->remaining_entries_ == - batch_edits[k]->remaining_entries_); - if (batch_edits[i]->remaining_entries_ == 0) { + assert(i - k + batch_edits[i]->GetRemainingEntries() == + batch_edits[k]->GetRemainingEntries()); + if (batch_edits[i]->GetRemainingEntries() == 0) { ++i; break; } ++i; } - assert(batch_edits[i - 1]->is_in_atomic_group_); - assert(0 == batch_edits[i - 1]->remaining_entries_); + assert(batch_edits[i - 1]->IsInAtomicGroup()); + assert(0 == batch_edits[i - 1]->GetRemainingEntries()); std::vector tmp; for (size_t j = k; j != i; ++j) { tmp.emplace_back(batch_edits[j]); @@ -5514,7 +5516,7 @@ Status VersionSet::ProcessManifestWrites( new_manifest_file_size = descriptor_log_->file()->GetFileSize(); } - if (first_writer.edit_list.front()->is_column_family_drop_) { + if (first_writer.edit_list.front()->IsColumnFamilyDrop()) { TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:0"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:1"); TEST_SYNC_POINT("VersionSet::LogAndApply::ColumnFamilyDrop:2"); @@ -5556,13 +5558,13 @@ Status VersionSet::ProcessManifestWrites( // Install the new versions if (s.ok()) { - if (first_writer.edit_list.front()->is_column_family_add_) { + if (first_writer.edit_list.front()->IsColumnFamilyAdd()) { assert(batch_edits.size() == 1); assert(new_cf_options != nullptr); 
assert(max_last_sequence == descriptor_last_sequence_); CreateColumnFamily(*new_cf_options, read_options, first_writer.edit_list.front()); - } else if (first_writer.edit_list.front()->is_column_family_drop_) { + } else if (first_writer.edit_list.front()->IsColumnFamilyDrop()) { assert(batch_edits.size() == 1); assert(max_last_sequence == descriptor_last_sequence_); first_writer.cfd->SetDropped(); @@ -5575,22 +5577,22 @@ Status VersionSet::ProcessManifestWrites( for (const auto& e : batch_edits) { ColumnFamilyData* cfd = nullptr; if (!e->IsColumnFamilyManipulation()) { - cfd = column_family_set_->GetColumnFamily(e->column_family_); + cfd = column_family_set_->GetColumnFamily(e->GetColumnFamily()); // e would not have been added to batch_edits if its corresponding // column family is dropped. assert(cfd); } if (cfd) { - if (e->has_log_number_ && e->log_number_ > cfd->GetLogNumber()) { - cfd->SetLogNumber(e->log_number_); + if (e->HasLogNumber() && e->GetLogNumber() > cfd->GetLogNumber()) { + cfd->SetLogNumber(e->GetLogNumber()); } if (e->HasFullHistoryTsLow()) { cfd->SetFullHistoryTsLow(e->GetFullHistoryTsLow()); } } - if (e->has_min_log_number_to_keep_) { + if (e->HasMinLogNumberToKeep()) { last_min_log_number_to_keep = - std::max(last_min_log_number_to_keep, e->min_log_number_to_keep_); + std::max(last_min_log_number_to_keep, e->GetMinLogNumberToKeep()); } } @@ -5607,7 +5609,7 @@ Status VersionSet::ProcessManifestWrites( descriptor_last_sequence_ = max_last_sequence; manifest_file_number_ = pending_manifest_file_number_; manifest_file_size_ = new_manifest_file_size; - prev_log_number_ = first_writer.edit_list.front()->prev_log_number_; + prev_log_number_ = first_writer.edit_list.front()->GetPrevLogNumber(); } else { std::string version_edits; for (auto& e : batch_edits) { @@ -5755,7 +5757,7 @@ Status VersionSet::LogAndApply( int num_cfds = static_cast(column_family_datas.size()); if (num_cfds == 1 && column_family_datas[0] == nullptr) { assert(edit_lists.size() == 1 && edit_lists[0].size() == 1); - assert(edit_lists[0][0]->is_column_family_add_); + assert(edit_lists[0][0]->IsColumnFamilyAdd()); assert(new_cf_options != nullptr); } std::deque writers; @@ -5819,7 +5821,7 @@ void VersionSet::LogAndApplyCFHelper(VersionEdit* edit, edit->SetNextFile(next_file_number_.load()); assert(!edit->HasLastSequence()); edit->SetLastSequence(*max_last_sequence); - if (edit->is_column_family_drop_) { + if (edit->IsColumnFamilyDrop()) { // if we drop column family, we have to make sure to save max column family, // so that we don't reuse existing ID edit->SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily()); @@ -5837,12 +5839,12 @@ Status VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, assert(!edit->IsColumnFamilyManipulation()); assert(max_last_sequence != nullptr); - if (edit->has_log_number_) { - assert(edit->log_number_ >= cfd->GetLogNumber()); - assert(edit->log_number_ < next_file_number_.load()); + if (edit->HasLogNumber()) { + assert(edit->GetLogNumber() >= cfd->GetLogNumber()); + assert(edit->GetLogNumber() < next_file_number_.load()); } - if (!edit->has_prev_log_number_) { + if (!edit->HasPrevLogNumber()) { edit->SetPrevLogNumber(prev_log_number_); } edit->SetNextFile(next_file_number_.load()); @@ -5934,7 +5936,7 @@ Status VersionSet::Recover( handler.Iterate(reader, &log_read_status); s = handler.status(); if (s.ok()) { - log_number = handler.GetVersionEditParams().log_number_; + log_number = handler.GetVersionEditParams().GetLogNumber(); current_manifest_file_size = 
reader.GetReadOffset(); assert(current_manifest_file_size != 0); handler.GetDbId(db_id); @@ -7105,7 +7107,7 @@ uint64_t VersionSet::GetObsoleteSstFilesSize() const { ColumnFamilyData* VersionSet::CreateColumnFamily( const ColumnFamilyOptions& cf_options, const ReadOptions& read_options, const VersionEdit* edit) { - assert(edit->is_column_family_add_); + assert(edit->IsColumnFamilyAdd()); MutableCFOptions dummy_cf_options; Version* dummy_versions = @@ -7114,7 +7116,7 @@ ColumnFamilyData* VersionSet::CreateColumnFamily( // by avoiding calling "delete" explicitly (~Version is private) dummy_versions->Ref(); auto new_cfd = column_family_set_->CreateColumnFamily( - edit->column_family_name_, edit->column_family_, dummy_versions, + edit->GetColumnFamilyName(), edit->GetColumnFamily(), dummy_versions, cf_options); Version* v = new Version(new_cfd, this, file_options_, @@ -7131,7 +7133,7 @@ ColumnFamilyData* VersionSet::CreateColumnFamily( // cfd is not available to client new_cfd->CreateNewMemtable(*new_cfd->GetLatestMutableCFOptions(), LastSequence()); - new_cfd->SetLogNumber(edit->log_number_); + new_cfd->SetLogNumber(edit->GetLogNumber()); return new_cfd; } From 8e1adab5cecad129131a4eceabe645b9442acb9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Thu, 2 Nov 2023 08:00:08 -0700 Subject: [PATCH 248/386] add RocksDB#clipColumnFamily to Java API (#11868) Summary: ### main change: - add java clipColumnFamily api in Rocksdb.java The method signature of the new API is ``` public void clipColumnFamily(final ColumnFamilyHandle columnFamilyHandle, final byte[] beginKey, final byte[] endKey) ``` ### Test add unit test RocksDBTest#clipColumnFamily() Pull Request resolved: https://github.com/facebook/rocksdb/pull/11868 Reviewed By: jaykorean Differential Revision: D50889783 Pulled By: cbi42 fbshipit-source-id: 7f545171ad9adb9c20bdd92efae2e6bc55d5703f --- java/rocksjni/rocksjni.cc | 55 +++++++++++++++++++ java/src/main/java/org/rocksdb/RocksDB.java | 23 ++++++++ .../test/java/org/rocksdb/RocksDBTest.java | 22 ++++++++ .../add_clip_column_family_to_java_api.md | 1 + 4 files changed, 101 insertions(+) create mode 100644 unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 920b443b9..12358ee9b 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1166,6 +1166,61 @@ void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( } } +/* + * Class: org_rocksdb_RocksDB + * Method: clipColumnFamily + * Signature: (JJ[BII[BII)V + */ +void Java_org_rocksdb_RocksDB_clipColumnFamily( + JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, + jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, + jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { + auto* db = reinterpret_cast(jdb_handle); + auto* cf_handle = + reinterpret_cast(jcf_handle); + if (cf_handle != nullptr) { + jbyte* begin_key = new jbyte[jbegin_key_len]; + env->GetByteArrayRegion(jbegin_key, jbegin_key_off, jbegin_key_len, + begin_key); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + delete[] begin_key; + return; + } + ROCKSDB_NAMESPACE::Slice begin_key_slice(reinterpret_cast(begin_key), + jbegin_key_len); + + jbyte* end_key = new jbyte[jend_key_len]; + env->GetByteArrayRegion(jend_key, jend_key_off, jend_key_len, end_key); + if (env->ExceptionCheck()) { + // exception thrown: ArrayIndexOutOfBoundsException + delete[] begin_key; + delete[] end_key; + return; + } 
+ ROCKSDB_NAMESPACE::Slice end_key_slice(reinterpret_cast(end_key), + jend_key_len); + + ROCKSDB_NAMESPACE::Status s = + db->ClipColumnFamily(cf_handle, begin_key_slice, end_key_slice); + + // cleanup + delete[] begin_key; + delete[] end_key; + + if (s.ok()) { + return; + } + + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); + return; + } else { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, ROCKSDB_NAMESPACE::Status::InvalidArgument( + "Invalid ColumnFamilyHandle.")); + } +} + /* * Class: org_rocksdb_RocksDB * Method: getDirect diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 453c4b033..303697139 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -3687,6 +3687,26 @@ public void compactRange( columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_); } + /** + * ClipColumnFamily() will clip the entries in the CF according to the range + * [begin_key, end_key). Returns OK on success, and a non-OK status on error. + * Any entries outside this range will be completely deleted (including + * tombstones). + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param beginKey First key to clip within database (inclusive) + * @param endKey Last key to clip within database (exclusive) + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public void clipColumnFamily(final ColumnFamilyHandle columnFamilyHandle, final byte[] beginKey, + final byte[] endKey) throws RocksDBException { + clipColumnFamily(nativeHandle_, + columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, beginKey, 0, + beginKey.length, endKey, 0, endKey.length); + } + /** * Change the options for the column family handle. 
* @@ -4751,6 +4771,9 @@ private native void deleteRange( final int beginKeyOffset, final int beginKeyLength, final byte[] endKey, final int endKeyOffset, final int endKeyLength, final long cfHandle) throws RocksDBException; + private native void clipColumnFamily(final long handle, final long cfHandle, + final byte[] beginKey, final int beginKeyOffset, final int beginKeyLength, + final byte[] endKey, final int endKeyOffset, final int endKeyLength) throws RocksDBException; private native void merge(final long handle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index fa1d1bd05..ed6e989a8 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -546,6 +546,28 @@ public void deleteRange() throws RocksDBException { } } + @Test + public void clipColumnFamily() throws RocksDBException { + try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath())) { + db.put("key1".getBytes(), "value".getBytes()); + db.put("key2".getBytes(), "12345678".getBytes()); + db.put("key3".getBytes(), "abcdefg".getBytes()); + db.put("key4".getBytes(), "xyz".getBytes()); + db.put("key5".getBytes(), "qwer".getBytes()); + assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); + assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); + assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); + assertThat(db.get("key4".getBytes())).isEqualTo("xyz".getBytes()); + assertThat(db.get("key5".getBytes())).isEqualTo("qwer".getBytes()); + db.clipColumnFamily(db.getDefaultColumnFamily(), "key2".getBytes(), "key4".getBytes()); + assertThat(db.get("key1".getBytes())).isNull(); + assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); + assertThat(db.get("key3".getBytes())).isEqualTo("abcdefg".getBytes()); + assertThat(db.get("key4".getBytes())).isNull(); + assertThat(db.get("key5".getBytes())).isNull(); + } + } + @Test public void getIntProperty() throws RocksDBException { try ( diff --git a/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md b/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md new file mode 100644 index 000000000..3da9e99a5 --- /dev/null +++ b/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md @@ -0,0 +1 @@ +Added clipColumnFamily() to the Java API to clip the entries in the CF according to the range [begin_key, end_key). \ No newline at end of file From a42910537dfa0d91f09194039d45cf9fafcc4cba Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 2 Nov 2023 13:27:59 -0700 Subject: [PATCH 249/386] Save the correct user comparator name in OPTIONS file (#12037) Summary: I noticed the user comparator name in OPTIONS file can be incorrect when working on a recent stress test failure. The name of the comparator retrieved via the "Comparator::GetRootComparator" API is saved in OPTIONS file as the user comparator. The intention was to get the user comparator wrapped in the internal comparator. However `ImmutableCFOptions.user_comparator` has always been a user comparator of type `Comparator`. The corresponding `GetRootComparator` API is also defined only for user comparator type `Comparator`, not the internal key comparator type `InternalKeyComparator`. 
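To make that distinction concrete, here is a minimal sketch (not part of this patch) that compiles against only the public `rocksdb/comparator.h` header; the printed strings are examples of what the two names can look like for a timestamp-aware built-in comparator, not output guaranteed by this change:

```
#include <cstdio>

#include "rocksdb/comparator.h"

int main() {
  const rocksdb::Comparator* with_ts = rocksdb::BytewiseComparatorWithU64Ts();
  // Name of the user comparator itself, e.g. "leveldb.BytewiseComparator.u64ts"
  std::printf("user comparator: %s\n", with_ts->Name());
  // Name of the wrapped root comparator, e.g. "leveldb.BytewiseComparator"
  std::printf("root comparator: %s\n", with_ts->GetRootComparator()->Name());
  return 0;
}
```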
For built in comparator `BytewiseComparator` and `ReverseBytewiseComparator`, there is no difference between `Comparator::Name` and `Comparator::GetRootComparator::Name` because these built in comparators' root comparator is themselves. However, for built in comparator `BytewiseComparatorWithU64Ts` and `ReverseBytewiseComparatorWithU64Ts`, there are differences. So this change update the logic to persist the user comparator's name, not its root comparator's name. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12037 Test Plan: The restore flow in stress test, which relies on converting Options object to string and back to Options object is updated to help validate comparator object can be correctly serialized and deserialized with the OPTIONS file mechanism Updated unit test to use a comparator that has a root comparator that is not itself. Reviewed By: cbi42 Differential Revision: D50909750 Pulled By: jowlyzhang fbshipit-source-id: 9086d7135c7a6f4b5565fb47fce194ea0a024f52 --- db_stress_tool/db_stress_test_base.cc | 8 +++++--- options/cf_options.cc | 9 +-------- options/options_test.cc | 2 ++ 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 14814d51e..e790842e5 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1945,11 +1945,13 @@ Status StressTest::PrepareOptionsForRestoredDB(Options* options) { options->listeners.clear(); // Avoid dangling/shared file descriptors, for reliable destroy options->sst_file_manager = nullptr; - // GetColumnFamilyOptionsFromString does not create customized merge operator, - // and comparator. + // GetColumnFamilyOptionsFromString does not create customized merge operator. InitializeMergeOperator(*options); if (FLAGS_user_timestamp_size > 0) { - CheckAndSetOptionsForUserTimestamp(*options); + // Check OPTIONS string loading can bootstrap the correct user comparator + // from object registry. + assert(options->comparator); + assert(options->comparator == test::BytewiseComparatorWithU64TsWrapper()); } return Status::OK(); diff --git a/options/cf_options.cc b/options/cf_options.cc index 26d124783..f026b0bc9 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -658,19 +658,12 @@ static std::unordered_map // it's a const pointer of const Comparator* const auto* ptr = static_cast(addr); - // Since the user-specified comparator will be wrapped by - // InternalKeyComparator, we should persist the - // user-specified one instead of InternalKeyComparator. 
if (*ptr == nullptr) { *value = kNullptrString; } else if (opts.mutable_options_only) { *value = ""; } else { - const Comparator* root_comp = (*ptr)->GetRootComparator(); - if (root_comp == nullptr) { - root_comp = (*ptr); - } - *value = root_comp->ToString(opts); + *value = (*ptr)->ToString(opts); } return Status::OK(); })}, diff --git a/options/options_test.cc b/options/options_test.cc index a05ed0c8c..a70d479dd 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -1588,6 +1588,7 @@ TEST_F(OptionsTest, GetMutableCFOptions) { TEST_F(OptionsTest, ColumnFamilyOptionsSerialization) { Options options; ColumnFamilyOptions base_opt, new_opt; + base_opt.comparator = test::BytewiseComparatorWithU64TsWrapper(); Random rnd(302); ConfigOptions config_options; config_options.input_strings_escaped = false; @@ -1608,6 +1609,7 @@ TEST_F(OptionsTest, ColumnFamilyOptionsSerialization) { base_options_file_content, &new_opt)); ASSERT_OK( RocksDBOptionsParser::VerifyCFOptions(config_options, base_opt, new_opt)); + ASSERT_EQ(base_opt.comparator, new_opt.comparator); if (base_opt.compaction_filter) { delete base_opt.compaction_filter; } From 2648e0a747303e63796315049b9005c7320356c0 Mon Sep 17 00:00:00 2001 From: 914022466 <914022466@qq.com> Date: Thu, 2 Nov 2023 13:45:37 -0700 Subject: [PATCH 250/386] Fix a bug when ingest plaintable sst file (#11969) Summary: Plaintable doesn't support SeekToLast. And GetIngestedFileInfo is using SeekToLast without checking the validity. We are using IngestExternalFile or CreateColumnFamilyWithImport with some sst file in PlainTable format . But after running for a while, compaction error often happens. Such as ![image](https://github.com/facebook/rocksdb/assets/13954644/b4fa49fc-73fc-49ce-96c6-f198a30800b8) I simply add some std::cerr log to find why. ![image](https://github.com/facebook/rocksdb/assets/13954644/2cf1d5ff-48cc-4125-b917-87090f764fcd) It shows that the smallest key is always equal to largest key. ![image](https://github.com/facebook/rocksdb/assets/13954644/6d43e978-0be0-4306-aae3-f9e4ae366395) Then I found the root cause is that PlainTable do not support SeekToLast, so the smallest key is always the same with the largest I try to write an unit test. But it's not easy to reproduce this error. (This PR is similar to https://github.com/facebook/rocksdb/pull/11266. Sorry for open another PR) Pull Request resolved: https://github.com/facebook/rocksdb/pull/11969 Reviewed By: ajkr Differential Revision: D50933854 Pulled By: cbi42 fbshipit-source-id: 6c6af53c1388922cbabbe64ed3be1cdc58df5431 --- db/external_sst_file_ingestion_job.cc | 29 +++++++++++++++++++++++++-- db/import_column_family_job.cc | 28 ++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 1e461b3a7..4f49c6bfe 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -792,8 +792,33 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( } file_to_ingest->smallest_internal_key.SetFrom(key); - iter->SeekToLast(); - pik_status = ParseInternalKey(iter->key(), &key, allow_data_in_errors); + Slice largest; + if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) { + // PlainTable iterator does not support SeekToLast(). 
+ largest = iter->key(); + for (; iter->Valid(); iter->Next()) { + if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) { + largest = iter->key(); + } + } + if (!iter->status().ok()) { + return iter->status(); + } + } else { + iter->SeekToLast(); + if (!iter->Valid()) { + if (iter->status().ok()) { + // The file contains at least 1 key since iter is valid after + // SeekToFirst(). + return Status::Corruption("Can not find largest key in sst file"); + } else { + return iter->status(); + } + } + largest = iter->key(); + } + + pik_status = ParseInternalKey(largest, &key, allow_data_in_errors); if (!pik_status.ok()) { return Status::Corruption("Corrupted key in external file. ", pik_status.getState()); diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 3ad8b7b61..f8eec0502 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -366,8 +366,32 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( bool bound_set = false; if (iter->Valid()) { file_to_import->smallest_internal_key.DecodeFrom(iter->key()); - iter->SeekToLast(); - file_to_import->largest_internal_key.DecodeFrom(iter->key()); + Slice largest; + if (strcmp(cfd_->ioptions()->table_factory->Name(), "PlainTable") == 0) { + // PlainTable iterator does not support SeekToLast(). + largest = iter->key(); + for (; iter->Valid(); iter->Next()) { + if (cfd_->internal_comparator().Compare(iter->key(), largest) > 0) { + largest = iter->key(); + } + } + if (!iter->status().ok()) { + return iter->status(); + } + } else { + iter->SeekToLast(); + if (!iter->Valid()) { + if (iter->status().ok()) { + // The file contains at least 1 key since iter is valid after + // SeekToFirst(). + return Status::Corruption("Can not find largest key in sst file"); + } else { + return iter->status(); + } + } + largest = iter->key(); + } + file_to_import->largest_internal_key.DecodeFrom(largest); bound_set = true; } From 8505b26db19871a8c8782a35a7b5be9d321d45e0 Mon Sep 17 00:00:00 2001 From: Changyu Bi <102700264+cbi42@users.noreply.github.com> Date: Fri, 3 Nov 2023 09:53:22 -0700 Subject: [PATCH 251/386] Fix stress test error message for black/whitebox test to catch failures (#12039) Summary: black/whitebox crash test relies on error/fail keyword in stderr to catch stress test failure. If a db_stress run prints an error message without these keyword, and then is killed before it graceful exits and prints out "Verification failed" here (https://github.com/facebook/rocksdb/blob/2648e0a747303e63796315049b9005c7320356c0/db_stress_tool/db_stress_driver.cc#L256), the error won't be caught. This is more likely to happen if db_stress is printing a stack trace. This PR fixes some error messages. Ideally in the future we should not rely on searching for keywords in stderr to determine failed stress tests. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12039 Test Plan: ``` Added the following change on top of this PR to simulate exit without relevant keyword: @@ -1586,6 +1587,8 @@ class NonBatchedOpsStressTest : public StressTest { assert(thread); assert(!rand_column_families.empty()); assert(!rand_keys.empty()); + fprintf(stderr, "Inconsistency"); + thread->shared->SafeTerminate(); python3 ./tools/db_crashtest.py blackbox --simple --verify_iterator_with_expected_state_one_in=1 --interval=10 will print a stack trace but continue to run db_stress. 
``` Reviewed By: jaykorean Differential Revision: D50960076 Pulled By: cbi42 fbshipit-source-id: 5c60a1be04ce4a43adbd33f040d54434f2ae24c9 --- db_stress_tool/db_stress_test_base.cc | 17 ++++++----- db_stress_tool/no_batched_ops_stress.cc | 40 +++++++++++++++---------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index e790842e5..20077558f 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1485,11 +1485,11 @@ void StressTest::VerifyIterator(ThreadState* thread, } if (op == kLastOpSeekToFirst && ro.iterate_lower_bound != nullptr) { - // SeekToFirst() with lower bound is not well defined. + // SeekToFirst() with lower bound is not well-defined. *diverged = true; return; } else if (op == kLastOpSeekToLast && ro.iterate_upper_bound != nullptr) { - // SeekToLast() with higher bound is not well defined. + // SeekToLast() with higher bound is not well-defined. *diverged = true; return; } else if (op == kLastOpSeek && ro.iterate_lower_bound != nullptr && @@ -1500,7 +1500,7 @@ void StressTest::VerifyIterator(ThreadState* thread, options_.comparator->CompareWithoutTimestamp( *ro.iterate_lower_bound, /*a_has_ts=*/false, *ro.iterate_upper_bound, /*b_has_ts*/ false) >= 0))) { - // Lower bound behavior is not well defined if it is larger than + // Lower bound behavior is not well-defined if it is larger than // seek key or upper bound. Disable the check for now. *diverged = true; return; @@ -1512,7 +1512,7 @@ void StressTest::VerifyIterator(ThreadState* thread, options_.comparator->CompareWithoutTimestamp( *ro.iterate_lower_bound, /*a_has_ts=*/false, *ro.iterate_upper_bound, /*b_has_ts=*/false) >= 0))) { - // Uppder bound behavior is not well defined if it is smaller than + // Upper bound behavior is not well-defined if it is smaller than // seek key or lower bound. Disable the check for now. *diverged = true; return; @@ -1540,13 +1540,13 @@ void StressTest::VerifyIterator(ThreadState* thread, } } fprintf(stderr, - "Control interator is invalid but iterator has key %s " + "Control iterator is invalid but iterator has key %s " "%s\n", iter->key().ToString(true).c_str(), op_logs.c_str()); *diverged = true; } else if (cmp_iter->Valid()) { - // Iterator is not valid. It can be legimate if it has already been + // Iterator is not valid. It can be legitimate if it has already been // out of upper or lower bound, or filtered out by prefix iterator. const Slice& total_order_key = cmp_iter->key(); @@ -1569,7 +1569,7 @@ void StressTest::VerifyIterator(ThreadState* thread, return; } fprintf(stderr, - "Iterator stays in prefix but contol doesn't" + "Iterator stays in prefix but control doesn't" " iterator key %s control iterator key %s %s\n", iter->key().ToString(true).c_str(), cmp_iter->key().ToString(true).c_str(), op_logs.c_str()); @@ -1616,7 +1616,8 @@ void StressTest::VerifyIterator(ThreadState* thread, } if (*diverged) { - fprintf(stderr, "Control CF %s\n", cmp_cfh->GetName().c_str()); + fprintf(stderr, "VerifyIterator failed. Control CF %s\n", + cmp_cfh->GetName().c_str()); thread->stats.AddErrors(1); // Fail fast to preserve the DB state. 
thread->shared->SetVerificationFailure(); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 302903939..5513fe31e 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -612,7 +612,7 @@ class NonBatchedOpsStressTest : public StressTest { } Status s = NewTxn(wo, &txn); if (!s.ok()) { - fprintf(stderr, "NewTxn: %s\n", s.ToString().c_str()); + fprintf(stderr, "NewTxn error: %s\n", s.ToString().c_str()); thread->shared->SafeTerminate(); } } @@ -662,7 +662,8 @@ class NonBatchedOpsStressTest : public StressTest { assert(false); } if (!s.ok()) { - fprintf(stderr, "Transaction put: %s\n", s.ToString().c_str()); + fprintf(stderr, "Transaction put error: %s\n", + s.ToString().c_str()); thread->shared->SafeTerminate(); } } else { @@ -1696,11 +1697,14 @@ class NonBatchedOpsStressTest : public StressTest { thread->shared->SetVerificationFailure(); if (iter->Valid()) { fprintf(stderr, - "Expected state has key %s, iterator is at key %s\n", + "Verification failed. Expected state has key %s, iterator " + "is at key %s\n", Slice(Key(j)).ToString(true).c_str(), iter->key().ToString(true).c_str()); } else { - fprintf(stderr, "Expected state has key %s, iterator is invalid\n", + fprintf(stderr, + "Verification failed. Expected state has key %s, iterator " + "is invalid\n", Slice(Key(j)).ToString(true).c_str()); } fprintf(stderr, "Column family: %s, op_logs: %s\n", @@ -1750,7 +1754,8 @@ class NonBatchedOpsStressTest : public StressTest { if (static_cast(curr) <= last_key) { thread->shared->SetVerificationFailure(); fprintf(stderr, - "TestIterateAgainstExpected found unexpectedly small key\n"); + "TestIterateAgainstExpected failed: found unexpectedly small " + "key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at least: %s\n", @@ -1807,7 +1812,8 @@ class NonBatchedOpsStressTest : public StressTest { if (last_key <= static_cast(curr)) { thread->shared->SetVerificationFailure(); fprintf(stderr, - "TestIterateAgainstExpected found unexpectedly large key\n"); + "TestIterateAgainstExpected failed: found unexpectedly large " + "key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at most: %s\n", @@ -1869,7 +1875,8 @@ class NonBatchedOpsStressTest : public StressTest { if (static_cast(curr) < mid) { thread->shared->SetVerificationFailure(); fprintf(stderr, - "TestIterateAgainstExpected found unexpectedly small key\n"); + "TestIterateAgainstExpected failed: found unexpectedly small " + "key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at least: %s\n", @@ -1892,7 +1899,8 @@ class NonBatchedOpsStressTest : public StressTest { if (mid < static_cast(curr)) { thread->shared->SetVerificationFailure(); fprintf(stderr, - "TestIterateAgainstExpected found unexpectedly large key\n"); + "TestIterateAgainstExpected failed: found unexpectedly large " + "key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at most: %s\n", @@ -1932,7 +1940,9 @@ class NonBatchedOpsStressTest : public StressTest { post_read_expected_value)) { // Fail fast to preserve the DB state. 
thread->shared->SetVerificationFailure(); - fprintf(stderr, "Iterator has key %s, but expected state does not.\n", + fprintf(stderr, + "Verification failed: iterator has key %s, but expected " + "state does not.\n", iter->key().ToString(true).c_str()); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); @@ -1950,9 +1960,9 @@ class NonBatchedOpsStressTest : public StressTest { GetIntVal(iter->key().ToString(), &next); if (next <= curr) { thread->shared->SetVerificationFailure(); - fprintf( - stderr, - "TestIterateAgainstExpected found unexpectedly small key\n"); + fprintf(stderr, + "TestIterateAgainstExpected failed: found unexpectedly " + "small key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at least: %s\n", @@ -1975,9 +1985,9 @@ class NonBatchedOpsStressTest : public StressTest { GetIntVal(iter->key().ToString(), &prev); if (curr <= prev) { thread->shared->SetVerificationFailure(); - fprintf( - stderr, - "TestIterateAgainstExpected found unexpectedly large key\n"); + fprintf(stderr, + "TestIterateAgainstExpected failed: found unexpectedly " + "large key\n"); fprintf(stderr, "Column family: %s, op_logs: %s\n", cfh->GetName().c_str(), op_logs.c_str()); fprintf(stderr, "Last op found key: %s, expected at most: %s\n", From b48480cfd05b6c4cc7eb50e4af55afdae37ca171 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 3 Nov 2023 16:27:11 -0700 Subject: [PATCH 252/386] Enable `TestIterateAgainstExpected()` in more crash tests (#12040) Summary: db_stress flag `verify_iterator_with_expected_state_one_in` is only enabled for in crash test if --simple flag is set. This PR enables it for all supported crash tests by enabling it by default. This adds coverage for --txn and --enable_ts crash tests. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12040 Test Plan: ran crash tests that disabled this flag before for a few hours ``` python3 ./tools/db_crashtest.py blackbox --verify_iterator_with_expected_state_one_in=1 --txn --txn_write_policy=[0,1,2] python3 ./tools/db_crashtest.py blackbox --verify_iterator_with_expected_state_one_in=1 --enable_ts ``` Reviewed By: ajkr, hx235 Differential Revision: D50980001 Pulled By: cbi42 fbshipit-source-id: 3daf6b4c32bdddc5df057240068162aa1a907587 --- db_stress_tool/no_batched_ops_stress.cc | 8 ++++++-- tools/db_crashtest.py | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 5513fe31e..27a20fd5a 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -1836,7 +1836,9 @@ class NonBatchedOpsStressTest : public StressTest { op_logs += "P"; } - if (thread->rand.OneIn(2)) { + // Write-prepared and Write-unprepared do not support Refresh() yet. 
+ if (!(FLAGS_use_txn && FLAGS_txn_write_policy != 0) && + thread->rand.OneIn(2)) { pre_read_expected_values.clear(); post_read_expected_values.clear(); // Refresh after forward/backward scan to allow higher chance of SV @@ -1845,7 +1847,9 @@ class NonBatchedOpsStressTest : public StressTest { pre_read_expected_values.push_back( shared->Get(rand_column_family, i + lb)); } - iter->Refresh(); + Status rs = iter->Refresh(); + assert(rs.ok()); + op_logs += "Refresh "; for (int64_t i = 0; i < static_cast(expected_values_size); ++i) { post_read_expected_values.push_back( shared->Get(rand_column_family, i + lb)); diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index c09bc4d65..a10ad14fa 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -224,6 +224,7 @@ [0, 0, 0, 600, 3600, 86400] ), "auto_readahead_size" : lambda: random.choice([0, 1]), + "verify_iterator_with_expected_state_one_in": 5, } _TEST_DIR_ENV_VAR = "TEST_TMPDIR" @@ -364,7 +365,6 @@ def is_direct_io_supported(dbname): "write_buffer_size": 32 * 1024 * 1024, "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1), "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), - "verify_iterator_with_expected_state_one_in": 5, } blackbox_simple_default_params = { @@ -387,6 +387,8 @@ def is_direct_io_supported(dbname): "enable_compaction_filter": 0, # `CfConsistencyStressTest::TestIngestExternalFile()` is not implemented. "ingest_external_file_one_in": 0, + # `CfConsistencyStressTest::TestIterateAgainstExpected()` is not implemented. + "verify_iterator_with_expected_state_one_in": 0, } # For pessimistic transaction db @@ -522,6 +524,8 @@ def is_direct_io_supported(dbname): "use_put_entity_one_in": 0, "use_get_entity": 0, "use_multi_get_entity": 0, + # `MultiOpsTxnsStressTest::TestIterateAgainstExpected()` is not implemented. + "verify_iterator_with_expected_state_one_in": 0, } multiops_wc_txn_params = { From 19768a923a814a7510423b57329c50587362541e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Mon, 6 Nov 2023 07:38:42 -0800 Subject: [PATCH 253/386] Add jni Support for API CreateColumnFamilyWithImport (#11646) Summary: - Add the following missing options to src/main/java/org/rocksdb/ImportColumnFamilyOptions.java and in java/rocksjni/import_column_family_options.cc in RocksJava. - Add the struct to src/main/java/org/rocksdb/ExportImportFilesMetaData.java and in java/rocksjni/export_import_files_metadatajni.cc in RocksJava. 
- Add New Java API `createColumnFamilyWithImport` to src/main/java/org/rocksdb/RocksDB.java - Add New Java API `exportColumnFamily` to src/main/java/org/rocksdb/Checkpoint.java Pull Request resolved: https://github.com/facebook/rocksdb/pull/11646 Test Plan: - added unit tests for exportColumnFamily in org.rocksdb.CheckpointTest - added unit tests for createColumnFamilyWithImport to org.rocksdb.ImportColumnFamilyTest Reviewed By: ajkr Differential Revision: D50889700 Pulled By: cbi42 fbshipit-source-id: d623b35e445bba62a0d3c007d74352e937678f6c --- java/CMakeLists.txt | 4 + java/Makefile | 1 + java/rocksjni/checkpoint.cc | 35 +++++++ .../export_import_files_metadatajni.cc | 22 +++++ java/rocksjni/import_column_family_options.cc | 59 +++++++++++ java/rocksjni/rocksjni.cc | 57 +++++++++++ .../src/main/java/org/rocksdb/Checkpoint.java | 9 ++ .../rocksdb/ExportImportFilesMetaData.java | 18 ++++ .../rocksdb/ImportColumnFamilyOptions.java | 44 +++++++++ .../java/org/rocksdb/LiveFileMetaData.java | 14 +++ java/src/main/java/org/rocksdb/RocksDB.java | 44 +++++++++ .../test/java/org/rocksdb/CheckPointTest.java | 18 ++++ .../org/rocksdb/ImportColumnFamilyTest.java | 98 +++++++++++++++++++ src.mk | 2 + 14 files changed, 425 insertions(+) create mode 100644 java/rocksjni/export_import_files_metadatajni.cc create mode 100644 java/rocksjni/import_column_family_options.cc create mode 100644 java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java create mode 100644 java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java create mode 100644 java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 97a22eb41..ee158016b 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -40,7 +40,9 @@ set(JNI_NATIVE_SOURCES rocksjni/env_options.cc rocksjni/event_listener.cc rocksjni/event_listener_jnicallback.cc + rocksjni/export_import_files_metadatajni.cc rocksjni/filter.cc + rocksjni/import_column_family_options.cc rocksjni/ingest_external_file_options.cc rocksjni/iterator.cc rocksjni/jnicallback.cc @@ -157,6 +159,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/EnvOptions.java src/main/java/org/rocksdb/EventListener.java src/main/java/org/rocksdb/Experimental.java + src/main/java/org/rocksdb/ExportImportFilesMetaData.java src/main/java/org/rocksdb/ExternalFileIngestionInfo.java src/main/java/org/rocksdb/Filter.java src/main/java/org/rocksdb/FilterPolicyType.java @@ -169,6 +172,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/HistogramData.java src/main/java/org/rocksdb/HistogramType.java src/main/java/org/rocksdb/Holder.java + src/main/java/org/rocksdb/ImportColumnFamilyOptions.java src/main/java/org/rocksdb/IndexShorteningMode.java src/main/java/org/rocksdb/IndexType.java src/main/java/org/rocksdb/InfoLogLevel.java diff --git a/java/Makefile b/java/Makefile index 100a2c66f..e71589e9e 100644 --- a/java/Makefile +++ b/java/Makefile @@ -141,6 +141,7 @@ JAVA_TESTS = \ org.rocksdb.util.JNIComparatorTest\ org.rocksdb.FilterTest\ org.rocksdb.FlushTest\ + org.rocksdb.ImportColumnFamilyTest\ org.rocksdb.InfoLogLevelTest\ org.rocksdb.KeyExistsTest \ org.rocksdb.KeyMayExistTest\ diff --git a/java/rocksjni/checkpoint.cc b/java/rocksjni/checkpoint.cc index d7cfd813b..cef5f3ca8 100644 --- a/java/rocksjni/checkpoint.cc +++ b/java/rocksjni/checkpoint.cc @@ -69,3 +69,38 @@ void Java_org_rocksdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } } + +/* + * Class: 
org_rocksdb_Checkpoint + * Method: exportColumnFamily + * Signature: (JJLjava/lang/String;)Lorg/rocksdb/ExportImportFilesMetaData; + */ +jlong Java_org_rocksdb_Checkpoint_exportColumnFamily( + JNIEnv* env, jobject /*jobj*/, jlong jcheckpoint_handle, + jlong jcolumn_family_handle, jstring jexport_path) { + const char* export_path = env->GetStringUTFChars(jexport_path, 0); + if (export_path == nullptr) { + // exception thrown: OutOfMemoryError + return 0; + } + + auto* checkpoint = + reinterpret_cast(jcheckpoint_handle); + + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + + ROCKSDB_NAMESPACE::ExportImportFilesMetaData* metadata = nullptr; + + ROCKSDB_NAMESPACE::Status s = checkpoint->ExportColumnFamily( + column_family_handle, export_path, &metadata); + + env->ReleaseStringUTFChars(jexport_path, export_path); + + if (!s.ok()) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); + } + + return GET_CPLUSPLUS_POINTER(metadata); +} diff --git a/java/rocksjni/export_import_files_metadatajni.cc b/java/rocksjni/export_import_files_metadatajni.cc new file mode 100644 index 000000000..213977ac2 --- /dev/null +++ b/java/rocksjni/export_import_files_metadatajni.cc @@ -0,0 +1,22 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "include/org_rocksdb_ExportImportFilesMetaData.h" +#include "include/org_rocksdb_LiveFileMetaData.h" +#include "rocksjni/portal.h" + +/* + * Class: org_rocksdb_ExportImportFilesMetaData + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_ExportImportFilesMetaData_disposeInternal( + JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle) { + auto* metadata = + reinterpret_cast(jhandle); + assert(metadata != nullptr); + delete metadata; +} diff --git a/java/rocksjni/import_column_family_options.cc b/java/rocksjni/import_column_family_options.cc new file mode 100644 index 000000000..1a9bded51 --- /dev/null +++ b/java/rocksjni/import_column_family_options.cc @@ -0,0 +1,59 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include + +#include "include/org_rocksdb_ImportColumnFamilyOptions.h" +#include "rocksdb/options.h" +#include "rocksjni/cplusplus_to_java_convert.h" + +/* + * Class: org_rocksdb_ImportColumnFamilyOptions + * Method: newImportColumnFamilyOptions + * Signature: ()J + */ +jlong Java_org_rocksdb_ImportColumnFamilyOptions_newImportColumnFamilyOptions( + JNIEnv *, jclass) { + ROCKSDB_NAMESPACE::ImportColumnFamilyOptions *opts = + new ROCKSDB_NAMESPACE::ImportColumnFamilyOptions(); + return GET_CPLUSPLUS_POINTER(opts); +} + +/* + * Class: org_rocksdb_ImportColumnFamilyOptions + * Method: setMoveFiles + * Signature: (JZ)V + */ +void Java_org_rocksdb_ImportColumnFamilyOptions_setMoveFiles( + JNIEnv *, jobject, jlong jhandle, jboolean jmove_files) { + auto *options = + reinterpret_cast(jhandle); + options->move_files = static_cast(jmove_files); +} + +/* + * Class: org_rocksdb_ImportColumnFamilyOptions + * Method: moveFiles + * Signature: (J)Z + */ +jboolean Java_org_rocksdb_ImportColumnFamilyOptions_moveFiles(JNIEnv *, jobject, + jlong jhandle) { + auto *options = + reinterpret_cast(jhandle); + return static_cast(options->move_files); +} + +/* + * Class: org_rocksdb_ImportColumnFamilyOptions + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_ImportColumnFamilyOptions_disposeInternal(JNIEnv *, + jobject, + jlong jhandle) { + delete reinterpret_cast( + jhandle); +} \ No newline at end of file diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 12358ee9b..8d445838e 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -490,6 +490,63 @@ jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__J_3J_3_3B( return jcf_handles; } +/* + * Class: org_rocksdb_RocksDB + * Method: createColumnFamilyWithImport + * Signature: (J[BIJJ[J)J + */ +jlong Java_org_rocksdb_RocksDB_createColumnFamilyWithImport( + JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jcf_name, + jint jcf_name_len, jlong j_cf_options, jlong j_cf_import_options, + jlongArray j_metadata_handle_array) { + auto* db = reinterpret_cast(jdb_handle); + jboolean has_exception = JNI_FALSE; + const std::string cf_name = + ROCKSDB_NAMESPACE::JniUtil::byteString( + env, jcf_name, jcf_name_len, + [](const char* str, const size_t len) { + return std::string(str, len); + }, + &has_exception); + if (has_exception == JNI_TRUE) { + // exception occurred + return 0; + } + auto* cf_options = + reinterpret_cast(j_cf_options); + + auto* cf_import_options = + reinterpret_cast( + j_cf_import_options); + + std::vector metadatas; + jlong* ptr_metadata_handle_array = + env->GetLongArrayElements(j_metadata_handle_array, nullptr); + if (j_metadata_handle_array == nullptr) { + // exception thrown: OutOfMemoryError + return 0; + } + const jsize array_size = env->GetArrayLength(j_metadata_handle_array); + for (jsize i = 0; i < array_size; ++i) { + const ROCKSDB_NAMESPACE::ExportImportFilesMetaData* metadata_ptr = + reinterpret_cast( + ptr_metadata_handle_array[i]); + metadatas.push_back(metadata_ptr); + } + env->ReleaseLongArrayElements(j_metadata_handle_array, + ptr_metadata_handle_array, JNI_ABORT); + + ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle = nullptr; + ROCKSDB_NAMESPACE::Status s = db->CreateColumnFamilyWithImport( + *cf_options, cf_name, *cf_import_options, metadatas, &cf_handle); + if (!s.ok()) { + // error occurred + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); + return 0; + } + return GET_CPLUSPLUS_POINTER(cf_handle); +} + /* * Class: org_rocksdb_RocksDB * Method: 
dropColumnFamily diff --git a/java/src/main/java/org/rocksdb/Checkpoint.java b/java/src/main/java/org/rocksdb/Checkpoint.java index c9b3886c0..347221df6 100644 --- a/java/src/main/java/org/rocksdb/Checkpoint.java +++ b/java/src/main/java/org/rocksdb/Checkpoint.java @@ -50,6 +50,12 @@ public void createCheckpoint(final String checkpointPath) createCheckpoint(nativeHandle_, checkpointPath); } + public ExportImportFilesMetaData exportColumnFamily(final ColumnFamilyHandle columnFamilyHandle, + final String exportPath) throws RocksDBException { + return new ExportImportFilesMetaData( + exportColumnFamily(nativeHandle_, columnFamilyHandle.nativeHandle_, exportPath)); + } + private Checkpoint(final RocksDB db) { super(newCheckpoint(db.nativeHandle_)); } @@ -59,4 +65,7 @@ private Checkpoint(final RocksDB db) { private native void createCheckpoint(long handle, String checkpointPath) throws RocksDBException; + + private native long exportColumnFamily(long handle, long columnFamilyHandle, String exportPath) + throws RocksDBException; } diff --git a/java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java b/java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java new file mode 100644 index 000000000..1589f631c --- /dev/null +++ b/java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java @@ -0,0 +1,18 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * The metadata that describes a column family. + */ +public class ExportImportFilesMetaData extends RocksObject { + ExportImportFilesMetaData(final long nativeHandle) { + super(nativeHandle); + } + + @Override protected native void disposeInternal(final long handle); +} diff --git a/java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java b/java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java new file mode 100644 index 000000000..652bd19dc --- /dev/null +++ b/java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java @@ -0,0 +1,44 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * ImportColumnFamilyOptions is used by + * {@link RocksDB#createColumnFamilyWithImport(ColumnFamilyDescriptor, ImportColumnFamilyOptions, + * ExportImportFilesMetaData)}. + */ +public class ImportColumnFamilyOptions extends RocksObject { + public ImportColumnFamilyOptions() { + super(newImportColumnFamilyOptions()); + } + + /** + * Can be set to true to move the files instead of copying them. + * + * @return true if files will be moved + */ + public boolean moveFiles() { + return moveFiles(nativeHandle_); + } + + /** + * Can be set to true to move the files instead of copying them. + * + * @param moveFiles true if files should be moved instead of copied + * + * @return the reference to the current IngestExternalFileOptions. 
+ */ + public ImportColumnFamilyOptions setMoveFiles(final boolean moveFiles) { + setMoveFiles(nativeHandle_, moveFiles); + return this; + } + + private static native long newImportColumnFamilyOptions(); + private native boolean moveFiles(final long handle); + private native void setMoveFiles(final long handle, final boolean move_files); + @Override protected final native void disposeInternal(final long handle); +} diff --git a/java/src/main/java/org/rocksdb/LiveFileMetaData.java b/java/src/main/java/org/rocksdb/LiveFileMetaData.java index 4c670e3ae..cb0f1a302 100644 --- a/java/src/main/java/org/rocksdb/LiveFileMetaData.java +++ b/java/src/main/java/org/rocksdb/LiveFileMetaData.java @@ -54,4 +54,18 @@ public byte[] columnFamilyName() { public int level() { return level; } + + public long newLiveFileMetaDataHandle() { + return newLiveFileMetaDataHandle(columnFamilyName(), columnFamilyName().length, level(), + fileName(), path(), size(), smallestSeqno(), largestSeqno(), smallestKey(), + smallestKey().length, largestKey(), largestKey().length, numReadsSampled(), + beingCompacted(), numEntries(), numDeletions()); + } + + private native long newLiveFileMetaDataHandle(final byte[] columnFamilyName, + final int columnFamilyNameLength, final int level, final String fileName, final String path, + final long size, final long smallestSeqno, final long largestSeqno, final byte[] smallestKey, + final int smallestKeyLength, final byte[] largestKey, final int largestKeyLength, + final long numReadsSampled, final boolean beingCompacted, final long numEntries, + final long numDeletions); } diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 303697139..93c3cf1c1 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -766,6 +766,46 @@ public List createColumnFamilies( return columnFamilyHandles; } + /** + * Creates a new column family with the name columnFamilyName and + * import external SST files specified in `metadata` allocates a + * ColumnFamilyHandle within an internal structure. + * The ColumnFamilyHandle is automatically disposed with DB disposal. + * + * @param columnFamilyDescriptor column family to be created. + * @return {@link org.rocksdb.ColumnFamilyHandle} instance. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. 
+ */ + public ColumnFamilyHandle createColumnFamilyWithImport( + final ColumnFamilyDescriptor columnFamilyDescriptor, + final ImportColumnFamilyOptions importColumnFamilyOptions, + final ExportImportFilesMetaData metadata) throws RocksDBException { + List metadatas = new ArrayList<>(); + metadatas.add(metadata); + return createColumnFamilyWithImport( + columnFamilyDescriptor, importColumnFamilyOptions, metadatas); + } + + public ColumnFamilyHandle createColumnFamilyWithImport( + final ColumnFamilyDescriptor columnFamilyDescriptor, + final ImportColumnFamilyOptions importColumnFamilyOptions, + final List metadatas) throws RocksDBException { + final int metadataNum = metadatas.size(); + final long[] metadataHandleList = new long[metadataNum]; + for (int i = 0; i < metadataNum; i++) { + metadataHandleList[i] = metadatas.get(i).getNativeHandle(); + } + final ColumnFamilyHandle columnFamilyHandle = new ColumnFamilyHandle(this, + createColumnFamilyWithImport(nativeHandle_, columnFamilyDescriptor.getName(), + columnFamilyDescriptor.getName().length, + columnFamilyDescriptor.getOptions().nativeHandle_, + importColumnFamilyOptions.nativeHandle_, metadataHandleList)); + ownedColumnFamilyHandles.add(columnFamilyHandle); + return columnFamilyHandle; + } + /** * Drops the column family specified by {@code columnFamilyHandle}. This call * only records a drop record in the manifest and prevents the column @@ -4713,6 +4753,10 @@ private native long[] createColumnFamilies(final long handle, private native long[] createColumnFamilies( final long handle, final long[] columnFamilyOptionsHandles, final byte[][] columnFamilyNames) throws RocksDBException; + private native long createColumnFamilyWithImport(final long handle, final byte[] columnFamilyName, + final int columnFamilyNamelen, final long columnFamilyOptions, + final long importColumnFamilyOptions, final long[] metadataHandleList) + throws RocksDBException; private native void dropColumnFamily( final long handle, final long cfHandle) throws RocksDBException; private native void dropColumnFamilies(final long handle, diff --git a/java/src/test/java/org/rocksdb/CheckPointTest.java b/java/src/test/java/org/rocksdb/CheckPointTest.java index 2b3cc7a3b..3b0b5d86a 100644 --- a/java/src/test/java/org/rocksdb/CheckPointTest.java +++ b/java/src/test/java/org/rocksdb/CheckPointTest.java @@ -57,6 +57,24 @@ public void checkPoint() throws RocksDBException { } } + @Test + public void exportColumnFamily() throws RocksDBException { + try (final Options options = new Options().setCreateIfMissing(true)) { + try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key".getBytes(), "value".getBytes()); + try (final Checkpoint checkpoint = Checkpoint.create(db)) { + ExportImportFilesMetaData metadata1 = + checkpoint.exportColumnFamily(db.getDefaultColumnFamily(), + checkpointFolder.getRoot().getAbsolutePath() + "/export_column_family1"); + db.put("key2".getBytes(), "value2".getBytes()); + ExportImportFilesMetaData metadata2 = + checkpoint.exportColumnFamily(db.getDefaultColumnFamily(), + checkpointFolder.getRoot().getAbsolutePath() + "/export_column_family2"); + } + } + } + } + @Test(expected = IllegalArgumentException.class) public void failIfDbIsNull() { try (final Checkpoint ignored = Checkpoint.create(null)) { diff --git a/java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java b/java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java new file mode 100644 index 000000000..ee569d497 --- /dev/null +++ 
b/java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java @@ -0,0 +1,98 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.util.BytewiseComparator; + +public class ImportColumnFamilyTest { + private static final String SST_FILE_NAME = "test.sst"; + private static final String DB_DIRECTORY_NAME = "test_db"; + + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Rule public TemporaryFolder checkpointFolder = new TemporaryFolder(); + + @Test + public void testImportColumnFamily() throws RocksDBException { + try (final Options options = new Options().setCreateIfMissing(true)) { + try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key".getBytes(), "value".getBytes()); + db.put("key1".getBytes(), "value1".getBytes()); + + try (final Checkpoint checkpoint = Checkpoint.create(db); + final ImportColumnFamilyOptions importColumnFamilyOptions = + new ImportColumnFamilyOptions()) { + ExportImportFilesMetaData default_cf_metadata = + checkpoint.exportColumnFamily(db.getDefaultColumnFamily(), + checkpointFolder.getRoot().getAbsolutePath() + "/default_cf_metadata"); + ColumnFamilyDescriptor columnFamilyDescriptor = + new ColumnFamilyDescriptor("new_cf".getBytes()); + final ColumnFamilyHandle importCfHandle = db.createColumnFamilyWithImport( + columnFamilyDescriptor, importColumnFamilyOptions, default_cf_metadata); + assertThat(db.get(importCfHandle, "key".getBytes())).isEqualTo("value".getBytes()); + assertThat(db.get(importCfHandle, "key1".getBytes())).isEqualTo("value1".getBytes()); + } + } + } + } + + @Test + public void ImportMultiColumnFamilyTest() throws RocksDBException { + try (final Options options = new Options().setCreateIfMissing(true)) { + try (final RocksDB db1 = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath() + "db1"); + final RocksDB db2 = + RocksDB.open(options, dbFolder.getRoot().getAbsolutePath() + "db2");) { + db1.put("key".getBytes(), "value".getBytes()); + db1.put("key1".getBytes(), "value1".getBytes()); + db2.put("key2".getBytes(), "value2".getBytes()); + db2.put("key3".getBytes(), "value3".getBytes()); + try (final Checkpoint checkpoint1 = Checkpoint.create(db1); + final Checkpoint checkpoint2 = Checkpoint.create(db2); + final ImportColumnFamilyOptions importColumnFamilyOptions = + new ImportColumnFamilyOptions()) { + ExportImportFilesMetaData default_cf_metadata1 = + checkpoint1.exportColumnFamily(db1.getDefaultColumnFamily(), + checkpointFolder.getRoot().getAbsolutePath() + "/default_cf_metadata1"); + ExportImportFilesMetaData default_cf_metadata2 = + checkpoint2.exportColumnFamily(db2.getDefaultColumnFamily(), + checkpointFolder.getRoot().getAbsolutePath() + "/default_cf_metadata2"); + + ColumnFamilyDescriptor columnFamilyDescriptor = + new 
ColumnFamilyDescriptor("new_cf".getBytes()); + + List importMetaDatas = new ArrayList(); + importMetaDatas.add(default_cf_metadata1); + importMetaDatas.add(default_cf_metadata2); + + final ColumnFamilyHandle importCfHandle = db1.createColumnFamilyWithImport( + columnFamilyDescriptor, importColumnFamilyOptions, importMetaDatas); + assertThat(db1.get(importCfHandle, "key".getBytes())).isEqualTo("value".getBytes()); + assertThat(db1.get(importCfHandle, "key1".getBytes())).isEqualTo("value1".getBytes()); + assertThat(db1.get(importCfHandle, "key2".getBytes())).isEqualTo("value2".getBytes()); + assertThat(db1.get(importCfHandle, "key3".getBytes())).isEqualTo("value3".getBytes()); + } + } + } + } +} diff --git a/src.mk b/src.mk index 160ffdb2e..a16f16b87 100644 --- a/src.mk +++ b/src.mk @@ -662,10 +662,12 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/compression_options.cc \ java/rocksjni/concurrent_task_limiter.cc \ java/rocksjni/config_options.cc \ + java/rocksjni/export_import_files_metadatajni.cc \ java/rocksjni/env.cc \ java/rocksjni/env_options.cc \ java/rocksjni/event_listener.cc \ java/rocksjni/event_listener_jnicallback.cc \ + java/rocksjni/import_column_family_options.cc \ java/rocksjni/ingest_external_file_options.cc \ java/rocksjni/filter.cc \ java/rocksjni/iterator.cc \ From 520c64fd2e603488ae1858157898e8c0ff43b368 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 6 Nov 2023 07:41:36 -0800 Subject: [PATCH 254/386] Add missing status check in ExternalSstFileIngestionJob and ImportColumnFamilyJob (#12042) Summary: .. and update some unit tests that failed with this change. See comment in ExternalSSTFileBasicTest.IngestFileWithCorruptedDataBlock for more explanation. The missing status check is not caught by `ASSERT_STATUS_CHECKED=1` due to this line: https://github.com/facebook/rocksdb/blob/8505b26db19871a8c8782a35a7b5be9d321d45e0/table/block_based/block.h#L394. Will explore if we can remove it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12042 Test Plan: existing unit tests. 
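For readers who want the shape of the fix without opening the diff, the following is a minimal sketch of the pattern being enforced when deriving a file's largest key from a table iterator. It is not the code added by this PR, and the function and message names are illustrative only:

```
#include <string>

#include "rocksdb/iterator.h"
#include "rocksdb/status.h"

// Sketch only: derive the largest key from an iterator, surfacing read
// errors instead of silently trusting an invalid iterator's position.
rocksdb::Status LargestKeyOf(rocksdb::Iterator* iter, std::string* largest) {
  iter->SeekToLast();
  if (!iter->Valid()) {
    // Distinguish "no usable position" from a real I/O or corruption error.
    return iter->status().ok()
               ? rocksdb::Status::Corruption("cannot find largest key")
               : iter->status();
  }
  largest->assign(iter->key().data(), iter->key().size());
  return rocksdb::Status::OK();
}
```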
Reviewed By: ajkr Differential Revision: D50994769 Pulled By: cbi42 fbshipit-source-id: c91615bccd6094a91634c50b98401d456cbb927b --- db/external_sst_file_basic_test.cc | 35 ++++++++++++++++----------- db/external_sst_file_ingestion_job.cc | 6 +++-- db/import_column_family_job.cc | 2 ++ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index cd1d7dd0e..749a172ac 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -1548,6 +1548,11 @@ TEST_F(ExternalSSTFileBasicTest, RangeDeletionEndComesBeforeStart) { } TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { + bool verify_checksums_before_ingest = std::get<1>(GetParam()); + if (!verify_checksums_before_ingest) { + ROCKSDB_GTEST_BYPASS("Bypassing test when !verify_checksums_before_ingest"); + return; + } bool change_checksum_called = false; const auto& change_checksum = [&](void* arg) { if (!change_checksum_called) { @@ -1565,24 +1570,20 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { SyncPoint::GetInstance()->EnableProcessing(); int file_id = 0; bool write_global_seqno = std::get<0>(GetParam()); - bool verify_checksums_before_ingest = std::get<1>(GetParam()); do { Options options = CurrentOptions(); DestroyAndReopen(options); std::map true_data; Status s = GenerateAndAddExternalFile( options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++, - write_global_seqno, verify_checksums_before_ingest, &true_data); - if (verify_checksums_before_ingest) { - ASSERT_NOK(s); - } else { - ASSERT_OK(s); - } + write_global_seqno, /*verify_checksums_before_ingest=*/true, + &true_data); + ASSERT_NOK(s); change_checksum_called = false; } while (ChangeOptionsForFileIngestionTest()); } -TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { +TEST_P(ExternalSSTFileBasicTest, IngestFileWithCorruptedDataBlock) { if (!random_rwfile_supported_) { ROCKSDB_GTEST_SKIP("Test requires NewRandomRWFile support"); return; @@ -1590,15 +1591,21 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { SyncPoint::GetInstance()->DisableProcessing(); int file_id = 0; EnvOptions env_options; + Random rnd(301); do { Options options = CurrentOptions(); + options.compression = kNoCompression; + BlockBasedTableOptions table_options; + table_options.block_size = 4 * 1024; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); std::string file_path = sst_files_dir_ + std::to_string(file_id++); SstFileWriter sst_file_writer(env_options, options); Status s = sst_file_writer.Open(file_path); ASSERT_OK(s); + // This should write more than 2 data blocks. for (int i = 0; i != 100; ++i) { std::string key = Key(i); - std::string value = Key(i) + std::to_string(0); + std::string value = rnd.RandomString(200); ASSERT_OK(sst_file_writer.Put(key, value)); } ASSERT_OK(sst_file_writer.Finish()); @@ -1609,11 +1616,11 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) { ASSERT_GT(file_size, 8); std::unique_ptr rwfile; ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions())); - // Manually corrupt the file - // We deterministically corrupt the first byte because we currently - // cannot choose a random offset. The reason for this limitation is that - // we do not checksum property block at present. - const uint64_t offset = 0; + // Corrupt the second data block. 
+ // We need to corrupt a non-first and non-last data block + // since we access them to get smallest and largest internal + // key in the file in GetIngestedFileInfo(). + const uint64_t offset = 5000; char scratch[8] = {0}; Slice buf; ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch)); diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 4f49c6bfe..a4a194714 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -769,8 +769,6 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( std::unique_ptr iter(table_reader->NewIterator( ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion)); - std::unique_ptr range_del_iter( - table_reader->NewRangeTombstoneIterator(ro)); // Get first (smallest) and last (largest) key from file. file_to_ingest->smallest_internal_key = @@ -829,8 +827,12 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( file_to_ingest->largest_internal_key.SetFrom(key); bounds_set = true; + } else if (!iter->status().ok()) { + return iter->status(); } + std::unique_ptr range_del_iter( + table_reader->NewRangeTombstoneIterator(ro)); // We may need to adjust these key bounds, depending on whether any range // deletion tombstones extend past them. const Comparator* ucmp = cfd_->internal_comparator().user_comparator(); diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index f8eec0502..9c285a3d8 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -393,6 +393,8 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( } file_to_import->largest_internal_key.DecodeFrom(largest); bound_set = true; + } else if (!iter->status().ok()) { + return iter->status(); } std::unique_ptr range_del_iter{ From 6979e9dc6a70943fe1e41de77658d0a039d20eb7 Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Mon, 6 Nov 2023 09:15:00 -0800 Subject: [PATCH 255/386] Create blog post from report on JNI performance work (#11818) Summary: We did some investigation into the performance of JNI for workloads emulating how data is carried between Java and C++ for RocksDB. The repo for our performance work lives at https://github.com/evolvedbinary/jni-benchmarks This is a report text from that work, extracted as a blog post. Along with some supporting files (png, pdf of graphs). 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11818 Reviewed By: jaykorean Differential Revision: D50907467 Pulled By: pdillinger fbshipit-source-id: ec6a43c83bd9ad94a3d11cfd87031e613acf7659 --- .../2023-11-06-java-jni-benchmarks.markdown | 291 ++++++++++++++++++ .../fig_1024_1_copyout_nopoolbig.png | Bin 0 -> 98023 bytes .../fig_1024_1_none_nopoolbig.png | Bin 0 -> 118918 bytes .../fig_1024_1_none_nopoolsmall.png | Bin 0 -> 144099 bytes .../jni-get-benchmarks/optimization-graph.png | Bin 0 -> 25214 bytes 5 files changed, 291 insertions(+) create mode 100644 docs/_posts/2023-11-06-java-jni-benchmarks.markdown create mode 100644 docs/_posts/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png create mode 100644 docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png create mode 100644 docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png create mode 100644 docs/_posts/jni-get-benchmarks/optimization-graph.png diff --git a/docs/_posts/2023-11-06-java-jni-benchmarks.markdown b/docs/_posts/2023-11-06-java-jni-benchmarks.markdown new file mode 100644 index 000000000..5eb47ef3a --- /dev/null +++ b/docs/_posts/2023-11-06-java-jni-benchmarks.markdown @@ -0,0 +1,291 @@ +--- +title: Java API Performance Improvements +layout: post +author: alanpaxton +category: blog +--- +# RocksDB Java API Performance Improvements + +Evolved Binary has been working on several aspects of how the Java API to RocksDB can be improved. Two aspects of this which are of particular importance are performance and the developer experience. + +* We have built some synthetic benchmark code to determine which are the most efficient methods of transferring data between Java and C++. +* We have used the results of the synthetic benchmarking to guide plans for rationalising the API interfaces. +* We have made some opportunistic performance optimizations/fixes within the Java API which have already yielded noticable improvements. + +## Synthetic JNI API Performance Benchmarks +The synthetic benchmark repository contains tests designed to isolate the Java to/from C++ interaction of a canonical data intensive Key/Value Store implemented in C++ with a Java (JNI) API layered on top. + +JNI provides several mechanisms for allowing transfer of data between Java buffers and C++ buffers. These mechanisms are not trivial, because they require the JNI system to ensure that Java memory under the control of the JVM is not moved or garbage collected whilst it is being accessed outside the direct control of the JVM. + +We set out to determine which of multiple options for transfer of data from `C++` to `Java` and vice-versa were the most efficient. We used the [Java Microbenchmark Harness](https://github.com/openjdk/jmh) to set up repeatable benchmarks to measure all the options. + +We explore these and some other potential mechanisms in the detailed results (in our [Synthetic JNI performance repository](https://github.com/evolvedbinary/jni-benchmarks/blob/main/DataBenchmarks.md)) + +We summarise this work here: + +### The Model + +* In `C++` we represent the on-disk data as an in-memory map of `(key, value)` + pairs. +* For a fetch query, we expect the result to be a Java object with access to the + contents of the _value_. This may be a standard Java object which does the job + of data access (a `byte[]` or a `ByteBuffer`) or an object of our own devising + which holds references to the value in some form (a `FastBuffer` pointing to + `com.sun.unsafe.Unsafe` unsafe memory, for instance). 
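+
+To make the model concrete, the sketch below (ours, not code from the benchmark
+repository) shows the shape of the C++ side: an in-memory map standing in for the
+store, and a fetch that copies the value into a buffer supplied by the caller.
+Everything in it is illustrative; the real benchmarks wrap an equivalent store
+behind the various JNI transfer mechanisms discussed next.
+
+```cpp
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+
+// Illustrative stand-in for the C++ side of the model: an in-memory map of
+// (key, value) pairs with a Get() that copies into a caller-supplied buffer.
+// The JNI layer decides what that buffer actually wraps (byte[], ByteBuffer,
+// unsafe memory, ...).
+class KVStoreModel {
+ public:
+  void Put(const std::string& key, const std::string& value) {
+    data_[key] = value;
+  }
+
+  // Returns the number of bytes copied, or -1 if the key is absent.
+  int Get(const std::string& key, char* out, size_t out_len) const {
+    const auto it = data_.find(key);
+    if (it == data_.end()) return -1;
+    const size_t n = std::min(out_len, it->second.size());
+    std::memcpy(out, it->second.data(), n);
+    return static_cast<int>(n);
+  }
+
+ private:
+  std::map<std::string, std::string> data_;
+};
+```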
+ +### Data Types + +There are several potential data types for holding data for transfer, and they +are unsurprisingly quite connected underneath. + +#### Byte Array + +The simplest data container is a _raw_ array of bytes (`byte[]`). + +There are 3 different mechanisms for transferring data between a `byte[]` and +C++ + +* At the C++ side, the method + [`JNIEnv.GetArrayCritical()`](https://docs.oracle.com/en/java/javase/13/docs/specs/jni/functions.html#getprimitivearraycritical) + allows access to a C++ pointer to the underlying array. +* The `JNIEnv` methods `GetByteArrayElements()` and `ReleaseByteArrayElements()` + fetch references/copies to and from the contents of a byte array, with less + concern for critical sections than the _critical_ methods, though they are + consequently more likely/certain to result in (extra) copies. +* The `JNIEnv` methods `GetByteArrayRegion()` and `SetByteArrayRegion()` + transfer raw C++ buffer data to and from the contents of a byte array. These + must ultimately do some data pinning for the duration of copies; the + mechanisms may be similar or different to the _critical_ operations, and + therefore performance may differ. + +#### Byte Buffer + +A `ByteBuffer` abstracts the contents of a collection of bytes, and was in fact +introduced to support a range of higher-performance I/O operations in some +circumstances. + +There are 2 types of byte buffers in Java, _indirect_ and _direct_. Indirect +byte buffers are the standard, and the memory they use is on-heap as with all +usual Java objects. In contrast, direct byte buffers are used to wrap off-heap +memory which is accessible to direct network I/O. Either type of `ByteBuffer` +can be allocated at the Java side, using the `allocate()` and `allocateDirect()` +methods respectively. + +Direct byte buffers can be created in C++ using the JNI method +[`JNIEnv.NewDirectByteBuffer()`](https://docs.oracle.com/en/java/javase/13/docs/specs/jni/functions.html#newdirectbytebuffer) +to wrap some native (C++) memory. + +Direct byte buffers can be accessed in C++ using the +[`JNIEnv.GetDirectBufferAddress()`](https://docs.oracle.com/en/java/javase/13/docs/specs/jni/functions.html#GetDirectBufferAddress) +and measured using +[`JNIEnv.GetDirectBufferCapacity()`](https://docs.oracle.com/en/java/javase/13/docs/specs/jni/functions.html#GetDirectBufferCapacity) + +#### Unsafe Memory + +The call `com.sun.unsafe.Unsafe.allocateMemory()` returns a handle which is (of course) just a pointer to raw memory, and +can be used as such on the C++ side. We could turn it into a byte buffer on the +C++ side by calling `JNIEnv.NewDirectByteBuffer()`, or simply use it as a native +C++ buffer at the expected address, assuming we record or remember how much +space was allocated. + +A custom `FastBuffer` class provides access to unsafe memory from the Java side. + + +#### Allocation + +For these benchmarks, allocation has been excluded from the benchmark costs by +pre-allocating a quantity of buffers of the appropriate kind as part of the test +setup. Each run of the benchmark acquires an existing buffer from a pre-allocated +FIFO list, and returns it afterwards. A small test has +confirmed that the request and return cycle is of insignificant cost compared to +the benchmark API call. + +### GetJNIBenchmark Performance + +Benchmarks ran for a duration of order 6 hours on an otherwise unloaded VM, + the error bars are small and we can have strong confidence in the values + derived and plotted. 
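+
+For reference while reading the charts, here is a sketch (ours, not code from the
+benchmark repository) of what the main `byte[]` and direct `ByteBuffer` routes look
+like on the native side. `src` and `src_len` stand for the value already located in
+the C++ store; how it is found is out of scope here.
+
+```cpp
+#include <jni.h>
+
+#include <algorithm>
+#include <cstring>
+
+// Copy native bytes into a Java byte[] with a single region call.
+static void FillWithSetRegion(JNIEnv* env, jbyteArray jresult,
+                              const char* src, jsize src_len) {
+  env->SetByteArrayRegion(jresult, 0, src_len,
+                          reinterpret_cast<const jbyte*>(src));
+}
+
+// Copy native bytes into a Java byte[] via a critical section: the array
+// contents are exposed as a raw pointer (pinned, or a temporary copy).
+static void FillWithCritical(JNIEnv* env, jbyteArray jresult,
+                             const char* src, jsize src_len) {
+  void* dst = env->GetPrimitiveArrayCritical(jresult, nullptr);
+  if (dst == nullptr) return;  // OutOfMemoryError is already pending
+  std::memcpy(dst, src, static_cast<size_t>(src_len));
+  env->ReleasePrimitiveArrayCritical(jresult, dst, 0);
+}
+
+// Copy native bytes into a direct nio.ByteBuffer: resolve the off-heap
+// address once, then the transfer is just the underlying memcpy().
+static jlong FillDirectByteBuffer(JNIEnv* env, jobject jbuffer,
+                                  const char* src, jlong src_len) {
+  void* dst = env->GetDirectBufferAddress(jbuffer);
+  const jlong capacity = env->GetDirectBufferCapacity(jbuffer);
+  if (dst == nullptr || capacity < 0) return -1;  // not a direct buffer
+  const jlong n = std::min(src_len, capacity);
+  std::memcpy(dst, src, static_cast<size_t>(n));
+  return n;
+}
+```
+
+The region and critical routes above both end in a single bulk copy, which is why
+their measured costs track each other so closely in the results that follow.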
+
+![Raw JNI Get](./jni-get-benchmarks/fig_1024_1_none_nopoolbig.png).
+
+Comparing all the benchmarks as the data size becomes large, the conclusions we
+can draw are:
+
+- Indirect byte buffers add cost; they are effectively an overhead on plain
+  `byte[]` and the JNI side only allows them to be accessed via their
+  encapsulated `byte[]`.
+- `SetRegion` and `GetCritical` mechanisms for copying data into a `byte[]` are
+  of very comparable performance; presumably the behaviour behind the scenes of
+  `SetRegion` is very similar to that of declaring a critical region, doing a
+  `memcpy()` and releasing the critical region.
+- `GetElements` methods for transferring data from C++ to Java are consistently
+  less efficient than `SetRegion` and `GetCritical`.
+- Getting into a raw memory buffer, passed as an address (the `handle` of an
+  `Unsafe` or of a netty `ByteBuf`), is of similar cost to the more efficient
+  `byte[]` operations.
+- Getting into a direct `nio.ByteBuffer` is of similar cost again; while the
+  ByteBuffer is passed over JNI as an ordinary Java object, JNI has a specific
+  method for getting hold of the address of the direct buffer, and using this,
+  the `get()` cost with a ByteBuffer is just that of the underlying C++
+  `memcpy()`.
+
+At small(er) data sizes, we can see whether other factors are important.
+
+![Raw JNI Get](./jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png)
+
+- Indirect byte buffers are the most significant overhead here. Again, we can
+  conclude that this is due to pure overhead compared to `byte[]` operations.
+- At the lowest data sizes, netty `ByteBuf`s and unsafe memory are marginally
+  more efficient than `byte[]`s or (slightly less efficient) direct
+  `nio.ByteBuffer`s. This may be explained by even the small cost of calling
+  the JNI method on the C++ side simply to acquire a direct buffer address.
+  The margins (nanoseconds) here are extremely small.
+
+#### Post processing the results
+
+Our benchmark model for post-processing is to transfer the results into a
+`byte[]`. Where the result is already a `byte[]` this may seem like an unfair
+extra cost, but the aim is to model the least cost processing step for any kind
+of result.
+
+- Copying into a `byte[]` using the bulk methods supported by `byte[]` and
+  `nio.ByteBuffer` is of comparable performance in both cases.
+- Accessing the contents of an `Unsafe` buffer using the supplied unsafe methods
+  is inefficient. The access is word by word, in Java.
+- Accessing the contents of a netty `ByteBuf` is similarly inefficient; again
+  the access is presumably word by word, using normal Java mechanisms.
+
+![Copy out JNI Get](./jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png).
+
+### PutJNIBenchmark
+
+We benchmarked `Put` methods in a similar synthetic fashion in less depth, but
+enough to confirm that the performance profile is similar/symmetrical. As with
+`get()`, using `GetElements` is the least performant way of implementing
+transfers to/from Java objects in C++/JNI, and other JNI mechanisms do not
+differ greatly one from another.
+
+## Lessons from Synthetic API
+
+Performance analysis shows that for `get()`, fetching into an allocated `byte[]`
+is as efficient as any other mechanism, as long as JNI region methods are used
+for the internal data transfer. Copying out or otherwise using the result on the
+Java side is straightforward and efficient. Using `byte[]` avoids the manual
+memory management required with direct `nio.ByteBuffer`s, extra work which does
+not appear to provide any gain.
+A C++ implementation using the `GetRegion` JNI method is probably to be
+preferred to using `GetCritical` because, while their performance is equal,
+`GetRegion` is a higher-level/simpler abstraction.
+
+Vitally, whatever JNI transfer mechanism is chosen, the buffer allocation
+mechanism and pattern are crucial to achieving good performance. We experimented
+with making use of netty's pooled allocator as part of the benchmark, and the
+difference between `getIntoPooledNettyByteBuf`, which uses the allocator, and
+`getIntoNettyByteBuf`, which uses the same pre-allocation on setup as every
+other benchmark, is significant.
+
+Equally importantly, transfer of data to or from buffers should where possible
+be done in bulk, using array copy or buffer copy mechanisms. Thought should
+perhaps be given to supporting common transformations in the underlying C++
+layer.
+
+## API Recommendations
+
+Of course there is some noise within the results, but we can agree:
+
+ * Don't make copies you don't need to make
+ * Don't allocate/deallocate when you can avoid it
+
+Translating this into designing an efficient API, we want to:
+
+ * Support API methods that return results in buffers supplied by the client.
+ * Support `byte[]`-based APIs as the simplest way of getting data into a usable configuration for a broad range of Java use.
+ * Support direct `ByteBuffer`s as these can reduce copies when used as part of a chain of `ByteBuffer`-based operations. This sort of sophisticated streaming model is most likely to be used by clients where performance is important, and so we decide to support it.
+ * Support indirect `ByteBuffer`s for a combination of reasons:
+   * API consistency between direct and indirect buffers
+   * Simplicity of implementation, as we can wrap `byte[]`-oriented methods
+ * Continue to support methods which allocate return buffers per-call, as these are the easiest to use on initial encounter with the RocksDB API.
+
+High performance Java interaction with RocksDB ultimately requires architectural decisions by the client:
+ * Use more complex (client supplied buffer) API methods where performance matters
+ * Don't allocate/deallocate where you don't need to
+   * recycle your own buffers where this makes sense
+   * or make sure that you are supplying the ultimate destination buffer (your cache, or a target network buffer) as input to RocksDB `get()` and `put()` calls
+
+We are currently implementing a number of extra methods consistently across the Java fetch and store APIs to RocksDB in the PR [Java API consistency between RocksDB.put() , .merge() and Transaction.put() , .merge()](https://github.com/facebook/rocksdb/pull/11019) according to these principles.
+
+## Optimizations
+
+### Reduce Copies within API Implementation
+
+Having analysed JNI performance as described, we reviewed the core of RocksJNI for opportunities to improve the performance. We noticed one thing in particular: some of the `get()` methods of the Java API had not been updated to take advantage of the new [`PinnableSlice`](http://rocksdb.org/blog/2017/08/24/pinnableslice.html) methods.
+
+Fixing this turned out to be a straightforward change, which has now been incorporated in the codebase: [Improve Java API `get()` performance by reducing copies](https://github.com/facebook/rocksdb/pull/10970).
+
+#### Performance Results
+
+Using the JMH performance tests we updated as part of the above PR, we can see a small but consistent improvement in performance for all of the different `get()` method variants which we have enhanced in the PR.
+
+```sh
+java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,16384 -p columnFamilyTestType="1_column_family" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet
+```
+The y-axis shows `ops/sec` in throughput, so higher is better.
+
+![image](./jni-get-benchmarks/optimization-graph.png)
+
+### Analysis
+
+Before the invention of the `PinnableSlice`, the simplest RocksDB (native) API `Get()` looked like this:
+
+```cpp
+Status Get(const ReadOptions& options,
+           ColumnFamilyHandle* column_family, const Slice& key,
+           std::string* value)
+```
+
+After `PinnableSlice`, the correct way for new code to implement a `get()` is like this:
+
+```cpp
+Status Get(const ReadOptions& options,
+           ColumnFamilyHandle* column_family, const Slice& key,
+           PinnableSlice* value)
+```
+
+But of course RocksDB has to support legacy code, so there is an `inline` method in `db.h` which re-implements the former using the latter.
+And the RocksJava API implementation seamlessly continues to use the `std::string`-based `get()`.
+
+Let's examine what happens when `get()` is called from Java:
+
+```cpp
+jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BIIJ(
+    JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey,
+    jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len,
+    jlong jcf_handle)
+```
+
+ 1. Create an empty `std::string value`
+ 2. Call `DB::Get()` using the `std::string` variant
+ 3. Copy the resultant `std::string` into Java, using the JNI `SetByteArrayRegion()` method
+
+So stage (3) costs us a copy into Java. It's mostly unavoidable that there will be at least the one copy from a C++ buffer into a Java buffer.
+
+But what does stage (2) do?
+
+ * Create a `PinnableSlice(std::string&)` which uses the value as the slice's backing buffer.
+ * Call `DB::Get()` using the `PinnableSlice` variant.
+ * Work out if the slice has pinned data, in which case copy the pinned data into value and release it.
+ * ..or, if the slice has not pinned data, it is already in value (because we tried, but couldn't pin anything).
+
+So stage (2) costs us a copy into a `std::string`. But! It's just a naive `std::string` that we have copied a large buffer into. And in RocksDB, the buffer is or can be large, so an extra copy is something we need to worry about.
+
+Luckily this is easy to fix. In the Java API (JNI) implementation:
+
+ 1. Create a `PinnableSlice()` which uses its own default backing buffer.
+ 2. Call `DB::Get()` using the `PinnableSlice` variant of the RocksDB API.
+ 3. Copy the data indicated by the `PinnableSlice` straight into the Java output buffer using the JNI `SetByteArrayRegion()` method, then release the slice. There are two cases:
+    * If the slice has successfully pinned data, copy the pinned data straight into the Java output buffer, then release the pin.
+    * ..or, if the slice has not pinned data, it is already in the pinnable slice's default backing buffer; all that is left is to copy that straight into the Java output buffer.
+
+In the case where the `PinnableSlice` has successfully pinned the data, this saves us the intermediate copy to the `std::string`. In the case where it hasn't, we still have the extra copy, so the observed performance improvement depends on when the data can be pinned. Luckily, our benchmarking suggests that the pin is happening in a significant number of cases.
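+
+Put together, the fixed path has roughly the following shape. This is a condensed
+sketch rather than the actual RocksJNI code: `GetIntoJavaByteArray` is a made-up
+helper name, error handling is reduced to a return code, and the key/handle
+unwrapping that the real JNI entry points do is omitted.
+
+```cpp
+#include <jni.h>
+
+#include <algorithm>
+
+#include "rocksdb/db.h"
+
+// Condensed sketch of the improved get(): let the PinnableSlice manage its own
+// backing buffer and copy whatever it ends up referencing (pinned block data or
+// its own buffer) into the Java byte[] with one SetByteArrayRegion() call.
+static jint GetIntoJavaByteArray(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db,
+                                 ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf,
+                                 const ROCKSDB_NAMESPACE::Slice& key,
+                                 jbyteArray jval, jint jval_off, jint jval_len) {
+  ROCKSDB_NAMESPACE::PinnableSlice value;  // no std::string supplied by us
+  const ROCKSDB_NAMESPACE::Status s =
+      db->Get(ROCKSDB_NAMESPACE::ReadOptions(), cf, key, &value);
+  if (!s.ok()) {
+    return -1;  // the real code distinguishes kNotFound from other errors
+  }
+  const jint full_len = static_cast<jint>(value.size());
+  const jint copy_len = std::min(jval_len, full_len);
+  env->SetByteArrayRegion(jval, jval_off, copy_len,
+                          reinterpret_cast<const jbyte*>(value.data()));
+  value.Reset();   // release the pin (the destructor would also do this)
+  return full_len;  // callers can detect truncation from the full length
+}
+```
+
+Note that the single copy into Java happens regardless of whether the pin
+succeeded; what a successful pin saves is the intermediate `std::string` copy
+described above.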
+ +On discussion with the RocksDB core team we understand that the core `PinnableSlice` optimization is most likely to succeed when pages are loaded from the block cache, rather than when they are in `memtable`. And it might be possible to successfully pin in the `memtable` as well, with some extra coding effort. This would likely improve the results for these benchmarks. + + + + diff --git a/docs/_posts/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png b/docs/_posts/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png new file mode 100644 index 0000000000000000000000000000000000000000..2d662dde8320500528610fb71c5537b826f9f9c4 GIT binary patch literal 98023 zcmeFZby(C}7dAY0gM|u$sGt%8A`KFXh_tk{N_VG}i6Vj`-3ZbpDa{cDrMpw4OS)&i zwFidh_&x6*@Adxua9v&p&dmH~@4f0?_gZVez9%Vsy zm6;_s6O-}w8yHP3beY%XEHRkFlIY(K7h4Z1%nl4j z4J__)`W3B92b8YtxZt|~sjKUGo^RcG(up6h{_6PN;>_2=*kaPMDyyHkA(woRZpu(4 zS@bL8`JD$Z(j44hLw$kzXnHEk9CoAMKmI+#lMt@J-z#4jQ{Gesa;2_`OV&`^O4e|C zFyDc-;D4;rj^^(o{E>J0px)N+u9s*cU}g9pzX(@!4(R9p)173SzhNF6_}`oVcN+iG zjsIE7f4hS6ES+kyBNW_neBn%e8IPBylCOl>UF@^WPlye35N&U7-x1DZIV4^k#>>-Z z*=blC#%sYz-jQn=+bz9QXK|uEh|4lYdmLA7>Kv6(%h#E{5T3M;(lJeNv3h;ov2eD6 z{tD&(n~`yGci~aoW_=~h)3!bK!=p>ZI48HNiB|)SHK9C%W}KP2wP8d*pQlO$v5FM0 zHm6D&Q0Rs`WjoIW_gaZZmD zCGnq`pXy$Vcm5 zrLj&KQsI273#l&MW2gBhyRq#Vx+)4Sm*FTQdkP<~@fS@Ot;wmV7;2oylrT#p zapm?$1B_tV@q@WacHC z_7<&nPdmq+H7%HV<+alHjF=UazT2{vHz6gZg%bN`A5sDA#e(c?gWxkDvLs|= z4oz!va&jZ^EctLgr<12o$3EG$&tT@)ckd!TLG6jQ&!Uo&3NkX@G2($AtAjam9M;#S zi&JT3V`wg265@Tl*!277hvvGvJ`HslnU8a|{2J!w=3|{XsXcZBZxy^;)+Te<9Om^d zUAjce#3V(`JAW@f+q@xKByTdi(+c#C{=$WulCiR0H)dZ^{~WX!EtsqMKz-@*Wje4r zm-PY4OHNKhW8JpB$#7zd@aQ}ocC9(z)|7W+zA?E4SF($E#Dk^g9rXqnLGx61{=#CX zsp0aU{)0!3$V*Cko?=vuyZLPIC^|gq(bL7tFJdG^I}CV1+_JZCKj&K=p%RfXH~*Yc zv}&NOtsSqJs-8ENkSM3F-eZuDENJ9$?^>9*xA%M$<;IWViO=QdWWf}C-@S_7)`>9TlZVQC2poP>@*!+GVC%L;vJ!N+4%cW7v7MQCYh^Ok?T$pSB^pfp{K za~}Ntn}(TLdTywC{Lkzf__xOIr!|u1=dCVLQ|na)Fk9qzdq;z-z%_yakWf;l!b@t8 zC$l<=U0f2&`C;>9>_!2{k01B(^NZ;;$xm*c74#2%{pL+$0JG-&Xl(eZzkQ)P?{eQW zZtx&zxv&zw(S}dS=O~XIiv&NhwnQbO{KYJCc5cr1{d@B&(Z4g^b8D>X)NqHroAbLm+4rkR5T># z^d#ivapPt+DRvc2PUiF~I5_0tEJ&D&*V83=CRx9im(zpNG{#8G^)B7@?^_vC%5%eV z_kZ$Nm%(8-NKTxHdc&A^*VorKZ#*U6=l%OgAD@%mPD|b8j?Oi_OR9w{L%~<(6^RB1 z2OXvhm#k{IhBNb=YC^d3Mn1mr@%Fa(R?fRR62(1TFlV(g=udy+hN{1Yb1wQQj?c1! 
zHg*Il!0O^;;=6Z87sgwqEQf2FX9p{ZRazxDx})J3B{o(U3~jNpQoQmUU1rbjc#^kI zm7lBrytps5H4*e)0}N5|)~%=TLOev9a$K8Wc;j%`W69VM3-{wEPFRnA@=r)g68D}5 zpOpZ%f4yv(vazWt8aA>4zQJdH;qy#>??y^PLj#f=sE7=h26l~dR0h-A)rw9@q@gZS zkmT&yMC4>%b#m#|onM$}pNm^teXj$7iFf@^DH(@J$RkZnHmx5=lhq6Jnl(Lb9l3ws z_0L&Zyku=N+3_h}HkOW$PaOj7*s-PX4^BrmvNdq4tqwEaPiH+WeHqm=j8qP}ZCIGL zuI_kDu!)Skysyj3XLWyKyqD5%phkk`;83*fwr*uEetGun*$oX@5fKsOMcIuzsKHgY zCM8`(mbcK0D_Wb9H8DxSd6Seobg#rvh2F$X7Dvz8-N+m8iFxxL$T9!>IlGhj;HD_+3?c z1TU_D?8R+tvh5e!b>A|VP4pCY-o4IaGr>8=E*`>J|E;_{_>-7FS!%1Hy3>OEiQ~tA zxu7q4OWg=sF=n2dn|qmop*l$|zh&8x53OV>claEO%|x3*t(3ZYnEmhf!XC_JA=WKQ zV4^8pk35l|ynOX)6)P)iKq!xG5NP+@pufhdKWv|jZf%{V^!d>X=Aq)X zN(gai?fQCp*N`U&1kJ_G*!DW*kaAnawHG<%&o~qgEwpNIFf%jj4A+Fh7eU0(1JqCj z=urYJJw8638I)*#ytVe^sZ&9(Ph1Joz|Q((7Y2uhg32jfx^U~J9hpC)V`73%tJ_5i zhNZ5qI(+c=*VU;FGp?wteA_zC)|qY0@_2EAx$F!JAh;99jy+Vj>)$glV^;;xPvZXl z0HtiBKoHUFti=a!??*$m;ihC~&q@GV3*@#=2;?vge967 zIp^hvpmC3STyRb&$jA&@Qq&$d4aXG}bltrk?l}7zp0w2SxR#olntZMo5Ra}o^NFEor7J9M zV3L^l#vj+86&2ShotGmaYA`bu{CRqSn3x0o4H64)JjD2=gq+i?1TaYegwkH|aK7#{ za|?XPz78HdxXu#pw5asK*H`<-Ij>#d=t&S74eILEcMSC6N1%YTE$8h2&~ zg2~P$YHU z^;=RkOuv?vR*NweYJsIy0a}o_bLZs>IF7EA+R?~`9PPCjsc;jB%jA}QB|Gr{{@HW- zjED~4^YSiFPfpJr818d>STTJ8MM`1ke!e_%)}yrl8XzxYWevdD@3pdx+8=o^4?(u# zoJi?i#K6JPy1tlGyx^h=fS)?hdc1{H(<2w+eO0J!Px~0N%W^4Y6`1*3X57ZjYmAI3 z&BQvrj0VG1fsF7CVCXVCdyAZ+A~-GN-EtvN zd45SYE#Am}ujT&c4ft;Hup7C}#2)tc_Cq-AG!EQu!D5HuWfqo3TxlV5*+~)-QV|f# zTF!Xz_e5q5|ETxx-(#=0pV7n)yI{*h;MBo7`PTmko;Y{T1k%6tO5Sr`uif3R1AOT@ zX6Z6tFMP+_F3x2OXJr`ZbDJe~R04XVW1+yCmt!zxYn0l<3?lAUXURhZg zsu!kY+p}lSWhSQDWyd0_Q+4(A^_MSP_~*8;@L7=|cls2)1ukyV)Ph;}(e{j3oIiIj zN;O&RXMZc?yd~#qz`cn@sdqEe8Snw8#UTGE8<-(qI%BrfZ5u2dbtmww$(Mf)lV1O( z(M2|A9e4&s6A;?Un-Eh2!yS_rx$BiEMg?Ox)^Z%G0MS+>A4)aEQWh{YG*rqrZM@KB zSsMttAH~S8yu%5gT0k*wV>;Pn4HUry5ZmK#3+q$1#Vn(vqu5ka%_3}2ZB-SkhU3f& z<}xeyovU_hf`A~2*Hl#snz>%KzGUez!uA!;A52{#e3)`$L>TS$4-+N7b3ySJ zm3*6Ib58P@j~~AlJHm&{`pp91+?R3=Xetyqtp~-qm?^GDGnYP9UUDY^yU<)rt;~! 
zT7Fys;3oZrAcqlaEl3~A0tSj1aev(SZ%iyqw(ExTjW4&ITvt+&Jj2R-R zsHjlX5|NZ-FLfj4km6nN0Y47OXT7S$&r8Wu{4Q8W1Oqh1JNaYXu9KUt5pCdt%bzf) zWJ6*oaP-jlvSq>!jG740FHfAxYxVa3m%`dOav$&mn;Fl;rc z+Yot>`BkUYdHn@*W#u5o{E6$li*1??Bav?NZJJ)AV~ME>0I;G{Q;(>EP`_D|kdV+` zy($jA(T5uHP$|S2AOgld2pGucSC z2)p!tF%@9E5>jTAwRS^NoZPcM6^ulScG1vUJIB_l8U>hW6wnR7PEB@O;o}?!V9S2| z_yIWm{u&O}J>Ty}ne8wjsp&yZ+#`t(Xbi=$fY3MN++gP2LzlcT${=d;&&-> z-Wb=QXJL_r?2>2f(mv0jV1KsJ`l#}U{#;x=t#gr@U6HAUi<~{ovwFV<28i=*{`~W7 zA0(MBZBEm9?ne1Rz2|(w8rHJ0W`=@ttQqx>TL$H$`^5^2U?rB78j5UZc8D~{Z z+MvAT;X~Yp^nBe+gNdQ zAGnziE=w!$S@b|IfL|@dEqbmO+|_U~DO${uo1tw+9F7{wK${O(B`!@7u@ z$tD0q!&Mpm0U_|kx*@AkTb#UGz5^p*_KU0zQV^86WE*$dGz}I$UTT5kB+f6Jx&UD0 zUT=|8mi4%TyrSY-Dv1%ccYy~{p-e@~zn;&1ij5tw;hC1x*8YUkkdUZmX31&gcvF^r zHGj}gB~!2dq7%Suhk>^o%4+=cPY*HH0ftC+cXuy6mjf?K1YQ>c63{!kk}+R=VPU}m zcDC^CgwzOfa;Fb~tE{N5HqPpX+{wVmsHyFwd^)*#M`0#@#ryQICnG)sm#Z#+U)eF(l(>DU%NaV5{%G7_^IND@e zyrzcF>3wE8k;a2Qx`=@87>?tzIhUNFC^W&$sgX#P_nY)i>V&%1V9y z{JDJHHqH)$>=(#?l+~^jG5`XxX=AhCFzFg8Rxg-e;9G;(sexc=&*SOB(t~N=-<%!~ zdpspAD(WsHBQyW&4bvC|2YL7t9wGTVo>RZPlQKjy7SitKv23+`8wv$G=eYp$5uBT7 z4x-Y!zL+DP<=o>5W;L^a_F%Pv?gFIR1WK~ul^PdUhkjuxD%1c6f@2c;v`E3h4=E&3 zp|j0nPpRG>qWFc@yC4Rlb2r+?PE+^toyr+xo43P5&#q&0$fb-y)W*)o@@M6Bk6Kj> zT4d#^&qX?YO#W^Q1PoAHV+y&6iHY(mDvz7i#58a+>%m^}si~$5FJB`D zX91D})5eV};sFuBL8#fZYXc#6Kk?RRcBhFfoEC!n#yth?IKg4?-l-7Z^swtI*tI)C zLi#||A~&i2c#;iAn%>YF?him0NDeiuMPyRaM09WT@I*#!3*Z#%N`{=}LxEt@?Q0C4(qU}og?m9RR<3>~X-kuNZXN&~xg z!BpNjq%_gMf{Q`ugxJyX#XM?px;M3>LwUuK?^AR%4IP~*7*`!2!%jFD$R?)pr}<#v z;_9i2DBK?3u!G2EJy*>!?#VM%2UU#AnjU0iWSMX*RDO3JK75EgKo$fxAZ~K>%YZ9i zsGv{PgA_@)uoj96tdNAVCr?8{E(bB)q;NrDvbR{H?2Hp2oK*<89rMc|Fg=JVs{p%F zAdw~2{B4uJVY+NnpjmQZIK`yfuKjox zq>nOOm)!a4TX9q?}qEaz+ie7*q|oaqDv% zy0cgBrwO1e=hv@atWXFmt@%L;q&LuZA0hR=85kG{(5i9Tc_}{%OFEgO28bDP83;}# zLSX>dLx144Y}<%m0L0;i+EsR#L_2Vm8-V0d8N@drz^<)bRoDW&25gPbzn{e|pbB5e7054S-65FAqyQ zIu^jEA2)n|43r!=OT?2Sm0)C)L{JM11S*V?ZEg2?XTF_SEv3Zv@s}Eud5|Wzm{+s4 zjv!y*LCS(~FgPfR#Vc*SU~O@Lma}sE(1sJ_6A`{lfJz_KdRmBGmIWc|8uXPs$pibc z2FFMXUjD8J04G^2Zv1B7IXeMGHEUlu*vN0+ZZ7~< zfRCFd?i>%^OMGsEsmOC^AOG?b;H{gZgo#7V%SbD#s+ypDVAiyTDv-v`MIWQ0vVco% zZfP<2@$Lekz&gOjmfte|cu`-uA`EqtBG^8_PYrN2V^Y}X`b1L8-J$iKV#EHhS+BvVBR7cova(bu>b_Jej zA}^P%>%!B6HG^W=IC$QolrO&y0_QIa?mQd}Y@xK~Vi6R)*(`qF1=^90n)-KIh7nWl z@8bv^0nZN=h!or+c!5GNZJ-%sik&xn{Qa*$)I^ynmVX@gQ_2|?qef7D7m}JLNFl+V zR$7li9U5Gk7gPwtPyu5+Ip>_Y3Z=`sd37Mma9ZX55RXOj@XL}RambyZAny|tlt4nS zFfv=ow8TVw=ujghIPu;2w)9j~f)F|kt(6YMPo_p*F{tpNMWqAG;z}vb^cpg6qY<^@VQ?Y6=hw0Gx=Ur9d{2J)Y^D9!Re%98uLPgiC zjY(?)$f;OVj0$&N)ldAzT{o`h1xK$p`}-#Z5`Amlb}CY+eJ~a+KiZh}FV4KtnnQB> zv?0_Fk^2O=x|5Gm&kNcmlGXAR03WP^%%nyfmO{I0U9gtCvp=X%VgS|o0i4ifOoS!s z@rroX+}b*hl8rd&C@Y8w?G*bkPw%K48iDwO3i@?GjR@^HjKS3XH1B}_P>j46J~!ik z!6w9aA4V+{a6QN-R+!SsXOA_#+D`6(hLPN$bN~G{6j5wW>L6)!=Ac(qg8E~YyZp15pmMVimyw11lB$m?;)zS zZnY+o0MX`icl%Tp3J?hgq-UgN@EpXvK}{v-=llZ$1HQ(8 zo*6bLwywClSETU}{rknPsXgA&D`CSW*V)*@$NK(R@@*O4DI}!bMzQbTFTz)ecsFvD4^d+#1PE3j z#>(a!Z`B(tDozgF0vBw`cRbj&y@-!-dhfUQ+xDDr4iq)x#`mOW=G;AN_Ed1}dH`!5*-cQkx(n{^r+x;U z8sr>C=X!7(Nc&X@uSF8+IO|AuJTnNVTZPZrZt)x3M zUv3w`dA|q`kID(pT@fcDw>DKG!e#d01sT~Q4gI*miu##WCA2c_q~vCl6t z@}XCHMhY|#c>Mk1Y5#E%F7p10nxd8|8VolVL5`V9*3}B~3eFLknTJS719i|s$hQ|# zHK|M^Qej?`#dW~-0YRr-*%O+I(sO)#IPaL7Kbs)hb$&m=rAGk2uJGfGmxV=24$nc? z^V`z5CAYbObfo{=5}jvd6w<;xCQ6fpjC};>CI2OBaAtfH@Djt;4$-a zQ2o570}a)N3^L%0gxzw+Y^?dmNA}Z`_S(I8d8ZTPdvsfj%%By&&7M~d2kY6M#N2#I zFau$EwT;(w^!)dfJ6?Dn2kD;wL0EZTT-VeKBy-#4@Q&CLqXm)Q;4Fv;23d0%f}$^tbye}|y33X9VkF)v@H`uN)W z-V%(Ogq6P{d|LzwSR%QnnYONmA=<4cVYklB%_?$Ax$2|(r)jUJ`y&^(E%s$Np-Uo? 
z_}z{iBy8xsh=Ea8VtRUxuMew&xp`~{oa$+U*fAn9()AZXmx*-wca5AUoY1>++1Ov+ zr%#?}RQ&n#1gtFL4WS#}b^E%tLJtfA~_cpJwGpo zK6Hp6ypbW;Yr@OD{^{u{3VM1zNR2NLzN|Z>G7Cb{b5QXt&@V=UWFyWdru2NQNH`)Y zm4EmB`*;vj-FJd$tuz^tXJ*k&sTRRl32i@1PE+%BMMaN7ZM3=j8wx$pNi{+U5a5uP z%e-4=$GJx8s!ceZJn%x2-!C!IT#iZgek=*$&Xv)9>#!rvcH}R~+uvXVV*$Ae?JglB zSKHic9;s%5MSi#p$hDynFIPqn!l#01dy8)$dCRD+trk^RM}xfO*X^$(^*02$?$_3> z&P00!xlU%A_1^+A3EwS6joN;oGAIuzCghMw@`(y7(}v%#K9GuEam1`-QVdzM2IIuZW#+CV8hD>IXZm$%zR1*(5Qm|s=e9Hx*A$sZvl9q?t*^#nLzb>gAT zJ^{0~JnKZ@CbJ}M8HQHoh7A#K=|xXZpJg}mi%7{ToYT!=b$&BI z9v#0Si*F3$eT-5?JeT*DqbCm9pK>5ukp{zy+NRriZ4&DR zsMs0+wbc)HCiQ|Zr7haSAp*wx&I2Yt{x3|PpZ>>bN1gM8>622+ZtJjJi8-Z{&Y~I< zdUYi`ie(g9?5|{c{%cA8eFDKt4vqyCfwfycLzgd0wQFL18s`+upx+rECMm5zlSyk1H!X z?(16|6(*uf&u-(26j+d8x@tc8Bl@HR@{%>G`B9jWnC%ap_li~c{+%g7*z3l^1lK=k z!L(bR*Of6&Ynp!6!4U6x`|;YiZM|=p%jk2~^k29w$vJe`qV=7P+CmZ|k18tt<386X zPb38%Jn}XjY4NuGzA?`Ym%DrSd3t*4XRuKdg71wu6ra!mJ0|uGvw2O}@%9sM!8v(T z$696=-oyxO@9B=9;OC0UUisRv0g|&kC9rma=dF`JNy5vZS%3mD&q2&x0`wR;ej|*G z#J?c=9PeyX#;_f@4_E5#?M7-%C&G4p2SVtfMFm9u?)FwVIXdq#Qvtfx-_Psow@86L zM!6O81>OOf3XbtTfPHyAy=XZ060a?9b^b?gmlW_Tx7w8yJ1O>HlnEl-|43dwSWbT7 zeM2Lo^zGXSphu7p)^UEwg6SPKSE!|t|;YaD(*AfbPlm|Ie z{Y#N}|Ml9tnUx!xE;zlRff4^X62j-+(e`S9u3gmDt(Du{a9`Z^LOePbqHo>0=o4z4 z1um#BVEcu;^3B2t2|dyCnPMl8y+;PcNf-@tJ^#ys$!BxN78bxE0-<7r61DBUAK%*h zPu?pu?AgWL=92e;X%2J2+9Q$@3bY7mU)bKYwA{Q0b$AF@YbV8EbdpMrBvjE6O?vBA zT&!z~N9tzo?RvhCa60uGydTQiDTbdTt`Ir^fQM}O@tz98Y{@oSycrSwo5a2_3l5nH zsYl3(Y;fI(>H~HJTpYmI|0Hu(7JT8tQy*hbo(oIL%zQK9Y55hp_p;0f?m&AissSTd zgdu2hsOk!KCmZ`IdY(09fMy-A5C*06+hu1$Eo-=LI-TNh%_bbh$Zyq@JqHiGzGu_Y zY$MF|5Na^>!{II|(3Spk)C$25*X<|{N=6x<=kNg{n*8nC>2PYVYKLWS-h6WF)-6`+ zF*#77?flMFvT9KIyod$&a9halZ^qqu^3X_3#%AE-{oz^0?R{pC2!iB}fBEvFuP>8= z#q90xi$5}WY7PK*RNwGP49aky4xG6BSB_^T1BY;F#b=443085`zw%Qp-y85a8tqb(fHo*eqm8tso7p(w9fL?fyiRK^YC&Z14%+_5stf z#zp_u{=RqyYLlOB-1Kvx;*_kKD>E}QgYXXuF53xbs4JD$+kzf$sP^F*l-O=0Hs}jP zMU?*jeuOPW2wK@zH9SW{t(AN6qKr?N&3jnzBveL~i=7Lhl>oJ%!#RxHCTN?Hf;u*Z z-1AvQO`gJq3-4{p^>d)vxng{Qj-OxS!i6RY4E7Dx&P{2&^Pn=B4czMRKgZ5oGZQFAM00K`n=H#GwRa>-rWf z&YL>i)V*4y!xSbxEOJ$mGeK=sf4xtFj7*h|iRqrjEVT%v{`#k($sd^L7jABbFrk7! z1VRSV_HEKx*;@({bQ@v)9DsHSA3xqgb7W9TLK6Ex2v})1#l*5i7mGf&wLK9PB{PI= z=>x~=PC}1HYoHueiMGYOy)B;!ClPhiP)NC9R&Bzwkr66Q3W9>q(6|l26?C@W2xq$! 
zB=G~*aqL*Us91g-oPmszQaq{@LE#Hc4WKOJ13}aFr_IjVHaBG|0>`sy7nA8wT!FEW z!Qo+g>gNUrN6T!n=_G8dN=E4Q(ii!d--ANf0l|(7NmD^A!l815vZ1Vj6 zKZbOhlEWkFC6OY3wHGnJb2d)?QlOv~XJ>I#AG&2FG?;Dl7>qEXW04Cxn4hb1TZf z86EB%J5~sEN+g&@612`3wx-A%7$l(iI`r;a+wZ2QcTtp(csy8D1g3EeEkwK04>|_) zR_8}UHV*eaCz#y*AK<3Ok(;8WAE(P7$W#I}&TiV1#OJh115KRhAP7{7>x+a0J~@f% zD-}P!i1+t{+UjRKeY#Ub#@X5Vo>~T(z%ap4KDYp`50b>ko?sUy(Cv=O0c=(y_q4UN zkS{q((44fpP8*6~7;>VsCl^M{0qk^q8=i}YSwg5GKq(wgPj2VgZ$J>sic6SB=cgwwLR7M>~uQ49Qnn7;n$Hi}pYsDWj|^eOZ~7=C*N zC%vOiX#3r1>FKu6K24{)m<3@ujYl&0D(C`qo&@nZ<=qm*8_{-tKO#_e2JvslkADJQ z5$y9aRLxLv1*W~&ppO#S=PiN)bKlc$zSG>~_z|c&$osWyL)f8Z(TDjC^x)pKEHuFU z^MsHU@5)t^5=BItkz@mVzRhF^X~8WVTX%D8d@0Tmcl=YdxV*SH zW&QxUKR|+q4WDkXtV)ha zXhLElO21%a6j|G8p{@0rc{>)J%6$_C7@6tQ0(b8eXF0STB{nB2qBcG%Vb4rNDG-v2 zK1lEjqhexYxH3{@A;;)8_cR==@HH?oY01=YgdPUO)v2A?im@dk*T6?P-@0(2zM>)z zd{jB`0x95&>0r|I+1?{4Z$T6S-bM9M%PE-Llc;9b)>2LR^XE1kTr130YI)Q{8R*2x z)`Y!By9mc7`LuE)n%d1GBxBOkLiN7^8V4cBnmq++=;5%RL`4D=#+kOA` z4VfGV!Q$&A`7|=zyQJu^UzE@u+@gC0Y)~%1ez6-pt=0@mt%Y>!|)Ti;M?;ms)M+osVQ&4-* zp}EOOR;P>vYB!m%5(Jo`lNM|v6wBR=*}iwe)y51+Xp0}CA=Qd9LzYPb{nXY z{{6;hetx396Hf9#4g%i|47C`gxX!`aV&;JDzoSNRVU%yTd&au&y+dIWN3l>9M z8v%604J-rdazO53h@fVkrNf2_koLKIh(kBYFk~-rsCx@?5{Ox-XbUHXAqaV(U2!ox zC}@e6XN?=;KpVXIzcAYNn>8VLPbx>ed#9(Z?F`XDSRQ(Bp+T1(+G3%DCpm5j@0p-rPqsHg0RUZ5TaZ7#B4?U|t9C0P7}H7N936j&Q#P|D~UB zVujt(o@ns;Z+xcrf?&e?m6hp$I8Stcb2jPQHk8O9gDIt;^6DRb7A^k5d>ZI z&l#$m$KatgeF9gIWk^GVF96$E82E<^5t<-;o5>MQqxYWD$`b>F99pV@gD>2!fS_Ob zF{oSyC_3603oy-LpTO|PubIzS9m9P{<@(Sw$JG;F=PFpl5fc7l$J zEppC5EOV2OzWeK+Sv)llJ(%feXx_p2y|Z%>TJ9IZaHy32%(dAq;ScvssR z2lWPg#z#H`++72VIk4GG+}}iLm<~cg(CRUGsxF~7CrJZS)C+4MNqq>tVwsP=A6yuz z7t}2Ggkk|ElaOtVEG|y2c?AVm{;mr9^zNM*95I@If|%3@ZCE6vr1~IiJwhR`E*aVw zEtu{>!RR_mv94`MDYR7>LN|}7ghVr`ZU`cd^iFRXUR^Rpl4@*6hZGc_UVJU(>CN)> z2XR0r4H5?{D-vEEFO-(CAU#hDBvqa>$JK_j#{XeKiJkZXjUF#wNtK0H;r zCL}j>YW5Vgo+v0O1wx}()br=Nk-i@wc(rcN+ScfgABEgKs_vyWLHKGE1@;EiG6tp$ zLY)@d!Be?@AVitGP^UF))Xn@5x$dg43Mwb5Yy7E}{(>pQ#|MC**3nkO5c z?5gw)3W5S9IS6DHx`2M3iGpq!yt-lp+z45C_sh=0d)_G3u=H+0g;>^Y5_K?M!29SUK`gI>4A5=UXxkpfk%DI&wIjGgVe@`kZ!P`2;>0Di1tqdch(3qAxzt@%V zH9^bl*A@$*ncspdGy@PTfTv>U_=|^>Brf9}Ch5WK6E{wA^|>kteiRqUWB zPlX#OQ1S__!7$#AENHt&XQYAXoGQTY$qMN0`|}4_*kwd5z%l?cbE8^W`j#^h@JC$e z$&0a;e%;dRTMQaksAQXVBh)MvE(3_}{oqXH~K?ew5mB^KKfm_ZUKxGViW za8m-_(a@62pq2+SF4AZm9^QTaJWWKs4xkxsXlq0Lzi%n|)nORVn|iYCGxA{aFk@zo zt{Q=HU~KvQ&(JyrjMM;a_fkwy@DhC|m)M014{gfdQzvT_tINyFqvt}?Ab=Chj+;B) zvOk9QtpY?_KG;4|bM+iiS1QE&Y0G zCJ!{oq@+kDDkRImfCieL1_AGfnr)^M{EfrG7s5R=XhZ|1I@w?_T%-X;$Qg4wlVQwA zIelxfTy|#32IG8+IzzVLWgz5;G(h)g6ZC7L8wai?S%$nnf_YMMjCj%l+D6f>EZ)(m zZOkM`g_M*Ods+$_RYO9^b5$83JG5F(civcSPiw{>#DibZkwSaBW814h34E>cr zpES^wudmFpK_w1X4s}Ea7=h;2f=dj}0`1v+VLaVZG9r7|JE)bt#@EXJNuvm*jZ#xu zF2d;XFXoi==VBKv(-a@}7yH-RzI5z4YgBFh0};3G?gvrZuDh2P|JcUUe#`d2>$6Tb z`&yt+vlUGlrC%NCpouX5ikewrPl|jj3h6^nKD3oGZ2|gOE4I(*~4*$ z&p(uws`vD6lMRaYoQSo4vO2vF;JQG|m-(^K%#AKz0Pb82CIwsJJ_vddbVlgT4-kve zmdK;@P52@j8aPf;@{1psDV_9$&RW2A;9fpLyCd@KM*pxy)L%vl3uA8pXwgfMU!zu3 z&?pDEG6m)ij5gpZ0Xf(=%tVj4gph2OUpc*h{Tc@JjPC+*E1lw8oo|$aj=gA@DW$!1 z2{C{V4uOh_z{L_UeC+Z~8(onD_i#|AZPw!)gIhbPZR;-z3F*+WHDkqY+ub;a@(LKk zMXjF5h}l)2ZuWBRE_7P!krS)j0p5}1_#@nk4Ja5IzCpuAiEwKZ@=u)v|8!pWw*`NE zUEO_c9p~1mYtTnuSK&*KZen59M6EU$BSISTG^q5m8iaQmmbv_chu&oHc>Xs~SJ^_6 zt`S4~l`d~-xikzIz^+m~JPsd$*;%w%o~`I`G}B<*21wUnhfiTqBO^xn1L*D+Xp6^l z6u08kFs@V=klA4NE2Ew)87LD5f4B>=wjeD#6E~6vcd? z3|h-;TCQU1+PByFXfv%8)btJHDs8#?_U&63MtlK0t5u7VZhO!k3^x_wl;r4$Ql!mUGU&Q){uuf7RZ*^_4Vx@LCzVnG0Y6#Ea}f%zqk> zp5B6{I0#CERk=MaMb#XGpN1;AxB{+tNQl&l>$M%^mVq({%Do%fcj06-sf)_eh? 
z(ny5}v;Pz!dNp?DO!pe-$G>ajQh^YYJ-7_R37IZ9=eHC*3Q!)vFMuFZZZ=I1)u}6R zF;rre2fa~cJ@|I0g~x$?knz|g8FOLI2M|Ohnf&ol*4UU04gH)bbRf3-X>NgVA6nUy z0K(BK24$ZNkOY9(g;m?U+_1qMxIU-;u zbaWN*O--ZOT7Om?kpSQ}4~RXF#P(wL+iVMux58UN}Dxy%nfNVYe(k_hE9>OYQ&7s!PzhB;F@v6#DYg3aR2n)@7K#|JmCkCUo zhadsgEeQbBQLL|5%GSDH zTwp_X?SU)oDuSMnorm6%1rzYYiXAvW<#Uw3$?q_v@!M$6?=Ys$cU!9>zl?B^HYSnB z^6K(WcVvm7p}2P!1buw9YX|u;5s8FnX3sgCm4|lPj~_!ItvdB*Wee*aInoRY2(9m! zJi0AU@gTp!2`3{a_9CQ4$%1nyuFM5&wH}6)SvT({+vCWGv6A09sS*W1#;S*)i4;tL zbVqpa7E8^|tvpcfMZD~Y8L8OXzH2faokDz(b8LEgQEXaTn`^DcU)<J9HPE^-6}B z@C9qA1y%N&?MCa|M|k3???SZ|0Xl=6yIdCMfD4!hfQ3rn@Z25Y#F%b^rtIfT&5;Hh zpvBvO{*KVexfQv4_kf2wDiM6DtsPU>O%SRpJ;O#U&^Z9M8QiE?BQkPKx)b0g6olH~ z2o8@JVMa^{Iwe3l@S(Es-ouCHP}8^=k)fijj3!GVy6Jy?eqcxF5llD1wD-sDB1x3f zD3*m>5-tET63|Wk3q9}TK8wlQju-b#wgjMYx-J(+_?5E^d|~K?&7djHwPvsD6d{q= zEiN%>BTudx1yqEhoSY6kRT;4b5KOGD?ZP{vAsH@!cuz&kUOq zTkWCket%mRBBFtz+w2yw`}@u2K*FF}$}aW{K-w^>uLwlR%fqDjO_gqMYO_tf*f+o| z=GTA}aa-5F3)6Cxm$b3sXf>|J2l8IqGHfApK7lHQKM0~k*BEiI|*hfYx#WPJt=8{mbH z{HTIke<}wp^x>{T(5xdr#4#nG393T$@2>t?*Q$AkLKItoXS^&Cbn;ywq>Y4=qWHVZ zUsyoLr0a_`q&Vn~H~>vMub;q}ITF?@xY;!fq;iLA)tudpe<732_F=5p$pi82bOLTPbjsA4 z%J9gu@9d$k1<^R@VmP>Y0{=)t$%0c~-SnBed$^*NRf>#E%zeIEy#51$1H}SmK4Q#v z8%&8vr^@Qf8)2f*rUiY|%6!xRB07O%ESgrFzU)UJW1qi{`|? zRd}o7C0X%g7rnd&?h^!Infn0?x>V>^9~kN_MkAkadt&AA4vZwB-<*?{ap{8v=U1Ol zr{uwm{~yKacn;I|o8U1|D9Fym6Pe8xhg-KqoCWw%m^y5MegPCu@HHd+ zRMt@~n7GmJEz0o;;8X?*>uPw8xe2SxPgv_d#|p5 z?TNYJ+Ezhg-sng{w{zJ^MMdgwPBApVwPLaMb3^U#@KZWRjS719l8_WfjEty4#pkn? zAHLY*9lgoeiJ@l^?XlvRfvPs8wUW5FhY7GFm>9f9?anWR)Mp^Ri8glqK9O6N%Y#D8 zVM!;TRLI~axa-%GT%rUq0_FY3$-ArIN(ea(4W`j?nN=6K3j}86A6er29t5tzXu^NS zPd;Th@`Xbm8E8oit!xr+BN?V{h9)9E5Y|k4{kjaoEle&eVyK^_hJ3s?{tP;gI%{&y zv|CWQ^7XZbM)zbV*)`}lPJ~M%bwjQFK*_I$?!k=MoviB?2YM5#ybEwc$cUecNj;jm z1&EceiQm!FSCtFVI>S%!CEle9CyZ@Ad!L&rUPU#>8gDc@o_|-VzHkMGW76#>M0o67r;qN-OW#M1>Da8tp?q_8& zGLxN4e&7-4`ApJ2W%w~^qyV)>(72kNnEO|%rZD>pi20TUpib^xJ}}d31Kd)3z<%`o zM-RKZhxsy}LjZI%4<32n#js%)-H6VA=e0s6cwor?fsp`ZdzQi**Z z<|rp94!W4_k_fs|7su{ax|P{+(ru2%SmWmXR0Avwbekpt`9yowgTH03YszyF(tSX3P`^ZT+B95T zN-*+|YJ_kv99+3;f-VcsgBwF9JrAIVj@;hV87|8tp0(V90#&#n+Xu#0(QSY2*EP!yqgmi{Uczw0j3RgwxGHIKQ|U*Z|2^5%9s<`j{~(kO(7{aH+|hjm^c?Ob z>Yl=zy{n|r=EPI{G%dex>BDNud(-V1!r?C1+`UK0sAC)Te;&rT7Jk{iIq9bN{Y*Hx z^n-`Q9{SY4B8`Fv2 zkLi~EdxYwz2hf-3vJ1Gc;GVb#o9_Kx818*N&(nhSOiUEu##AULLl2IY$9rlZ27!34 z8L~ppajp4E7`M{e_zQs3i0NQ3+{7rlOcgGv!+^g55HxTFz&)U+5k`kb!|D*O#wk1e zb0i}^$IGo%^vKBkI`jSek#w!HEgpJPFXnzho*0FmRkUUT6CLOl31|q#x&S;ucMDH= z;*WV`w|iIZ)-0FS#y%qQI`I8IzP_ItC3&jh++1s3Xt3F^AWP~#hQ1dQdQa#(8T5#w z_WMg$q~Lr14dTrk##M^@dAy^ChUhnN{C?89*zsuHotO@{O%(2Gl9Ih>X}t1V0h)#l z|Ma;Hnpv$=ivJDpd9*^^4i3V#L$Zv!1g>o1YypmzCb<|d{;(_IsUoE>8Sfc+6%d6# z^r-*;2z$%0puVRI6cw-t5d}m_TIp^uNGSm&1nE$^1Zky_k_G`$DUt35k(Ta~F6r*N zb5#8O-+Mn?KKcmzoU>=oo>{YI%|xy5^*>nb%iaAmD6#l`e@OiwqoEybYc2d{cmLzh z3g<`SKM2%6Wi^1>!yQJ(-akRVHrUbK*}s(eyIi_{N~(oN${ZZ3J;^(PKgnWn)KU)0_{XhB+ z=FXZS<*wimnpBsOXTLA4jsHy*bm!nPjk8X z!!;&BQsuEKD(>G;e?dFo z-4n}VjmP)DU%=$ua^@j=!`F98qOh*JVF0)!vzg5Wlnac1#SiYjgXEz2{(W_MNLpk_ z*CEEEILA4(u=N!x#X!D(&u?dmfenfYhdRL|1}wC_KtRg@A4$bM}M{{G+e?ki4TS4~&J&X)*pxA8vX#McQ=mWw9hrt%eu7L)hF;sOf2%dU7^3sZ! 
zOeB=z*izeCuuV=^`5%@n1oj2`5Q5VDb*8*N6TuOnf*MFG=o~3?bMqq+mV=A2#{UAr zo`X<}xe{}FC{eM9UdhI}I7a{+OK88Za|ttl#}XgdU7!gQ9U2{-dgdH@Kk^%8e}5xO z#bjv~F(U+MMf^3=Ruu#|Hm!~ygmS;>@aIr2$U{%$@00eC&&Y5vxrMYX49F>Lo&xD4 z0{er+5(s$~w7QY_;178g<;p2xKFbFhcLRkg?nz5060-lgM@8jdz!!`pZBW1@B>W0k z22#npGIbhtajx$1!wtgO(`~8{Cn%&Sx=&>z7EFma3gktgq4X~~;~%S;>>Kdjwd+>^ z1+&yk&`1(2chR(^l#u^Rmw`C>FJ0z0v-x=l^jILWQuCvgm7Z9{Ux8C(1VrrM1jt-s zL7)f-6Ue}ae8ZkQCzEN)=*Wv#wJxcDoR}~O3$+eM$0|$iDr+1?MsNU&9?&>B;OioV zOY1Xg=Lk(~5GnvNLZf=T9nrBzI$43#)(%?9DNwik15aL+Fm;ytXA%iqR{%M~? z&CSWF9n9WPM;gy=xt(OZdv~FfLLwMRd|(vL{l73Zlz%0ViteCwB@_b@!B&KTKre%e z@*J+=Jc-ATLtBMTpx};#@t{UVCV_~m9T_}`12Hs2?g^rI1ay%6-9LLb>npSnn#3Io zu2kiZ07k?7?;)yqgwv;dexE;lC1m@tTU#Jqz+ zG@1LYwiWSE_x=K`AVeQn52Sj+o2LIZhN7lIqqX3Or5mLe-10@A<8zk+C?yc;`xc0a zB6&I}3B~?qfuaJJn+OD(Z7-lkAeu<_Ahnkd0Raa)J1xkm0{(kEJqn_+FG0wDo^Nzd z#)~PM&(<`R9zidFYj8xW1bHq91unc>GB!2_y}!p^Y5zZ+x)tf_pR{%)?}W5D#QWVC z)G`u`nk@mw4oIs;0gVxIgM|M6eypbJ;?MM4F8|iQ*{dKkU#L9ZUw}&aZ>gKuNi)MM z>?E(o&XmKyg66KzAF-+UtSsiR@bLXM<&j(i5G=3^jfwd=-c+#!B6oeDt+#x_4a{kf z^^@^3UVuQ7&-j^5>&0#xl&c4-WNT`+v9n|Z1@UZpd32XY@llrQkGC}hH9|N|KE(g{ z;q;^FAPg{9Uh5;8eKmZ1`~{$8Kr{Q7jM8OZp4e8bmjCm_AFVP;x!onrKem9neI}x| z2H6pikT*vKA<;sRqu>PK&VDH>QU{xav>O%|7kJAY1?2!hLNwXn z5F@&l9~t8y=DlffH(na*U7H~9{urUALJ5>!;w{j1A(Q+F>VvYNR+kQnr`JIE6dDZ_ z;g${7ZT#IvegUu4Rw$vf(*HQb$_Yp>>fZ--D|gW7S{A^*&1jm19=DATRUR8LC|G!Wu$#Go=qZ5?WA{nh zh!eW#6H^n?6In^Kg!bTf?GEVgMxMM!6S*>{iGSI9pXhS^NR!;7fJ=91p9$kU!V$&+ z?vlGZ$$eRh@`x7)M`I4_M5Nc{B+whS4r+9_M~sHqLf)z68Y}_((XNyV2dDVk3sf)f z-#-m1;SoCm2iQ{eZH2eC-3zmo&z22eg|7}Gc&{fWw4tDnuz|{kgS#!mrWt?A1Qrfm z2{KGVc`E$-XNvyLZGoC)0b5PZ1Fgcg+P=b!Le}F);E0#lEY1J^ORcGzJ#x4@8Fna#8g7q@b$mBNf6Y<&jWjrfo)%=E%xkH(+Dr0ytRbyB}FU9u_SAL@1to`ptdiUQW(elmG1&73u zlYL~lcrlkH+NpdnaA;`;baJOa!_0cIKj_v2Pg+O3Ox~Ai-m>aL9$n-54JJ#^jRV(h zB+2Eguif?iV?~E(A|j;&xwEFm1Vt+EHcmkkEiPa;#lW%V6dO8HD4Ta%!Q!nN(r zgNpi!>n_wIKuYaBTs7?_FOA@uN?KJZo1Z%7?=6dRZ`9TP^_UMcx^J)c=mnk2_uNEx zRt1(@OP{lqQ@K9#av06VjU|ZCz`}Wujjp+XXbm#n^TVnJge3w>l+&y$OHvH z0GUZE{SCA1)K0=;gZ9zS@?BKr;`wDP;aYU1;aQ6k9%dbeTXwgu73G*yGTkEQLe(*M zk+Sa+o^nk*3HxLD!mY2h2#C0MFWBvk;t&gji~5_c*+?4yED8dQ5fl|+78M3(bjuKf z1cQya5xPdpXrPNXw$kWu(5mN~Cp%B@B7#A^uiHX2c((Cd-lWMPTi0swwxCBFIX*Y0 z`_(^Zg6CX)V5Nj;oh3Q>e#hrMok%I_%Mq6&cKm&6PU%cDDY zEsvegF6>&iIK?{S2;zTe9&7J1a>0sgd7)ex^@f8l#7=3CwWaJ@Vf|8Vt4Ko09LA|f z5PBC?{JvUDotpLw$I@Vh=fL8yP_oPJ_<4kEG2fdm887O;y0NigC?sD><@_?Zr3oA7 z>U3LA7w?s;;~FnD=4Bmdp9bHL%xCm)e$g-Je5M+M&9l0fTE?`20VVL00;LF}=Kl8I zlg{IR=xVY!bb?Y*v6VeET4jj2ZYTr;v~U4>kHqRaSg<)s~2V(cX+dhb7K^oHY_BuhY{K zkdqTnNhMupT%aZU>b%hCzWNf)?b|-k{)CH%R|m}_7cZ@w5-{dRXb~J{&0pZ_C81&E z^iEB(`6kqV9wo58C2*xocIuP3BYo9l^dwDsdmJ%gGVB1_WHJm>ZKp5h>K zd|tG6AX*_ml~v;ndP)=iMZ>j}lw~63=0>n-K<*Jd-qmq+bA$PYSD|);Y#LKFbAQ?4 z6^WbjDSh1TbSpWN_`4fXy8-L0oaL+$)E*|xs`RhYB)IV^?@2fbEfcG}>A}v}s`Pir zh;xN4;+ZFxXZz1Fkeb?}Nk|mRsXM!dGsKazey&tH;IaO71ti#M$jDwp6+tdphB$MZ z0$%o89o6}P!rQT)E!9O6t*DNN&H)jZa%Hlo6L+4QEPHCPFh4QhP+GB5EPv>DRp}&;k zu8i`TY&gSd!}Th@GR!_PeXlQzC(jpaXPZ^HM)KdcMJX;GeHU}ceeCQnE*|MtJ29KD zE)&aJ3!|VcN{9V!as>cs*e)GEWjH3x6g;x<{2RxoFEaykk0t2w*uwPm9FGT^*u{OR zdHVy3KSTuwlGeqBHF`x5I=1(S4tE}xac!e%&!fI_xIsQ4GU7m|^vAPJ5y`2Q#SJ?o zX=bB#eX8bnbR;k9m6Vaex{OEv5$%t1WG92B5M7$gc&ImO9GptdR9oMN&fGZWQTV)8@tSPMr1D2aouW zt!wvJ5cV9N^$jg&xm|Y4pq-iFB8%0Dm-;rf1JAWeLPWTJx{sd3;V{4xpWb#U`Dz(g zuZEa@%3%#DFwuWRljhrJ551wfuqyN>v1!*_cv@(>l&_v(Lq}(swcqn>Dk-de@9nD+ z^#OvA#+qeJjdusrg`25d!6-%-3OqfgT1!YHGNZ*rRe2Qq0@bU!qBtD6@p2o7nyv&{ zx&K=>jGqMGVO#1`g07JC`=63W`$~4{P`$efYUOwD-8%zvz1M+;u!ERV*pyO^;f%~B znXrz@_es`-L|*liGtnL!+$G0ij4zn+G<<~$+T*9n*NCp)A~|NM6IC?7KoK7|K7Q|r 
zCF+`>Z^MZ^3d0|llpml)KuK!v`m@aJMY|^DnuYDFbkMqDMOHO6Wdw|cCzki+ICjwQ^{wTdUu<7_F5=hAt0?|3$gB|GtKw($A@OZAt}-^JS}C`?CThPM$HIBZcq^)R zo62;#qhm-CUiDwhlk!p~=2diF9+i9FoJeZ8tlg4d3xx4NHx^e^#DnZG0ot${thb2n@`03P_T> z^?S+5Q@2bR7oIz|B;S04&~c*|B8DeIu3(q2!NwoWKWxFb97 zxohWek=&Bx(W&|1^Cz>#)g(ll^Iir|BrPuPkhwW-Q@XcWjSaIwK0zym8c6wGimRoQ z$DY|3xG906W)~6rMVVC4dHkXbH5JRpIm+(>TUjEZa=XVu=3(5CLJeMRyiwk?gjMf{ zl5l^eic1K$`Z+)S8^NKqOr}uYp0oM!BcStht&XsW`h21wf0|lxFqlny#xp)ZYTgbQ zZ$AWkB_54#n$5M<$&M_te0cuwtfb0%n-^_k^Z0srAA>-d8HL(^I_%r{31 z^Ew-MOhvgelfeTWUptbe`Ex+<>ES9aJh^x2QX$SwhjZC1xUh{7Rfot5Y5^<*g8pxf zGKxjVb6?d4#SWy|QCv(7j7nXZ^UggakW3nt6j8)^pgOu|wQD>qIx<0Hp}kr|OE=1a zJ+v+Z|E_RJV=Fq|a^NDwf+ zpbjt#-&*Z8UwN#$zG09bC&T%^B!*%=S`zZ>7ff=Pi=U%b35z| z9tnxw#Pe8lQ`3E-olQu^wne7+6Hg;=M)A41CLWu`m-k(9pAXrfQ!BK1Rd{NxLQ}~) zBo9|fN!vhQIP2+Fa#44g(`e$x*5`Am9(%0T56_nNGU(3^%0BP@NGx#Q>wfV~qt>A* z(-9dtV$x%t9}hfT*aM$!Ug~>kxbS<)q?4_`SBYy##18R!EJ43G4UCK1kntXtch+-_ zI#z=On3$9l27ppk`nh(A*eSy>+_o`E&+CHErcOh8%fRZgFOlq^#6fsOd)=?Mjng&O zi+by}N$*rf1I03|og9NPyGv#+p!9YuXokf|hP}T{8KK zBPB|1BFgOc>}|de_1nE?$}IJ$Dq1D?tebO%z0mBD1Nr8^1BqH#>IbRXmoHMu(hK+( zu~+881fegn5ljc{OA~@1RJcdsEz2>g#|{GJ`?sbg=IhXKM>*YO4JzCM@IAa z!BEpF%i`NroBQ;SPt>>K_RYtlV&@GtI6S|ytE_h$5ob0XEf9mNP?3ni2*`}r( zp(s@S{O7B$O7{JgEn+OV8Zq~tf*cTcTSZxWwaaHcHp6nnK*fd zwAem-5&CSEyz#QhHPgtz)aFLj2gZ(3nxAAFPF7cpM0HrA%gRx4icwBMh@g!@pOBT{ z_q=ggS*CfWTlB~odG3WpJOOz+<)0WDha|(dP@=`MRKRg>Sx~;fpyQ*D*J1Y#ZG%+u zB$ZNUK@LSLMdB_u=ev_0FYaMcMo zv!SB0Q++y$j%=ym7^wyUbeKPRLr_oEU0TE_vBsbxHF!N< z=V&Onr&?f9_J&l}-&+2~a@`Lcw&z!`Pk?hvz>_<@6lXC5>#G2bd!!hCe?uPzZP}Xp z@4O2Msdh!X{3R_-ck@>VwA~~i@0VWIUp3!n6ZCQsRlhCyn%=A?sWsZL_TsgS)+(tR zAM&ti2Mhem*DA`GfBM9|xL_nEB(0kjhxnPNqW??Qnyej!^wv##HA`v*Z^UzTY#4l^d!p zjM4N;ZL$*cu`{2ICw*eUYrZZltS03<6R61= zWtitICx-*LI`H4U$E&@NA`)RblYER))ej|Sa;7Z@wd z8Xg+T484L-8Y%+JS6p2DOUFJrnu=apOm?hmUYmu|a*~YO^Nfl~EUBi-_=F+>&LN4n zF3Rp3HFDwG`B|T%{hK}~ZA^q!8y&U~;sl-i15#UG?Cl99L3mIV9i{B{8G(D+QkxV;D!JqMX;wrCYEr%Pe%BDbAz;^#x`P*tj z=apnQtr&Ax^@gsP8g66kY?t;ArTcoHVu(RjKrq(|wvnwf-cEFLA_2$)KBR~U8n7wJ z>F9i+1lJB@l;A@KJLNHd6A@qb9<14HcC}NKqwZWs@~3 zoncy7spx{QE(ycbWQ1T6y%`6!8b;9?=V17eaO?aPHPOqCpK}_VR&L*+M{o0I&8}4E z&(z@eb#5VaD1C_&bm!kK8#8I6^e)mG8aHWP^vtUOJk|Z&DY>f=s$gx6Av7?vk_JlQ znXq3k8tohpCrTgVnbp)Yy-rP*imwW=(`=6zh5J6wy=*m4t8LDIw6ljI>ya{7euf z9&&p7=aIRH9&PgRSjIfx zXEQ6k>!TjJ;a$Wa9mLbAZPHcx>DH)SZ9FJaM>1FO_{rj9;nhGyJofiQxvqE1@teq$@{H66q*hRWa=5lI#syJF@&H(q>fG zRc?8^c2q=u>b)mbzw2cC)gJ}UwKJS}>Xkq5@9=iLntU%2*~0q(Vlb0a>!?|H>${U< zu=3cr;1O^C?8GFlx=uc3wI=9096~oKsE>pOQHW`0|MHVhxd8#_fYFM@>OtVQsrqT$ z%;cUb8rtR?cYPcPdM}XvD5wyf9*I-73X0lcT$>#1ueUR94@1m;%J0-ba5BGs5jRkp zXd&?Qb>CjOdZoRn&hVyZ6(qHdpc#BuO^pc9%kPFgaX;kQ)MRK#`S-|gG^%Fp3U_`o zqIi159#*#A7+QLuWi%$aW*|GpZ{c2h)RI<4>z87pG1a?V{@WgJbe(+tlPXMbFevm);rMi2(%;?x3bv|H#$sEK$_w^&@!y_eq)*x*t`aa&?XE&m@$;Tx_TE6*mMVqADR=H7D#Zj2)H&@l#`Uv0}C{)GA*;6?6jNVGG_+@NX zqV7w~-FwfucOII97scPe+?@}ZmB;TPwPZ8>$?fQ5WIB`AZ8|^`kiEDjmO|e`LHn6* zl4&q-bosh&iALSz+v?;XpR!^F^T*+9KQ7=3aTxyic`vhzfwA|u^}jgXx`etLo>f(D zfx!`lKAxCjaNbn?drv^Rc@L)SdB1(D4T0tE!9m}1r<8Imvy(SHZ0a?WmRCsx+_PDH zvtk-<4YA)0q(hAp4V$X4z}8cAn-z=ZmhWqQYoy;n|8-{Ov#)oI^nZ&H3kiB{`e8%a z=s^>nEON=DZ3A-vMsH$5K@bUkzJ2>fn96{lv&O3BuF{et=R%k;Z)#L~e@kY*?h&MC ze_1xspTA4cI9HuYsJ*!>{$rMmP_O?ne$YL8H>gi({98;EI$n1QMA*jvqvM*J+(>S;vd`C{Nb&-e91aR z(#rR-bfSeqHq9@~2BKHCkEf9y(VnSzYbrc-m=ve|$42CJE?p|f1zdD>E5&R07)#Np z1??NK88w0MgNla60*e8U{P$vd9%yO%eQXIdEA_@EQn{ig#XoIG1G0jqW&-MUIki}w zKWZKi5gBTZ7YFITcOPv&46@bAk1QXq@NO9WccU2!s|?5?w~TI%c44J|oJKWaYHV=u zcNk*7wy~iDD6G4e(T9x3m5Y;Nd%LyCw5^TbCme9n7tq+g8Sq}mBqcvlwrxJf`#E4l zt8#LylrY-PF4Et*V`n3iPPFfrGF+eY38#tV*VyWy`V1%^@?OrSLGbM%!c?cleio$F 
z(U|ilk_p_o^R>=Uaw#n5=zMS4_Ei}#T%&JfgI=?pOk;Krqsv4(j_sAdJAUD3j$2RH z$W8H&;$pm<5V0kyUSa8n8~~ow(*q!$Z&^wwF(?qVzCM{}yoMSSgx{yU1XH7ObQ`bI z<6Q>2E3`N4b114BCdV;x?&3s;x7Hi4)zg@xZnq2Di=}_2)kmXpkW!9!ynSFw9d=-| zu7Z!dap1YpE|PgAD0UWtI^kAcOx!;ummotO0+Q{elk*-Pe9x~r;Ignx8l~CT+w;PG z{hb*DXxw7f*4F+#;e(CS=2_eRH;kXIlW>lTr=8I$qFofT=Nh`_?4}pG$O7Kv!P;Pb z%Fn_O@FvYGxGkwivn&M)_-2JnSbz8$G^6T=R4UW9dUcP-VNAO3zf_y-_GgfhKQpio+nWzrx!}&n{ zr7vEq`6@`l7)1E#^enW#|M5eCl6nM{TjIG;;J0r{9P)(;3GQf@@!rM7wE<5uduWM6 zUdfOqqPhy-N?qq<#=kqI52Y|yNG6`~+>oP{!%yzjNWDGySi_xv(n^qfhB2XMT$A4+Sbw;JzK0bwyRHA(91$=m4mh^}ZBSG75o}h z^4DY%lN-f(#HA*wuU+4+)Eg)jL+{gY!D5h@B)`E7=*D!OzoLjl<6$>zmHkXDXKzZ}A zgEZVjN2>xx>Ml2`my%Q$2(~{EqUz%+sgphsx43{tC!giM_o#S9;D^i4hC&nRG9KFC zdWRNmWj=p*$ix2yM~4hFvaJ*hCO9wWH$Of}HDHeV>Qa&Z2gZ%zw+jYsU-PwE-qOck zy^uPg_Ki`eEbrH2nkAfBSqxSLc&Sxkwu5VujPcK#EYjpELQOxs(K%rhoHNw6SP^Z$ z!5_VBv4)_y-b(*6@k)9+BVsT44;0KmN=d<;*&bAo`Kf>M3}yaLCF3^ z^uAe&Q%u0aipV3v^A!i4U(n^OH<@&;woP_-BMIzn@#%$h<==<8rGLWqNF}X}wU6<@ zk5=hR!6e0U2$_)!`n_u3f2$^Bnd3X`47&x&qHep}W)0l`TAlczdyDf$t)&ds@~yZyfprj_TZFW6IvX}xblHR9J5C42vX>*7U;FWc`S*aS{gaA>G7 zpyf9}fcU#X3SOfs*T&ug8#BKtqnUzdJY}faTQdsE?-8H43K9?Fb$=FL->nG`+1M`+ z9pvZds`E3;y_j~xP6!a)28h(m{=KH39e)_*QblY|PBpvPF|(Z9xzzF`ErPUXrQt9v z&Ii(E#NpB7N#^&34T6OiIByD$GHFQNeMF`tMoJc6M5iiA{U})-B_i;}y@7-CtD)T4 zJ#Eysy+VzcoNLXrbm>ortNrLQ`nU;jf<`}{p3hcqErLZM=2%x(Qg5Fq@%a9mh6&3+ z)!2mrlHR_)I-s7UoW8H1*vgn3AN8PHc`=_?w$RH++Dr{Q?C!4dqnls<`I?KM|Fi$^Wn2F(n;iE4rfhvJ zpMmxQm}dJw@vsv3p=Vg812uzO`r5D5)JC}d!z=3h$GD>1>KCpK-SHDl|9<;;kv5!} z^v-s3y!Jt%V^?d<(7DMd+iBJQc%9$2yYY{Sv-gZD(W}3F7OPobXmPt3P>S^J_rR*1 z07)PUe*PE)`6M7vUgAjHX67d;aLfy3AZ4UtZjVS^SpIDK&Wq`AWY+wx|L!)9wb_{L zTwRFr!oGajnuuDXgGx_J&0E1G6}u%O?dtDkdd>9DW?xXj##p2?g);CFR> z`q-fb$Uj*?euZ!~ira#`s;a6-UBM9BKo8AmG*V){x1}3%M@xCPa94~|JALg=eK0Zm z0zs|F8@wMb{2t(x_UK6K#DyBaOEx?9C1Nj#wUIqGn1qD;_P=wZ^mExVIM_7tGr50n zx;hzu**;UA&Fa`a7#@00QD?d(Xw{ zfhBdpVF$DuUcTbLYoQTtC zwt$O*u=kXu2oNsJh4PW?tx;1}aEDw)z!Z53~vH@A<{hCw*{`>ghz zt|%TBLNjZ%K2`dc!$>K zk1FqR>nUaEZONZ6eo?#mYv|p+*d_A_-t3+ha@*eODbH_R6z7V*?&SU)(8(%{xJ7O` z^6%K7=*~Kdtbay9>01fg=8BJ3_ikpI z|MFT%!Pho_IlOYwv?Dh7jalw~y3UkAct^6v19GgXaSMkmR1(US^a!3C7u(cs*5#ea zw2&@k98vf5Zb177r$2g}7cJnhT&I7z`U5k5G(+0c1z?~f_-~914OA(q7#TB=ca@8Y zuKs}^N50Un_0&Crneq;fY9=4;{O*-hb;-}P$*z+7oRqI*vHF|G_CL6atXPr}suL>4 zxnMf^epqWMM-I*9(?b*VG^32)|3B8;V>5!7=;$T{1ODhTtP zBHG?&TJ*!6k{gj3@1^e@eL|>=!LAR{x4s6REE_k!Q6x$sRj63QwPEL2TcL|)J{c^0 zaiixO@Ca~vKR^vXXNNeaqH#O^+XpHG z!!@41O!`Z|lrF5Nk3`Lqljn#1z0iZ6HZWsg5jnb-9jbE~VEVdTf#01zLmv6!&bjh!|%k6wCV{^dCgXV2zIJx5j}8qcok>n8J>g7V(r zpLpmQ*dmB(`GI=JDk zjA8obnfZgRtk0iafe&jn-+i00d{e%}c10aoR$O+Z?W>Ex0~M_^<(~tz%pScp=_K?n z_QrLvQeku8I)jHBA$R=iYWo+u4gxoUbR=wuc3S7l7k94W z++K1)(HPzD#~rCN#Qa+D6wAvE`f1;Vhl@gI7Ia9+=Jx~->Wn63?rCqQMq=PUh~Me> zY+_ZB;Xr|3uT~TEY#`0S0Q*w~-oEF-eRVX!>_pv@1NDbam>+K9e9LM&uHC|awupGf zZo+>b*{8FU;(v&GmFPW+O_OV@#|JA`K<_@4OA>^)IXQCuU;J?=Pr7g-vjE$Fx?uOZDpYo1er%Zv-o3l;tQ=oOIid^ znG+r6p=8mWZL)jSAKZL1&VT$>P2X zT33JRWf&3Rak{r^V?UQK!JQXhHi62ZDk!2Wd}5e3tq%CDQTfo= zTJTep2Q@5Dy>wZ=*x~fDq>>fTp_FnZ1#g9fCB1ZfZg4iF);(31qL!3#>Mlkz(MSxM z&J2zcO=lAwEE)T6$!{3d)u8$IK&wM$f^hhu)Mme7>h#(xXw$w9q;U#za@W+1l1sS* zY9?D#-y>|yg;GzpuIY)0YzFX(Dmgew#Vg)xXq(h=-k;!BRU}GM$?_@ie8bZ&y0%n66CzAY)h$FXuJ0mA|HcP}|*h-nhOnTjtqlZuw;M%=?|nvy3PB%1$g8ZYFWWIz!1~a|Cme z6cOhoi?BV0M!$*g2Vq{M?!_W*c;9jIy-$AwIl6`%r`C-Br8|#`NN5f3(=Fl`UHep0 zN%K562beKHE~6eU-shbO7eN6Znc~tTFUjag5*XbgiJxV%C5AdggyB0ZJ3JovVD@H} zEaovjuf-FVL@&Zin1*KBvn1`+%E zuc$V4R$eX$vb+|+t9fl=B=6pxgtrB8RG1NT7kWBx2s!fuRk=q|lF>Nj{C3Vh#OF-6 zkN33QA~vhot*QH+1+7MxwpV*f54ejdd?m@9)@+5UGh;UP$~&zjIX1KA7Vb=fGS~%?S91)8#iBCd1kbk@ 
z_UHmDn|zk`pHTrVxb}m90`2Vq*%0a-`O4!bJ&JTJEI~*q1x%S4&|M&Br>2Li6#?}# z8n3c`TULm=|7jHmL#OxqO%nEHg?xYD_b)9DUn6c#b|EHBBf$=DU|&BzLgo3iI+5Q| z#y%$?v;1?R+{4?u;l=4W;%U3#d2js_O+(8;I69{2RZ?5A{2tKuVWLyYpk!wc1%@vU z5fNNOQ{SJcDAv7Kikoc6h-Keon))h3eU>J}Elc#t5D?HDx9zXC#(QjD%-80OTJ5({ zIBYc{3UrO4P-R`&w`0e+I7^AR<<=*s&bif!My5>q>EPAx3!9&xG$k`1ophiX41rST z9tam+z`)Q&5M~dLCz8a)S=VEJ=uTTxXv#I@Qqo=w0LP=`^B^vx>g8zKA)qowB8-_@Dg0czvk` zM0F6js+3sY=8X17Qn2gSpD?_f`d)C5n(w&Rx}a>%nkGRgZT108n(4TfGtna~?L%>h z{oeJw3t5^McWxOhNTnUvtc4BNRIqTZ9`~Fmm>BH&dN&N5LTyw0kN+>RPnqE*%u81R z$V)#r3f5MDsXxe@LX&Pw;l^y1&Jy8$^WQ&^)3!#vi&q`y|W0PPQh3+h-$J zGj&#Se>QN^c7m34i+%FO35uB-YX_h26`Y_-slR2x2D`esW*U|`>X_EEpihl^wX@>s zysoY;g6{E?U<0~NMopdcbiTnOH76~!?Rbk@Y-gEfu<~6hI{U4$60UDFtFWqm#9AXA z--|i(j&FT_41->Ii8~X~5lv-hIG@VczlknYn@M~EhCF%-n@;ghtUV46wdT{)6N89~ zAUCWM6-{y7C(^Uxt4N2Ny^U zW8En|!Xj7+cDY_(6GegKi{R0Dd7(y`<>BEB*N+?HLh?*GG|CU~6hnNC2E#k2TBon9 zEpV~O5Ho03r#-XIzW;i_JyGk{K<;vK^C-k_9c9dRIMU?oR%*gij{lj@yAKsHI`!%_ zl-zF$Obj$xRa))+p|1=f`D$t1>Ql4AgwR}Ifn)0Xv_?`U@2^s(Mx(PE2Ubtg&Q^*3n1^ym}KpIB>P` z&k0W*Gyl|#^8>@d`<7OfRk}GHRf2&Zzs(qwEB-bJE?R|p5^ z()z5piGaxE4xNx7=k)+$^^ncEp1N7`+KHl-jh_Ze<-_lc;DG- z=$vY2qjIlV%9*bzMt5@Vjr<5(km312!|5;z;ltvH{7D_awJAamyO=)~B zR43O17dRO#9Xh~6R%YOcm7#&!v>=U4%twcvWi{xlMvfCdNny1AiL-0$=!x)`bXOzK z9dhmdNlng^nQaHoB>B8~=9VM+NODKI`i@<8C0f({V7de=L-y9bs9?iPT zlhA7$b0G>m&iv7l->znq7+pG!6+K+uF6||*-m0}A+HHk~A-q+KkrVjDn-}1!4Wqb4PibMYSDw04i5aDgHCK57a|;ajnKmwi;7~giBAmBs86y+fA&H& z>tyczaVg4OxrG3eXsRXZES_6LrtkQiL&O!_t50$-U%|o2JPonrrGG9z%*hd?;COjU zq*ek27!Op``s8v!=a{2SIC^~n<5tP_gb|Za%&UTC#%yJJ4n>Vtjna|0TF=0~2^|}@ z!*!b)*;i|}vjRZOz`neVTfyap;(Jj{mt;DZG@+Yxgkbo+i|n(zUo;+JX5NK2KPAaD zs3^(>$w2Q-XII3y^^6whK}Xj3b8T(vD@5I2>kc3-V5-=4g^r1?1PNRilHk+xRp|Wm zCO;#-Mx`}&sSPnzb8@g&p%mHYm4wmJ5uL29Sb=WY#(G0eIU;j{5xX~w%*xE!_*P~P z=SY2JTNq&N){Fc84}R0&rsx5zI&@7>NMfYqa}XG-8qLI6vj}<7KK2x-Ety-}TYVZZ zFb)r9eAfKxh|eA{Ty8ev5qffMX1y`K(1JYIwg25EPr*&1W9bqwHj+ZGbTi6M=6r&X zTHy~ybQO{_7HN!W397q_M-4`))Iz|tS{Hm|@bIP&)S08b|* zMZMChJOaTyLOFr1BVEuN+1>qx@tCK)Y$`U*aXD@(A}YH+s(3djQQ6hSML!p)??cV8 zHM$WK*^v!D_S`?332B+7`8ah^Z_Hm;Fv@ZO8}B={@x9AHx(|L|;tjkEi&DufWmj+i z@;8ycEP$c9ZR3DqJX{i%S+V~XMmMqmr@2lL2YZzP(42v?yv(>!b+BQqb4R zIdrxjvn&}{%OtM3rZ|!Ho+peoFsp{)u>HGK?NQ*`^OXg|-%I2F3vVBIj^EgrM=fe9Jew;_hc0oLSECNr_&^@YsX zR+G`gc}X6JirJyUliCic^A-ATR0`L0xTURV zQl#r)54vm%_zE5z`wbDE{A^~^aDzLX>ojaQJd>RJZqsi%A(1f&@^$JEk1tB=CpvYcQM|`5@9KTzAzAu{=+B9}D6p-Vobe_OBlZiSd2o)61f0 zKe|nOi=pI$P37CG=g+GT${ao9ny%2#sdSF?PLmQ`oNG%AY%wrYJE-gq^!8SJ_3udn zCW6m_MUb&sT_=%6<5cBdvna$R7Qhp#^c*o=;aVCljR>Pt0>#4bFt+r<#f#99YKdhT zel6!{slI=E@?H$dWfm>degDd!eW&ABx}tW@Xt$YG$U3J#)r<;{CwE<7U>q-hqxEyM z|9qaeU;3Nt5VY;xKfMB7)?URxS2fMn)LCegICJG{%w4HwVc^LT`PTL|5g%8;@B+d8 zUlfr0AU%#SUjdq+TUkg7eaj^^W)5CzC`@F@*S9?w{dDfReG9u3UK8oEMzG2cpFSJ; zdYwlEW1kM=Z||1IG#699cRAiTH;?Ag_YDa)96$dpcA091VxYE@LP@!Ns_%=R=7KMu%mDdWkqAuH_Eomr$sxp0u8>t z-D%fjH?OHJRdSBtEm*&{)&b?hsA&0{HV!UN_4TPQ z<1^Mwt}%m40Hq7$1e_zP-7|JMuqZI$b8ph2=4^gY39N<8zEvTow6vBNp)QuFxaeL# zw%g5*=mUB^j4h-C}k(|&&OwLzhs+6fBvHJzp&r#{oRP@fN`4j`|H&rKe zI>Rh6H)X~+#~v-~6FL>X6Yy^i71~DK*(xa@R3sxO$0z^mg)Q${7C&rgR%>kR_6QA2 zM4<+rD{|d?uU14#>6;bqMnKGokcL6hjDW-N8pyYAAj{WRDA#bUFx^V-_N%;(uJ+S& zn~OhSfNPA2n8~E}mLDlw3iH^ikF59T)@@bQr!Q6)7yOKecIc5n51;kl+P4`5?i&u0 zv$6_)Ut)wN=_I*DD6a-mk&%snt($;uM01=_9JCIK`Vrh~3ZEVQb%nSwpL2; zPL?Ley+U`|5-DfGQSV=s*8J5v?+RWCEw&^!=$IdVBcBN2%C3kPDpyx)5Xm-aTfYR` zeE&3+Xm#)$r>3!`fY098G=zFrQ}eR64mAKBJ<9YhOZ4ql(iRph2o*dl>m7^>=zxwa zvqDOB`CxnRl@Z4nf!k=WqY5kpH_P;RZ?*89OzZHnK5W<}yCErh+s@R@@0r;Z7uKNb z<<$fm6JG2WPmDf6UAf!z(#L5(saW1H=*fa3OBScBSkDM=<0~ 
z048dvn(QEXzxN?ma_(!yIVUkDK`jrQbBSC8vPW528PFSQR`{wnG(0>9_6WpAiYDID zeRZaZ7_60xsW0FO%ytsH#2DxyI!^MPZ)>7cmE^doK23^lru#>Aq%5A!na*b<4_zFi zBGVqwlXSjwh&%8$KVe1@d}TAFkzqCb|Iqf*I?*7icPyA-hns?TkcdeN}p7or^#{+kK?ko1S z_rB(MoNcv`I-QTYycIOVo)$AqqkrUDPii@=XSNyjHQzBFSoxDOL|8oFzX_wiCaVbI zqN&c9PCF4<&ei~N{IKwyEz{H1yE{y1oSb&#b#&fR)6}ek$oFeq-F<9rv>1unpOQH( z8a8%oW>Z%Z$yXreIh8Z5qq~M|U&2JM*Zd#ScmCr_46kx#_}UiVL3rI{2DLG$sIU!s6VkQW_tp0 zPd|;={rtzqEJNirF6s1Gk)1myPKVcEXUSV?xG-<#-t?aiGdTEbIir@mAG;)Bbh5vc zPc+j=s>>nOyZzdB@*A|9c@-i%K2&bZY^8|VctqdQ&vME-B$e!;B5fO6PT8S+MK6_+ zH_Ih?1v|10b(y=yLoZa7{YkxW!G+rJtrq3==GfT6oNtA^$Bua-8yRO!XNA%J5e>AP zyTL1@n)T(+T3272p)d-zXMTQ|V!Zi{LUZjNw^H~Agv^^s(uLlLpy1pA&!Pt_G<<&b zpXB>f5u@=B9}ATLg0cFZkmusq_me|WzGln0bG!38G&8<_`*sk8z=scqJiWX|I&v3{ z!oCz2m|gnX@`UeF+!Ne)QvyOl3$Qy^k*Sf(xWbb|oQBBwi z9q#d2wrexeUgQ;)9@l+*=1h3-lpf1pd-k4iI~!kpKK|9^;mj1HS4-efUt;_IT~_P^oy`NA~=tD+*0X6XK-BB_MJz zg#^P|J>;C_3+Ta2d00| z5yqycA63rW4|{jpooaaghX346Em6PXOYF|W5~Ze<%R`Q8_!I|!-$ztP|&5zm$$L|QF3r_z(eOnhfxgl@`Ie zgP+EeF18l*9pNdJe{bUI)Y7HcVRT+8x64?v*-huF)qcOkCik=fwO)Rj?=-IG3RxXQ z^3S#9seg0OwzPIW!O3kl6xlYFwM^<|Lbr=E4q^rxE`7&*ee57U0AqN9v&WJ zC_S-;3m=a{h;X@479l67?M{@DP~^^Kl&oIVGWx6L-r<8q3Wbl7_iub_<6r+O!a-9S zsi(A4EP}QC#`Wb{=6W}eb9X47Xw~h{OnrG*dMVXx!SE|aWtC=wDH(sI3zJ$0 z!d@vw&u#SiY`9f&IEO+6LMJU<7~ck4rkYT(+0UzNaOQ!IR={?qUoalw>_JO1+g`8d zxHyWPtW`C^f+W705HWKuV*Jw8Mf%*fo(0@|8XR}RWk_4Qn|X548aKK(k@hr1Vn z_P{A#5sxz04CpTy=f2dG`a6>0LDi`s^1rhHf5~6Vr3H+Y)iN zvc6c5Yn!O-ufUYX*u*z$XKvcbQSGQ`uXD|>&=G8le|T>woIBj+DUeas=Mnm&fM#pL zC0D&SKXAdD-nDnHTWL?^l|UYX=5{g%iMr?Tz(GXF-_Fb`QO@#iXe4Ru*as)gHmM6)~HcuiT_- z90$!d07mHR)rE=`F3)4@CEP{c?#opo&S;%8j~X>|Fo*xUFE2s8bMdHp;f1#GjE#-4 zeDthwd)Wv~-%>Fshh?#tY3~-al}|v+*qqy;h+mf9{+CT@9Glk6cwUR>tG?ysO|EQz zehg&BOvlX*Y0|n??&m8lX^e3$iuvhL&L{d%K)L%e*J!mfz1l6g8%ItsU3ZVxjhTAT z^6F;&tgPqO@nfu<+X)^Py{nPL8-co_)fGRrt;1qBc|e@dLYoj!2N! 
zhmpFP=X5kI0mM{bif%wIdGBac4#5h98Mf2rmBpsx%;e51I9YNj9!Ck9wX}a|8yM3k z_`Q!OtD`e51fuvMGEPM)?e%*o2BMpeU3UjK=OEMsa?pK0%(i2SI6Vp zrDQw_ZQItimn{-!R8W#{7QNE$d}8gF6YQ_rJd%VH#|1kI-Ch4|j}0UEFmBKwdkM6$ zU9$ANB%zH$^{T3R1}Z(3Kp)lN*&)D}rvY3Cz@@$+@fc5hC@EDs=;&3tLZ}8TXx2ne zhn>Td>Y}BTp*-%7ZV#@n1jH5ImsfsSZ9NJ> zmSR%#`<{hRcw`=23*p$RedXtdC!NSG)G=4`)iMz%rt!eTK5n|!kp-ADm`GeU=){6f z&L3ylcombE#|R@mdo=%(F>L&geffYHh-=!WTpK z<{{WUIrMht2^F;e6=SKzqSx05X7w7Y?K9JG54n}2XYss_NF*N`+f_#6-(QL=XzpW# znbR&2+NbWFToylicvgjmgA<9%K^&lc#_RXgf`rQ~6~O3qr*p2r#zcpbaNGR)8q4l^ z?Up5*+yk=O;bSMe^C_Jao=HPP@)lK#0f6Av{V107e<8Rz?le*gj{h$NxA^XBZPc%> z4%pL%l+9!{4B%ruaQBnP)xN(GE3;usl+<58%ELmhfh^cb?yO$G7pkwKgvHZAHCGnSiE555rvl-O#}N`f^p98zSHTpPBpIU@yT3tukaToIm_ zkduE_$cEck%_3#1(RrUV&$E&T?3O9I`_s&i7d5Hcjg4KNpnmndA9QUd;xZ!w1s`yL zmzn~^nr#>T31P)bf{;BiI6)}H_C78ykPc&FV`FPH`OE%&@TCRe^86(KJ!;s;wGXw1 zK=x=$wI3hqmwcEu?IhXI%S}ZRH*y^K3{q&$Sn)N=kUk9@pHB1?Q}uE+j&;S8_l1wj z8n_2ffTH@7P%nIggzdTr0?P*fN6#i;>Ucp(UmS#cmG^;Us`X(%_hA=JxE5DpJf|rE zDk^GU0%r(NsyjSwgeFHsU;|0Tr?T>(4n$5Oo+XT4;Z6C?CRpNAVgJqxOqK4f%Sy#4v!`gpxELbEaYv7ylQRJm8e z?`=e8&e1%*XTgO6fs>1S*1(OUHUu@DJPe@kTt8@d9_CsPx^?7f)L;1h`vxLdh4v=P z>rb(*L*FZh^4vRh(LS6uuXFoqK9+%CZ^1X9PI=)K_N6)P$P!GB@R-pXf*!Ik(lbaD zL?1_gyxw|&S?e&=m)w`aOOaY(z#?+k|Bxs{(rHmjhAS}~1Ohlbwl}_OS7C{9x2QwG zg?Gi(r(Xg#eQc%#j!bUs#3sGkBcwq_s~Mj$Iv00KAFG!o-mM75Q+-18)Ce2!i1>Kj zm~)v7j|@lY_qVdXvPDQt?WR|7g+#D0KJ@!YcMZL|8`Zu9dsW3fh;L$?6+*r;ORV|E zQ{FVT;*V9f9m7;l-~vQq>kCDOxvEyehb*dgeYmjwr7`7Cw#nBfx!;%+=ygRhZtuUekse1h>gF_&1-?U^qU25-$51&t<7O_{G9pT#^o<+D1j zf=UaL_A2upVkArbAgjD&tp{P0<#<`+eXwodFH%#l)yHz8)Pa)>2{WrxpC(+LvEOGf z%a_UP2ouX&Gf0k|3lV${^xWd*`|V|tOP8MhpC5_Xmf{v|4UCy}8{))6=>tRV1h7A{ z;Mu9UN3BslVzWZ=6vh)h^tp-nyNJ7I%R#ZWOXA)cB6IE4oNlKM=(Y6Mn-z*!qo8h3VVm$Y{01)^q4^rH0sj~`^j zf^t_3+T$9VFt1L0fc!R}KbrIXvEkgIoEDcV*r$w%D+CXfr|5A|$`uR0W-O8H*9~aD zVJm#PkNI*0i}iHAbsY#WQ*gKqJnqtK8CS4MK&#?~+=HNMBzQtuWtVY8| zV;J+ldUu{jt;^bry@T@BRS~^ARpA>1`C&{-r(BtyDCU8!?Tx3ex{#Xoh*la>M1BpJ ztxa;t>lQ5k&MnLWGS3pGBZnuf?u)Og`o7L!+oK{t2(@ya2Dwa%^v>4{?-gk`n1ZsX z71CP>OxAdNhGnABnoKA=ow3Yr6FQUE_C*jvEW_+b#P4H`HyMEBk~` zmjM3n5Eb?V+Z3^Q5^=bu_<_R~Y?IgM))KIbKt-Ba z!cFd?t?mhwP3A?H@Zg^Bl^7hK~hZ;`z zL;5>ldAhar)6w)?a=~Pz=yMnqcXO>aRZy>rUvF?q#@d1TaGS(hc}9jv$Xhmho z#E^!=vkJFWt+y}JyG#pb%nf&V*KhRqq~qT1*iTq!7L=rVYnH6}$+-_T-4b#>=@eA0 zi%{bxQm97!q2#XArm2p)8GTu_i#q;I`{D2)&T>9zTE}YEeta0z(q(WSI^?YuzdO~6 zGjYy)F0}OVky5Eln;V{3ZtQ&fZlei^$p$3;jWg14_Ssi;(={{o?v(KXxTq+o`D_TvDZAOFOxu7| z=FiPt9m~lP!*eiD-SeCL2TzAL)33aQL$+ZiRa-#R2vhHyMX@y$mb{~OiK&z@R~h|9 zg>aJPtAhOJPEl80+QeWfZkcqT8Y*Eb_!9cX)65M8*Wy|;kr}a8QET3q+ChUsrT&;1 zh*tb5S6dQzwaak#kA=YD`QEccO;C6C)Imt-h0xw(#oDXv_MCJYEV4F03>20x8??Fj zklpco42^{TaldY)JFY=uyi^q3@Ij*kM`O7K>Y#-)9@<`I#rD2q*3yIywS%&&O2Z*@ zMI2GeDj}fFfU@Ghfh{7Cy@D};(?L=P25j&bOv;5=5cXB$^pq?BdTUxZIDbL4JyIuV zB;AEwH0=$jxwAbh^ifhTU-GIP9{j#rSF;>G!FqV=AT_Z$zF0XY83-p1sfVE{k@GPM7p3s?nCEq}&Qlz`2rpj-dDIU1%XvO>1(SLMcTvYQztR2{VXGdlfBd3FbZm~;6xP$8BJ31nZ` zbfsiPqIH8)3!e0eGS?8&K=&;D|uxldDd zVk#cTzJ9A@(lNh*#AuBncnYDf{yTn=w0+w)?7P*S-}Cdd`dXo4MlxG+A)H@9)nL`8 zR?9l}{oT@}hJ`0NRO|ksdc3wu2ECM&tX{xT!a3B$!f~@|p3cPMm-FjRfts#1SsC1i zg!(&;O5WFWyEp2IbqG#F0_E+rihzi$#N1Bu5t%?tg*!5&Y+=8Nuu^niuxg3kh;7J` z%DqWh{?(2&$>e&Eb~Tf4YAR*c{Ig`xfCm<*f3egsbnfmIk4HG4kD=u+FcTFljlgzb z+V{NjjAJDD6P7Kx-fY&>*REq(B_7xBpPJ4Gi;Q1wHT(3Yc+G~Sbsi5v6~Xxc2Z-){AdUDD~J*8)N16fTv0Baa}yuCQ!Uj(rZ+QiKGayP2mQKUYPxz-2r zp1XLoODEc$E-fuhCPgrLZ_f&jpWOGm?H7H8e!jAzfIWE~401za?Ohe4xlK$^+w1im zbrytik)t=3Ju}#vCp_s=?Y%<(gakBvq3c@;0oJZJiv zAN<>~xK)a}?lgB-pkpPja=djoDyrTNQkP@zTUJ;o>-={`U6i-y-rSp;>QQ7@Hk}8G zE{LPqazK+D5iWbWwJIFi<1uJxR}yF|p@#FqoeZZ?#mKsUH#m-~zY}KGpUT{>Ar+tE 
zm9R^XTY}?G$K7SiGvj8~h0WSDD`a#iM_KiRBUOE#!FyEyU9(5CN!>`(8X&-k<1&BH z+}sTJgI8Vca6hGcjlt#Yxs}2Z)i-;b{gP;c(QAsu<<8401h9Mo(zgd5;Gcny3L7N> zx0nWC#m;>|R)(R%T0U`FoFIUI|Q@kDgM?#4H5|Fa0 zzw#`zs2z1PdI0b#{~D1VUQs$$63$d1v!t^e$7}us)Ud+6{lQCoJe+~2N@)993sING zhV31Zc>R_{uf!KRrwLXHAaO-KvUih<9YJJtH6-vy=BT!6(F5vcP~)@}IG8|-$X`T4 zU6Lhd#tG<<@=29$X(w(|0@AT=&u4nj=+}8e$M~JVdk+Zq>WRMEgZtglX_HDXDS!Wj zQ@NhcMZ~BX`ny+{H+z+|Ucm&7VVY;k(hr|QuQF(nLmPMiD%ee4E?i7vPj-O4`lj+|znhY@-B7#r;Ux$D;zDSwN62_g2 zlVg}NH+5*>O~!Y0*ol@wvE7y8sp_1`lT>Lh4I~KTp)|${1R-Nm)CJIe5qO z*`qWTKPGvrVm0FP=eA6#C?Kr?w-WP?Z7quU$3?z@oA^UQBm>=k4xB?-BBm5q5~#(j z+YE4HB$sb8ZC%be4HW?((-t=UYo+JXQ$kpBLoUQrBd|Y3__9~Ki>3i6&gu*4DKuzN z%&R&S4aP_V&T92y07=pGHk=+gv*AqA<@Zy~-p}7qF3t>f`HZM9mWfpAAa(7J|3D&7e+agPDoZ3`;rg;}aR~?@}L8Ecq?O20+=cihjT?=zn*eX*8d((Q5*7V3Sl9VY@j&FE_{Gxs*2}9%2L$HS@zo4Z zl`ah|G;*JzhgBzJMM;hG_a2b@w#YfWspm<#JJye<6!L3016jA@V>4toLz2Ki?_db) z6bG*=Ja8ESg`0wQkB`H8lJL{fT5lWzf;((xUFXY?tDL_oT$!lde`M2KqyEaK3DHLV z>Jrtb0+cKngSazz#1|J_8$5bd825yxWZ%7jKg|1urz{OqPsw-@%FF9^&F;?aDU&QB zxd2aYye3ifJt+0A!4d;AmGu}}92w@{oT1Wr7=%}Q5B>w5V3&O+fP1tt3iWn+jV zw$>A>GikJb#`mB0mT=>?9R3b>eLEZu2fFX`LP2TgMXG>#8}8!#DX?+>(UnP#>gF7& z2H9GFEe`RiexG~a#yuALS)~`Kv%MafV@Tx^Yc$S9u)hJ$ zau9SA+N$@nvJ=>>b)V}1z*Yr+#%*JRm!Il4ftJ)Fb`4e|B-_6~MdUX_#8d1SkXc9g zL0IG|bKk)f_rLMeA8kbK>BVd!$XFBxxkz zKOuAtKicRh2?)nslQ$<)k|w&;mXG23m>i&{)quI+!F$SM$pILOi0yzk(xh`}K4jab zuucm-?ti=fd9U0Y0YBy?5*B&iqgXYYBMts)J&fHQ&(8$SfpIvpLimq#z9^X;?;^v&kt1`L-l&`D&au_8VP z0#J^Z()b{Sr4Ljlnj$l#Koi49DXWl3uI9TlglGou1#k*gjRnn+fP{P6VkvM12ye(3 zBV9z~SZJCa0X^xhu$ndTN9Ft_3)ZdcmBPQ^s}6_(sF2Mwh%Z+jRX5J1@J_zveFer_ z)f{4wZS+OPLJC7qp*X*AT_e1vAoD@1=9u#RI`_PdfgmB={UZ<|a-a4BR~q-m=1bC) zu5I$@Wff-k4lUXdvu|bl|3L~w3%iF-Y3J|q2LeUYmByOjo^K%6p z)!@83by=4~zPhPi7}g*W+M1T3*(L{e#pXSH1w~uLr9Jy&@Uw2hyvP?1O{Wcj9&p6B zH6|j5Q8FPWvlKL-Ljfa2d$u?`)%k(GFm>-ozB3B2yA_oGuxiJD57af`;M6Rz9NlYz zqThLDI7*_9QYS0rP#rwtW7={4G&G#?_YE7p3B@I8UcXam`5y{u5(NKc@P9*r; zxS+(|-l8CA{~?}fd;=$D;_Nxi0Ff^GoR`u>&hxE+Wx`(~uZo!>bH@-v4T|bV3$qyu zPYNDEqwRWf!?xTCxc||jOp-0K+|Te;1?1Qs<|SD#+ADqN7q5RH+zWM$Pi^-7Z(=uc zq>?m6E(3I=Gd1z#v0<5`{$gQu8~db_=W1mnFCb(p7$@dHB1l>3)pc&-<9y~-lFJur z1a#wS?id0tM{ub*1MQioLWbe1DQW~7-QS9M^X?j?YhY#l#4SbH7$5xW-!51WtfA!-K3I2 zd7>V%;gY1y!9Q1ne;2rhDXQIR%8B$jeC%!~N;5E-6M%^z$g8~@^g9)#p5J{0LUpUx zR%8`2xWg0@qrama_cn)`|I7yE_Bv7iRD4%rynly_5fT6t!3z@3U0arx!Wjv6eFrU( z8Y4jTUxxhUeGr$xc~>d1-;mmO?jlaO`MZX0dwQ2c~JGbGw$5`0atpLQqpK7InYUih$Iv z#~^Dc2-H6pBt_iu-Pq5&T9N7cy@SRKj~W_?9zq}3$Lr(ORY!{z+$Z22X*RLC0@<4a zbv`&7X;UKToaGECoXtEa~5CnBatcFYzJ6GV?opT*r3=Q zmmo2ogmzXqS5YEtbV5PCyA5l;Dab9BFMZQ^{e8G+!e7T^=dWV|JlCM4pc^+?!@!Au zYqlr`CfBwzfX*6kG|dw4@jf&|`BIjy!3c^~v?7N+iCN%*?;)6E(PqN_;Tc35N%@OT z3@?{arDuXJ5)~8&jXrp@sZ>IUd52vAba!BxB@pfF@nKQ2I;jFmO|l_h7I2Q2aPX z^n>7>NS1ZnE~#n}j~S2_h+pdup9w$8dz0mM@C3e?!dC8Mlz{`Cw{(Bos+#b~bfFd| z{}lqzR~B!(tw`kQ1o+e>WSg3c@hk!4v$n3`k7`Xw>kF$sqy}{bUVlG78m#+jrHyfkYjvWMTXZudo75d^51qdF zSLK9A!4f4xLVu^5eKXxIo;Sd>2Kdhxfy)Fw0R=r=3-q%1W&I2P*&W$Z+i_N{!AgMS zM%3aU385svaHXpve4F2&eRepz#f4wgQj8Dy23fL+FM%8w9D@U)&GdT~95SImS6wx6 zC3QbH3#vKZPkw|cZF-e|f(0BN+?z|3r2oj7Col2x(X_`ILFK5FQ>l1>Om)1hFH46> zY4U}RA%@un;u}r>R=#Gv01btOB_SueI>NVkJzuvH>$euL8OWwlQ1%RA`Js-EJ4Fwd zu}=qFgU|d}s#vC;2em)nXk>gELMj2#gi_GvHbetA#+dP_5|r?Pzb@rZm457(}D=B$vbJ@tIkCy7)qy@Kk$5M5Rv8(t}K1`20KY-WS z_vUr(vIK!i{I5;r%l>sxs|25Fzv}_pYgDpYE%tcXE2QdQ5f}H&uI}*5qFADek$Y=I zHQ~4o9J(r;J0jC-x_R|ysf|6Wz$L}&gAw9Be7LhvYOf5j2F;wMl$vITWg)C6o}({y z=2e3|_L@iMrD5Ki)lDpOjz7apSgTm5wauNP;KYhs+Ag{t!kh+5JYg=$aXUHpmYUT{ zTqpuVc-@!|HhLY}lqRM@FVEoaD-hu{tia*V>qlDjXea9;10WZc?FkA(ft%ATPiZW0 z@sycWskd6KaLm)Y+VN+GPyb0z_5XvG>;DW47SVVrp_vOT^s 
zV0ri5IWr!!sJ%E4%sT5;;tL=W6rjB~GooHsvFd!mrP*;LNmro~T79woYAp2Ip3aq zeK-393i*g#k@23l1nIYWD{u9jC{lZV|CAI8 zTsqmd{(g+yciICN=z@G#mKWvM-fS8#%J5Cx=IkFjjk)nJEt9z31B3dVB}>9F{Tbg6 zn~?rPOMIUg>>KQAnD{Ywe)vylXP|u8aoZ+Y{29}7y;`&@_Kg=y#c<<<=qL&48V7^J zn^w43iT-@$aX+)snXJ9fa#%HJHLat4%v3{@8$_ogq9`NTmhy9VQ*=;1Ce?1HOt^NC+BjNHj5Y zdXW1ObZsv6?atxTG$#4wQ*V-Hs3lH;fXmYpJlOR2`J!%i!n%{47(;LSEZ_%5ih^|- zT)r<28|vxk)isXOh34{_*M~cFjs60=A_H90woJTYGaT5*a7JVg4DP%N(kYQYnr|ht z={b90pQP4VT4px+NrK!N(a_NFh!#z*@a1A)k0%HF1{{Jw-^M`B{#7Ai+Ad>9wL*5s z7a!Omu(*So>JR&eAGTu>&_Kg9pX~DP*L?F{**hwcub!M#LZ;=a%E?LNN<503T~X}d zQr?M3UZm}6Xp{teljeP~;F#A)mz9{Jpa0&L{ASJd!je?MUOWc>w`w$-A>cbCIqgrH1>kQQ^MsYmtEb{ugTxiipybv5(wwYxwWT zj!)3L2NsPbJXZS@%vSi}RZc73GXiV^f{uw|-E+`KlfICXU3=0(cHfdVkd(Hg-A6az zgIA#_ibexzpkn@mzt^Iw`=J-XpKTDGiNSkLwJS+LuUW9V;*AHVB4JPMF!5NF*YUZ1 zxdX8x1L-t+_pZo}?6j>&J>`yZP!-mX1f&F!QG@Snug^qyv(Pxrgzm3n>SHX zQYzBd`)2arjm8g|bpk`vpHG_}J>YsPHuYrUQt-dmwLG6TQ!t5aZqMEf zA9;HtR9O>cNUuHkzH55ODtnMWNDh-l2pn?XKyWHIaz4iG9c*cMUIe9Ked|?9cyx|? z$48G+r4pkIBD>4?~T-AS8Q#GR7W>j;nO;pfd zSKzTUi6`knh)L(I1E1o+y1}`@k4dWb0DJ+qZTy#(=fC25HqS(pWvj(d0gLM+DbM}T z7p>jtCgd6=WVwTUGM35G49IsYnN02+xDLB?UmDG*{b!tvwC)R|{WZ`QQa`^w+L|x> za#YyZqVzs6u@cKs2sHFT2wd2(iH4ih382|D=O*ZrE*Bu=f^!i-Fh}xHnYF^LYL5%q ztlAUd?RcqrruIwzWwvZG5#V<`2o2csc|xvnff07S%Ub`F_zo%F%($sh0flfb)^+Gq z_xIv#N?@CUUQpg%hnP2?9Akd4G%j=vY%BE{)Tvg`g0skbR8119=(rm7yJxY!YuV9YF&@`R7J_%ccj7^hAC z{NwCc6v;lFh7VLWu7!L3p~9;RH)oON-(iq`=DCGJFJ;;-HH-E^RLkk1KtMQko-|`l z?zS2;4}ACTv<0(K4*YvhqCLY6L1Hy`5{EmLj}=5P{3$Xs3V$9}(}q4!zY9u|mm4m3 z#PStOp?369*O#0gB52NBd;DJ>?5TYhVQU|@nQW9dUhS0qa}sFhQU7}<6Bt1Q%9ZA~ zZ$6EE0tqrNu36Xne0F$DGzBHL%C9LMv-A5qiu=9pz>}qok75hlD+|Nm_Et?|D@Zq*NuiF<9 zP!Le*76XtLP&xzw6-nt9B&54bQb`pNr9(neq`L$}l#uRtrMnyM+UFe4_uU%zj^7`S z!5N+*dp~-o{!M;o>|!Yy<OgS7ri$~#u+i;4qm6y0rr|^l=fuK~N@x#MlQWGWf&dmV!FagUO zpzLL^Hc_9c%v>X^`K{>GGF2xw;E}nmf8iNjI@4zqhkPfeyWStIM~D8ojYR(%K}suU zU+$sL2b^1`+2w~TmGk4ZqRdgj90UpI zrft3M+ef00c32wgGaC+d@N=bqHkfP}hUwHG@hFD;-`E(Io--kI$m?-;COp63{Uev+ zb05d|T3#%<6KdiFwaI_hq@Ohhe;~K=S-3wdD7V)Bt3v0u2)vo7` zui%S5A6C#YzI7uz9I)PNt&>-zWZ8h?g=~;Qv5q}NLlWN_#YmX%VHtpy}$~5FXYOl=dq3jg4s!*KfjWu^Lz^#-^AS9H}}eNk{$Le$G^7 zvm}UX&;LcFdBlACkVBzlhP(DA zB>uGS+4133ZB2;Qc+DSk;ya(zwAx@+ys?LlD72Xr*L9i1I^2ZKzYL6wI6ffzb_Nr3 zAB+PCjh*>kvVXas5z0r{!XL)1t~oXeFo&=wIXWQsu4}pGlFQz{o|kXa1!>*^D^?e$ z{N?gauZz1Eil{D_IX22o5GYjhCT?)pF)5o3{&u-osM!Il=aKQs;FPt4z(x;;2`$>C zBa!n7ZbkcMM$)dk{HIomM z66Xa4p{@Mg&2X?!!F#7$Ce&I*}u_>N~C*zuo%1;N@K5&vFE8O46uwT3+SisMd{$ zjO}gNS>iT`>kI~;!SalXWxYCd+Enn1qEiWSMjVu3$or!<@mj&5ezlBlWMk(OE3JC6*yjLBR`eq^c4q(>*cBAS`bi^=FrsEQ}DknQ(L={T-Y29Hs31YWQ$` z&h~O4c4>o9j8fQ!=iSCIQKRp~fXzC0n_Vj99mHVsDun-d6B{c7Z-ZiDV$xse`E)eR z;e8iL!4&kzf5ySzDJWbN5fQ0fC{8@=PCWUVc(TQKh*r(jeEsUxTICOOXfD?dtB1M9 z2gU?lyrO4~|Df++N5fC+39am>=H@KP8jiA(hLK35XxmGOXHsQ3p|^ZNbLY*Dt*3%H z&o-edG2gKtgv72epEDZ?}~@49cYE(PZIzCBqf8*SAb7csYC z`H_0FmuZ}gXL&uK>>ikEtnhZvVK?5SOD5aNu&W5Rzs^?vGRlw7LrP0|d=U~&G3zg!meXEuI=maZZ?(<; zHfWxIg?svw{w#-HxJ0&Um@pRNS4VuoVe$4>xo*Dad~ybsC^y#EKaXNjOX;5X>i*k} zrmU;ylG^Po)8BZ;hQ%13s`TmEcs7S{VA$0wOStaQ<`4Bxl?F=5v+%@b&lE&RbUd3F z&YNVk2~r{s2*zjpH0GH=)Q4MRFDH?vNY3rV3VC=&v;NHbY5E)0FJZF=suw3M>Rl5l zPj!~hotRQo`1t%O1{D#PKMD&)mrm&}UaSM9nS7U#e9ymK5sy$|*Zn@{@mCd-_TTX~ zx)!MCsJ%HxSG|3Ve`;xaiBBpHTg`6C*XQ8uijnh>RmhzsTgA>ll}lGA`QAUPP+00? z*vI9>pON~j0s7#kz`-YRgxahm!cawP&u~`W6#cZxD4O!rPx0)j^Lb6W#6M5<J$9Q%=A$$4P9j};!1B^mL)aHqT)N4 z)z?X!Ej~De?pdj=Q#8LR%9oMa_sMl{(iS#HD{314?n-m#f4ocLy|6`5AFirhtB6kQ z4w#sxOIhEtSh|UL;VIJpst>tTZZs{8mxXUxm+1fMpIkvppGqetq3i1T`1t!p2b6Jw zP5?HMu)UMnQ&PvjLSZ^8aUjThxyc~SVf)qs0m*A4Hu^z+%v`@1*d0%F`Lg(vs03-! 
z)wF>x&&dQm?A4l~@w2&gu#{_V+QmG{SJu>Vbl2BP8wB_96L3||_0-fK`_>d0Bp zD0Rt8O_XOS@pw-`1$8Lm-;<`Gh8s@Xxu`LBi=@hz%H+%$RFg&8R%B|AQPis^H;ofz zumz;qB7`>1!JugzN50|FRCSMO-?~x@{@y(CNNs25NN1udU6tQ|6$QioXFLB2^5W;( zX?|WcI;NDPy^oQk5@c!+6mgX&T`((gRbM30cyTpupI=>eg5->Ihd(>IzN@<4oNW6h z1IrCMy1U**Z~fj~B`7nVnwYEeKOfPVrNSZ?-Ltk9M#Fpw$LGVKGtwFJ!!Knz651kBLg5s2FI0ONXEa((Z^|P_Q@D2`H#En_H zvzHNWxls-gVic!OGG1XfysgQ!KgSsuXQz}KKyCN)Ygg+_X}WkZ5cl8@P!T$s)QuBX z<4!&1Ez3?G^uAqWdV*p*+2K_d9iL`L5{GrFR%awQBA58d4+bJ>Cr=r(U$bu$M1!QV z)6l|x^CAw{$DZdd&{<^q`msE*zWR3BR}&|`S$yR2Me=U)>5%Ntls||$IvQNE-gv${ z^Gk;iy|nb>>&u8=Rj&2~iTS@@-)P<2^JuVNu4%zq(@jZ}+~bSs08}d1>K{YAbNx?c z;CwEm>;CuXM0_R^gFgT z)?<7`7uAeK|aN z5*D4^aiKmXJ6+wl!}8aw*Ao3nipzegL`T`JWnceN`*7+-*Ng0E-s5Ibsf@ecbzM!< zEtn)&?JEFVHFV)8sn>IynHuwvGZdR0afyUy0HmUdhv>2H?MhZxYDeMV*jT|x2HCIu z{n6EoGViNA=$3uoZ9cuvOklj`DlgMk-@WcR^X%{1$oBCc^zN%=)|LnDCn}NIHRo~t zTd}rl7E-6J-E*BLD9!kl7ECq3IXvv!Bx*S;YtTp=wUMewl5tdN7Fg4NQk3VBsk)uhuP{DgB5Z$cyNjWo(B(m(mpZhv>C(>Ie76Fvx z2Qh=iLv}80k?&mLzcKAV&AHctX)Xf`Nd|W5mE&=8xgFep^oNBAWb_GvI}USnlEvA75<6gX~}u+h54JC>y836aR2%E_(qD%g9Ri1 zT<{TWK%jUnMGxQf*{s+wpq}U$zN9(1!q#|A@so1gz?UW`n{`{HKn|hB={~ zbw+^DMVrfxpK!+m!q0XjL`XMGEb5892mzftn8T!_qbnLpr=iR2HRGX_HH==59mU-= zeR%thDjUAzXsl;^V#F#ME%EHxP1S6bA=?+@D%~e!_x|!H=WHc`r}|;I^i72=jS95J z;^O9=#?MuZ2~rd4)zT62%+nj*zFsSDG5JIc>(Z`7zR8u$Xuw{Z-Upjm3MR6IS(GGP z@7N};squOgx5oqstVa=JH2-2ZMX!;_?m_%r&$|JqhwNuhw?)gg7yA)g{*ji@A?rF@ zcyVgie!j!)@#-$SmGGZPYm4_OmuYt59sWnlF`t@rO0o{)Es8^?c=T#h>(AvD%xpY* zqgx+YpAQ+>*q|LY-Oqc?$*vJtR-J#L_tPP{nN&nAcED8l?sFOTlamvngSphQ^74YY zN}?lAdu(*&X^4AOM{4Wi$=zf@`crj(^A1|5k;-H*=#(v{haOJbds=-ji$-+1V6)F8 zETQ(+*41tI#lp*T)wQx>kL9&ChRzDF15MOJNe2fW$}sF|U3utl;*qkJt+4lD;gNDW zEvd)F$6L<{prBO!9l}v_1v;y0YW-9qtEB_A-lE(l?Z1DwM!LAWqoIz4zyJ2)(lvI$ zoumEDPzvrqNTbh0*Wy5lLoXy02a)PDo;l(<=lMS*hw_^?w4qo2quV26FIkZDrfNXsP(FQ1r z5?~X)-fkQmjDmuKO=Ev%PR?)8Z}@0G+wQS5XrdtCGOvZh$H+=W+sFcmJ4O%iVXTzFM{rHO9G&+YC=_eEQ}%vh;>x%IZ2q*%uVN)JVU zHs|nerfu^zv74N~aN#Lv0`T$kukYrEZ%no~J_?DIcHX`0(4w;EJth4u#fVSx9+DR9 zjppjrCLlI4F;Vx5UPArL)APfn)-8jBA1Jtu71Y&V)axT9kkH{$;m&9q)Ps_#U7ulw z#SP7-R}%+GHz-p`8yho(hlj(^V+`nuK+CWhNb8_0O3%df0`x9m8RZ8NPl4XvUU;Be zFm;NEC~L%nL@@aUY;eY%i`&rRd-djx_+no!#(QD+JES34YHDhSR!?wna8z^D?T71jDNt znVIK`HD%@G2m%5EsD<6$f+a@A$A@*9euTGR)%fBM3Kg46g&mj4$-gZP7GivBZ#V8t zx*I3p7!1NqA+3>&n}9jid*#;Ei9wJ*CkCbQe9tE$7?9-?6fAU@R{~s?iG!nd%6$!( zfR~7f8o~S_fE?|2c+X_#*NCTfA+0S0OOcYomKWY82Em!RZ8uu4y(hY^(^2E|EDz3g zDg23cDK8a?1Gp)I2BHA~qIib#cUXD2^jWbed|}H_R8`{k$6Vj)Rf+ir3g*dZQ?0d; zFXm`ju_K1yrr^n^SdJ7;Jw_ykxwj0Z=pVGE_uPnps&pNuWhW*kzQt{8i2z>22C8+B{IFkh6cady*QuQilZre|Gi>dJUn2RaKQ=zn9TPzYI_HQ zD89XBMsU#<0a{tkzr_G@hpt^{m&qGPUZ+D%}YmXjqDr7r=Vye{Z$Vk88CixT)jPIx23#Yst zT)9$#=X3LM`r66i>gG(WwE-u&dWi)^YilcP-9}Cxk*0wq=<8cszkl+Mf9>s!0t3ZZ zYo@YZ`_7WT20+l6&re$~>v|tMViQn*dn_+3>Ev{0a&mHK-;{?E)J9NQ3ILxycnKQ^hmWwU0p2a- zt{?U#GBW3%0{$0eL;hqAJePudzjx8qFhb=TkRg|(l>KtflNSnr9%|K z5!n{W#1fY0$ywn9R772Ktj}IAaR^D{OI)+=kWM=GdQJ1g_f_?Zjn6`&{IXJ&%YPFQ z6Mx-XM?=Z!^2}0Tk$`40!ns-!Cz?5qAj?K$)VFc>$5iL(L!UQ`y1U7P1>3YT?^$Ci zAaDpcF5LUtGFuvH6Qg#CpLY&8VPZ66bSD^6iS2@w5h7uuD@SPt5-PO1P@=B zD2f15=rCI0AzBg49L?fj5*F2@{>84v?Y)D8GZ$%T<(@vhzSy4+Y9`O+MfUweLqiwy z#&00?n?U?ZU_|Xx7=Zo}%S$LNEsb*L(^XSz>6RCj)zqYfeoLWA$D4QW7@%+{{TI+S zeaevfJjc3rW?(3yH7o1h;>3OE^E%XepG`RV=`FNofqT6cFDO#1z(-z?e2>zB)?{&; z)#_&OV6aHWNJ;m;&H0D%Yz}>brhJB9>@=;Dl=VWJvi&vF{9%M0aSh6pY#pboSBAr> zCz~o~y81q0&sLiqIhWjUIp(V1vG$5*!LvP7B*wC~wmy`b)?4@$wq94_z8Agqu*92r z+C*Dhn_R$wiHg_iclm5WD%@XW1Bkrn@!Q>T@5FvR@8c{(=HP~g06YpImo=k&Ht>Y- zJsa4)zQcZrC&ilk!{aA`s^%fCBVKNghZ2Ryip|MZt2PPTcb4kM#}mP?&;0mc=zY9D zn`kmtsELpU(kOO!u;T5hA!=Z=2S+ 
zuCjl9-s>I}wbvy^m^i%FNqJw+grySwSKoYy;LH2T`=pmWiVKvw%o9XE}ShM zQ9Dvid7nEM5q(T9u*in$;Wy>8EV66X|M*Dl%$QM+3-J?`oJVO^VT2p+ARZxMi&^m(%i%k)qho(E zhz_vBD+~-XU+mdP@dTZgGW#jf$Weh{FC=oeOgodZbxpp$xb!1Y6z<2t#MG3yr6n7n z?8pQM;$Y$G9kSc_BnZ2Iu|-RP*0I+pKWNEJuB?PdL=Z})zJ_gVz3!gSkU9cIuiyFJ z$1H$@mx!K15V{5QW7xOfEn|TK$UD{+wC1 zcyefo(;GFcX|Z&SV(xg}e5pbNS2)qv=bKcn-Vs;uyh(ClJ*hzdLQmWsbv|=W#TA9X z@1=2Oi&(6xc+8TLKrMpiuESg>1{oO{P#w~}-aZ@}WHlb&mOn#tq0H(r+WTtY#HrVd z88U2h@sZ%8<715~ms`+AsROUNx92i4GP1U_V~pJ0_qY7^H)jnC)YrDRje4_GP>4SA z*OnIa)Rf(z?793j80rM2luI-;vg+#OfM|N0?9U?PE;3ygglW;2qw&~!b~TM^nIcRI z88rblPGfI2c6I`JdHDp7jaE0oj12k2b|QJ9=U^8ssyX^_1Hd1;?JPYLwSu3Y8W~-p zrq<^0z(xe2WZtosC!^c|GDkiq%nN#Wcp!@wTwPsv`c;@oV3zd>JSKE$+D<&6zTAh| zbR^0^1a0W;C2@9khJw#g!rV1?7x_g?Xfz1DA;5(M0X(F^A<) zQTfb@IsLuvw7(I3|GjEtnGmF>$LxlngVr?Gzi3=B*#9ik^&G~=cHgU~xz>rd|1G|~ z@)t?tTW>(jk#_bDGAN!Rqy8)Fh*$giyd^FUd@mtqNk(v82I@q0W-@wgexD^4)dx@Z~ z#`&bdzfISi`L;N^_m@I{^&fKPF`2CD5)YFh zsdyLlHnzi%bKx!_q6-FVW1;B)2|Rg)4cSNIBfh*8g*4vBiY6%!tsBPKf(8p#r#=-= z>=y(^?vL&dPuQxHuFkg29`N*S1(*^;Of?4azU_p=XQ#i@7eL0Wb|wrb)^}yJG7=g{ z14h8C;{M}-)&;q!!l=i}w*NMqqgu?5B4_@v&}TYBq~7oy!Y$iNL9$A8k&Z z0gbPYXg}s-|My0Fs#Dv$=6L@y=Ir{=llwP|DVpXA+v+SNF_@KY()$`*Be4pDXm!*{ zrkjPb%(Ihl;mGG`hok1^Ou-)lo)kLBIn!dBYsUQj%*~qw$vH(Nsv(&AH?n#ODsWcl zhjPzZ=aMvC7z-(W%76H4JGWHfQ*T%aX@4O{lM@k7s%_};X?~jY(m|}w8{`crU;jx( zkFZtJny3ff-aF{mdydl zjlbblQLX+^aV@PUjVxJI7uy&MiwUQ_Pm|C3i@`R}RA*lBLqpmqm1zY62&^NH{W^$C zCMP#t+&m_t!`EoaLpNDG>M$9e#8amD;S+!{sp0UfCRTL_?PB%=^`52vri$MoA#>aX zuPuUeSDd#b`g4UHmSe_UZ(EI|iMc}O?K1mch{74uf4)c98+(%n`*v*7PAhe0x6s|p zF97F$F*iyWri#-{sTC6Tzcl?37zNin{zFCMjgo}+mbN+rym%QBJ~b2i@u!m0hV{u3 zw{Hj9zm19(I}d>znqh=Mj(!H4pcWFB74F33F16n@mT(i%A|!*$fN6XXkkNHppD@>v z@mYESNlSE**Dd&IrUj4~TxQsiiNDOh6CdBMa zcc+G`rHiY|Z#k*z`17nVUll=)@tPIpZq=FimglKjXcCURZ%T&Z-evwnp#~TDr}c{i z+M7rN?6L8%7_esFX;NP`E12uB&ElkM_k)NF`=;(mOA}F221})W;>~L{FzR@DS!>Rt z%x6QBDARm?)Qaxb$tVAX_Q^E(hx#7gX1}Sat#sE})z5raeT0rC^f1ZA0pkR}mPvf% zg{1R%oNeOK%1o$x7c@ALRZ?Q}>!vZF8BP-YaLM3Tr*@eF zRsktRp+4*+ILX^9|9|0R;G?IJPzCG+$kPGigqh1Jkbf>;DOYnN9P`}O(LDJbZ2_WZ zGacS1+yLNTWnc&d@r(CB{wQje;}gqy*YH# zqMbD=f3_!i zX^LPyy7njl-e{n)g9_wBWDE?Z@Nqg(g3c=+0d^t}lQJ+g90aAw-BIW9+Z`I8^)Udo zVdUZ23d~c~&}fB0&IhyY@%w<#K&}3ZB!WBk=k`ha!9H5=>a}ZrptOmU`CP`K%f?wC zAc7R`ybdQ(V}5}1=#LgfHDk5ZQ92wHK1~!3vr_`Dz^`Np9KT$<}R6*15 zUK&fXv^v+V8k}dd>RX1u`f9!{8Mf&~LNH5Xvp9;g%ZP*a>Ok=T5oa&voNcyNt(P#8 z7($~PUKs&3K*KP+u#lVE_y;hGfT2;>^XaiGAd$ic!wd@iyE8q$&ZmJUvwt z9UU5MKMW2Ijx9G~7>4-x@ndI2aV||aKxGB$tYsAyb|*)!0PFbq`C&pze1(}gSWV9} z3X(QM9?ECqHNv2C+|bsB=Ytg80C?Q88sQrQ-iRAU(Ze)@_Beivp`w>3$A^!VmHjny z;i3dJG>C`O3V#Q%0t%95!22p3&EK(Vh(no=ydQ#XLf(La#eCeL_zR1Ont%x4aTvn1~~FdGokbWWfZ#>mjeyrPE|5NWxiQ~1v7)tF} z*RI{k2gKRbW$puYF|QJZoH0OZBA_tLfbI7j;EUIBagRW-9=2Rf12A)ghld!-vN4ZMS#2Gi z!BT5Fly%iQHu70kYYy2_P<%}y%DAvb523Vxu@KSHS|vtErPANHf#vfck^#}YhZ74p zY^wDpWmWfqCLhYWdYcp~rW^21vkC6upiKxO#V@5&C#!eI3;_iP4m1HZUmZMT-ocnU z>pQ(8S`RQh$if>>_Q21Vtmf7HraRqhx!sRACs^7^3fMCMPew*Y7Q-d@sa^M>^hA~z z!8HLZ1uckqYn0pFfZMJQ zpaBZ8Oymgx97QRpsyg-Fz9c^IXDVdH#RxQ7toD3!R%_}FxF&m3ZX7quGkjS^6$#KB z_Obe$h6SZkVPGAlT2#ghhGUO%9P*Wx@v}yibe7+#CP#n5ts@oCilKxwbm- z+`dgy+r0ga>%!R?E^c6c0?H504Ft8za!#|FvR=h#L4feMV??Z=trOHd&#L3Q%QeF2 zM<5raMi3vOy*@Wiw<}7}oNKMugWr>=@?uyAL>c1+9KV!GyyvsM3Ky|apYv5zR1@(1 zms?a-RBZRwoGzo3Mi4Y{}i@tXh3O&;AZ!J9|yXLyZ5Y2WdX)EkM5-G~C2&wk#M$Xj^geNg zu7osvq4o837=A)^rpb`OGT|2C-4Vx1k1ksurk^%&^SSL+FJ(63zoCy{?47@WfNn^r&!=tk|v zX_yy>(gcyLI7@;jYe;Ee`_7Wqg(Ji(G?Swt66*%s_<{$?gLJ+5N2M?Of96<92kkfcis`P zsO5b%<(ZnBdl3|b1BDd2poJFm%HNqQd@fsO=Zut;sVp|WyynJ6Ke#}LXaKb!EI_Jw zO++LCx;f7wd_qRSF4%8S@EMd|+;VRvz)?#@(1yjvQbD7xV!;vksrAqzh7KViJ3D(% 
zU!UDV&&{D?^Oo;XEDz-5@|+)6{b!gbsLWJ2Et^?uJ>j8TzH8~?vM1|Gm{&py03)wZ zApCy#RG&+!F!;PrgjBPmpfRSiIg(h*3pWkodnf4W!U|W*)o3>VGJ${c#IH2r;=!L3cY$RvbIWn7nh`dou>i9*YP*cKG+*E{bA&` zz-767`;p0FP`fJwi;A44rt^BLB+Ip!@7&Zv6+son_H+r7eW(VXJb7~c#*KA=sJUU@7vWXM#l@)?8p~GGG)G$%neb4~kCbzR$bHOl zF)+i0goRxVO z{syd|0)L6b@*?YT+FVtTP4Y|J^`QlKxH;RAueRFTHx+%hGdgcOL-hD1oU1Z33H1C~ zHH+pdFt(7~7`7O{7xn%tqtNr*CV>tJlIcPp5HdXP;}ik^GNMm!WfCPWPTfH`b@2Ol zD?^Ko+DP5?@?+vh*ikzPQWQGno4(y(Xa`r5i5(Rf|L zuEi;q=^*}8Lg2T_q^{$I#S_X6&-#cTX<#D3pMEkmIOGlOP>ZfO?TqDBX4nR8=J zms9iG`7f9a+XDAvr*p8SC1b2CIOUa7Nt`U1yFg+-^SxQG_uuN21zZz8Mi%ayBy;nv zs-Z1R!=;b(g)U>GM~YdP!_x2eqg5SPJgY|+tok&ySV^I@lRDg4?&3+#--lk0yC@o3 z;OiT09FcKxp&1z_*VLRm0iAXLDJHnQy^F0L4?tx!@_ozk?sx|{J7C*380FuNqD)>l zdzQI4wzPQUab$k{;Z%5&)OPkYO(%kdU7RXW*3!5;^bVhTc^SaE+FI{wTM|k7V{I%d?u|i!F!p1F99BNu!={~} z?cjEBD(S(C*HKdDqftN`SiO{PxHvM~ga#XCF0Ob2oFQ9DJTms*kV-Zg6iiG`*2Avy zk%1edulVg}K0|AH%xy83C9p&Gu9+D#9L5bUu3uh{MPT?g_fONRG%C2^j1q6owasD6 z_hHzu?qE*XO-S(=Cx!c6;}{kU^ejBA@4|Ew#uHBWRkJaS_LAlz8y24(+PeB<|2FD9 zaln~K@r176)|Qu@l}9fxD85#6cyp`~%hbk-S=Zei3H299rN~0rv*;P8qe#rP$vpVV zHUWQrwl$wWtXs<(FeGiAv^GQnTZqFEq zzi8%$rd`{?%1M#5_2cb>#eBWZWCcw9{4bVL{PB2}$(*~_kx8+!LZT+K zCee@h1&z`Oc-%wArH^*_c0`@Su!j4*(nLIZePV#eIeE1Gkz{en4&}V$dptACj+?mq z3;~-`EPXTV{*K#>4EGw#8zMg%dr7X>8=yu;M}aYL4MNsCHZ5IJ0h9Rrr-o7@RKGUA zWebPHE3F}~RP9f0!u^x|L1FcTIGc4BRt8pfTlP``_T2iFB6F1I5TDIL z9Gip4X`?F&ocG+XG(2d!Ky2%&qzs>GK=cz`)K!u^cQ z<^6h|n?c+Umj#QKXTcmR4kDTLjiuR^8NkP!JA+B(Ap92;|A%coMVgeK$6&cA^`$?5 z@^cLVqZ;p&R9Kn}Ol2!m2dwJ)Ueoh6z^Xm>!W~l}3?tqi9((vPG1g-*e1uN(k;H(F z8hDrJ9<}*SH z25@B0A#qC-AecS{hAmS|9_UjmS)h(`)a>~l9eF^hY`s^dmksr$NuuXF7$k(YZ-Hhm zlD!5&{uVR4JWzmHt=j=1dZ|N4ld)Jcto7+s6Pey`NCppOw?`qrMX;f@wY71caNs@u zVfCAS0XZ)i^rZn%t^R3es=Z^z_*cdkHSQ<^#?poi!n%FT62IYpmLOc=eO>v9ShC)s zCJ=JmtnOfi@7@>UJ7ROR#R8;ptQ#s0y32D}813N-9tB2f$r}`F0-og`6hs843<1p6 z!6AqD|uyO3B1H00shgeuQJx)yVf)QT49_5R^ zmW=xTd*`}}j$xSR3FJjPr~@g$cwxZBr4fUt4#NGjz8nhZtHTr-aJ7DT0wDR*9*_Zq z@O2&^zdG3*=(HP9<^O_I5#gzT41_}y`iHT7wS45!(K^REaE>NsXOXH+46wS)tgMuz zq=^dTO#L9mal_y_Dw7K$pt%a(1VB6>g@x#;P}Kp#?6qAk{v!4dzT5SiI8j&@nNL_b zA;@m>wN^L>CBthy)kkW(M6dYpvfBOnC@O?uOkjyoFu{sJQjv37Wq8Q^BT2F{B}Qgt zS$FyU+-B~fATZVWvF0$M47>Y3N95Yo_@SW)6)vQD2#$W~lC8!$hcB5iVe_*Z(T=YQ zv9;z#non8L+N^39;^Vt%Oi4$`BK+ygYDef~3ytxsuftgL7q4SCXiEnG+9wfs;YsV8 z8y4d=G|*Hq{_+0qlPC0m)c`$e1JK%`Ml@{wOX)Hl0Q|0c{T+v1J7oClo153Txru;e zqnfXa^x+W@4(fMcTwWw5z6a77pFVv;n(_+^X5hn+gbkPqH974S6#^HqF@XsU%sF6E z-GA^P8Il(iSS}T0x7pb&vmJ>hzySaz_Gc*4A**(Q<&A?io`Ud6p|UFkej4=6Pje?v7MbA6D#ZQxvo@Xch}OAMX1y@5KlIS z_XQ=ozb+5Z0fE+nF9v`F6gE?kVa5qMBeX8O-X^_`3gBR`tXS2qh@S4t!>fi3S~LS| z29PpERaGeigR7(=0W^Y6oOZwO!!Lhq9S?+qP+&9y)SU|d6Sb(i+WKsJMab2HXi505@i z_MfIf*L9{~@v$yeWa48(3>TGqHFf^h(p_;)*QBA?`-eqQ`nP?w?8|0Vkn!&-s`8ED z=oBXM4pqfy#Yc{s^vWNg4)p+J;9hopb|IZ*#a{QdA7POha<5{F{gyZiZ^izExoCLG z7vbUQusyJ>tgMGhw6rV`(v|%wn~jL%!f~wT3iY{#r%gI0SYnU;*&Ny(npu{tgs>SW zcFeqatgP^B7$8?s`A@*d5#9e7d>n47jXc;{HYfCu4x?<+aUR90dwcUSLQ!@FJmwbp z1qA;DGLEmGp9YZ461@&?0?p0E&20n7e6TR(J6Ji&1s%rgG99Jn0!{-1;m`uXR{-4W z?@t*CR|N$tbr;WvNl82mULODTU(llwVJO-EKk$eY(zG6HccE^T1jZ0K*VA(dW{85S z4t_%{;Dt$Drs@R-Site(0ooTr*6X)$-hj5s?X7#865YMyie)N{D6%E-v%^UVOp zj&M8yn!nx>LC46>9uA^F0Fqtsfkpv1S~Oe$SUklA6bij+ZtZino*Nl5NRBVg!`x(c z!{k&K%;}By5>uvf(V{I`frRug@kqkfP5o-iQq3@PrUXB|K$s0!?EDrro*9TlOBdzun#CXE_tO5Nn!w+O(`Rr+uy+r?-sSsXlvD*yRGP@36V2;wkG_ zQt=UuH<6JDSr=ezpIpKXxHO#P7l5X73&`z<1wbdE7V&rw@`sDz!`W&a?uQ||)oukg zlMO7i(ZCUgegr)|h7ZuQE?&GS22?=LgDng%3pPbi&ISxvz08KbLwJ`OIdE83I8ofM zi!SUC0R)$JM{H{rM*pNGC9#0AgJn_g4Elg53JQwf{|}t8e>k_wtd|)`&(C%9rStpI zi;H+-fat$|{W=GT+JJkC?Efa>;^IOf9SoS>cmfWTTEO82W1`2KOV^~3X4n^?O#-)d 
z4kndRq#@H4=3}MSdJ78+e2xpm=6yNP_kG?NM5wpz`J5LMV|K#A zJ^)r5Asj9*3d3$+6l`|tG%F@1CWfLo8yM66dyao9ps)rDF59QGr5`=^`Hf~flDo|2 z`8c1iNUStgy6}1kQ{Wb&NbZdgRwvN!-@kD_K1kcyV!e$?6$}00p;@=3aJ)*f@kC2! zd4o!jp0oGWihXqdP}oR!IpkFxuR9`uy@_Eu_~i!RH^32?oS)aQJ}OKrzD^_XlT6y# zroD_Tclb#|RTf8JJ8qtUmckRl_=pNL&8>40_2Eg6Yk-+?^T0W5v5-diM+PXJH`4X(W} z{YV>1MTG7iM_fQstA0*WSWeu-0sMM!7PkF5&N`c(Fo$0;S>U}gyV_;#S{rb& zHtwHNBIPcHnK@t4G7Eu6)IC{^o8jzv$Py}@gVBKD_1kN;E}3lDIL^beSniLoRzrA= z1S-;OLu$j2&oKYcjDSSRK9gK!w5fB@z&u}ZdQwcR6qQnOfbRYk1p^+hV#*^M6KT>* zN=SWtd{98#mjy2V*x0sKavV#x(P(m{$Lr(UuFb>_dD7=B(@jD{KCH-1Y!r45Zog;L zRXp!97VWc0Wt&O;G^q08l->W8Uio9JgZDo}8h4e((TOF^F2LoZfXz@=R#q1%dFHbE z5jYHhAt3CDKrDiORU70brqGTBnDaV6|6mOfc@i+!Ek?>-Pd0-6$w7;z?61Kozb@0K zT`7{=x^zM=oZd%hy8T7H)9(wt*~=Z)qZLt*9zqU<0%CWDZ0wyKw^Klm)+hW3r zr&H%#7vG&zWCh=^)`-q3!1S%7@>IDTbIxb1r zybP1Pam*WVv48SEgdW?h#9i2+|8o`EU;x!MG&CS*wAmqQ0=@^KZNi{P0i>C3f}tFv zOP$aIhLjtjhaZ}Q2_dl;vbetCC_#WK6I`awQ`69BK;#31{i@H|S!g1EXbV4I2Ms@% zdvKfN9?*OUlODeV&&(msbGzw>(76HN{|U@_{GOg3)Ps>j*l8I64W2Ft1BaX7sFA0D zbh${A4fbp5L)UX>G5YP>zxMhh2HyU+6#})z&A&Eh?;mg26L{Csgbao{>$3pdas6uDi#sZnTOKHHxseUAVK(a{~b|NGs z6gk;LH?z+N0vRC)q?|VTw5)Y*d064X@Dzoh6D!1jp`*3NFr9WIuahzFwR)T|VCzj` z|LOU9@dl^ZEa)=>p2Y8boyh02mGIoH&va}uHP2URu{^Q(-0voe-i=_ayY~5Yc8xF- zDN7&`iE_HGnD@uC#jT_QYFfk_KXLo|tlpNtwK;7(wq3VL`uNfRc)k@e33c^z16J(4 z|8m`;syi6#0)hqJt&TUTWt1pBeISWezn_^Mh0)GjA*f;Fu%|H>H;$XgF0t;ygOQQ> zEqm-EuzjqDQUc0BsIi&T9cbemvUP+{CbPa!Q9TEm9C8)NKd$%*-bb7Mx0EykrZ$ z(1qp2DX6hv(%g1%E}59nuiWp3&zZ`1Ht~J3%OeV3!$YUb0)G!M{sANNO?vuYum4ot zvDTJWd2mU9(WGLOH_pm3-bVCDZ{WRwAm^U}hG(^Fv9JU2AkL0?XWCFmHE_tHGEF8* zGPy1*$*McFPgtvy_d691BO9>(sUMBY8ylxrJE#7`gM*t%N~)UQf!+^(C~|+Wf!Cxa z`OyQ3++2p9-tYR-G^25Bb6&Ip(WOnLi=0B~HzvqIo*eJ$U$|Hd&!30oP}Ndw|BH}f zbPZ^$+Cs5iCOxJChs5Z&C)Rwfr2lOpnNw>iPt()16_=aS?s%NDZ&OPkhksZoPM`E_ zRZ^j2W`t%wOf@4pJv~r@0;l!+_p7|TWT<$#AedDEPg`U;Oq#pXLWk8J75)1Cm)A72 zHObl!q;qdwTril{#bK-{)5eX*H)Z$Pyn8!A<vqYg736eT zmXqxR1|Jrv`Wb#CthfF7(qi~Rv1UNM{uLIMNU&bOC_ZcE0$2gfQ)$2+p`XSlaI&Wx z*B`y^9edii;!C4!d!hLI!%U(J&NgX<*_U$tFx=v`UtQPNtScqMo1tJGoV!I7pi&N~d|bElYJh4JY<#jFH& zLo~uj%_80^lrSdll()aNCHAo^j8ekEZ||x(yK8pW;WG|%F>Y$tx7XK#T7Ufb4FxCY zr$TEE!Js51yMQGZ7azZj&LE>UY^}fkhQmWTmzj(NH<^H0iodL@wHQW$0MQ~BFkdfm zC>oE*KV2Cq2OUs6nBzD^Z*15BZfEmz)6U)= zp%y~utxR47d;**{FgXE10t#$1jAlN3`Z1TJ7luX85fYjl?XA~76N!Es9UTcvZvc2* z>X41$c?pp2&O;4DrAXq=r4Avc|WJ+&3n*NzAa6#R4>vM>Imr z8TEY8nww%c2J}~9|D@TJHN+CLrH4K32X6l|}!rbQu(q&qw&h9-3S*Z93cr(n>X6vHx z*FcYx%dq(Z3i@kox}MuA0HhY0hZh^R5I_!K&E$h^#ZCmVJGJP;Arz%v2rMnf1b5$m*FSBw~2XxSGAMJIIN+e0d(v!-zp45 zyr+Y>#oyn5K|SroQp<&0W&=FJK(|#ncLY6= z5ePcw2TLPC5E%ooD|F1jK^lDb#=#&c3^`kYic(580BtJh(?Xz~0L=$r1fP?Vy3($x zP_lDytgNo$j`W%&KzP_IxbyK|#l03|LUZ!({=OhU)TwO!uZtfZxA@^%{nZjMO)IoOB{ZJBqWZ?6!9H$=%-Z zHKzA%MYbXqK0K7LIEZ8M^O3K3QKkZxhE!P){?7WG{X?8W_2N{JOuj$MqV@q%i1{=B z%yc99(J6WmQ(Olpygvm~Qbq>3H+a-f0DlYpGJH|M*74*_{wef$Q~X-?i$fIoFVniFZ;&9g-lt%EQAFYQgCO{z%!Ybsh_S_yNe3WN;4HS47cn!d)d*(`! 
z5|R+~$H;;Y*{y(9QhWzbr~Z3bSVL3w5+F6*wSyqg+^vk&Zgl18R(ftQR_;v2fDSpwdRP)OG^H5Nc=fa42pq#&^q!dkUfXCW*e;tLh9=G*Ry~2LMah%4`VmPo@?|XH3m`-+F zKt_LHhTiEE{u2Uk3{{237(EKIf0Y}lZy2*vXnnA-#)&4$Pr(WKbk{;9#d7Mo!WO1) z)8bi7fP~}?e%3g^2nuHpCM|| z@oQy?+@tX{cg3i2ppd$4;t}UI-oJN#Kz{Rae<*_s+Xeo{5;!lG&Wngd!NTg@JEIId z9;mjp%PyN!gCZfHNmEs|7O;z#AtB|9MG#KP%+LSl@CZ~5Wn;D=YJ$q(6xX44U$Qb2 zE-vm3Ma4wGLe#_+RNkiPfQ@~L71Eu(fl9Q8hYPTmiI;ZzLrCj5BWN}76Iw3QwomvN zuI?SyF=b5~wDAdJWf5kbeoV0I}O`Gxt$C-ZaHHtB`A#DBt8|{I|)JA2?HG$xHzcbgXE0 zsTI8VNx?pLkO8Af`#LfbWmLdH4ip~l$i##k$V!2!07{0*oA>FNBe5L6d&q;IF&HQxp;?3jw>A%%PGdyZKkf$lXumk?7e+ z_Xs?SpALVd&JB1Av9i9Ho}T8Y*zC~m2>b+~f%n{|+2I$|%!01^GCTW^mX%5w zL8?4>Am`yxCV@E;xvmlr7G?sT(2e-al#~zOzI_|A2RS=3oPZ3;kBrXmVH&7adUU(z zHq-{sdw!YY(kgldOHOxXgN{Mrc$>q&MTITlUCI~s{(O)j5iyW=fH{^=!dh@$q^dUI zBl}f|n7U9BG6P&dExc9HV~b3R7<Qy6HA2ls|=2F6u=}&kTOOS4a z1794J$9-NdTkGqe;qrxU$$q-^{9-Ydk=Tq_J-+x2caD&Kjwr)jX_b?y+iQe`rdUVD zcO4p-Jp2*6KV-UFINKYZ$Gh7kARV~h<9yA+W#Agt*BzjjCxX%o;1w`}0E&Zhq|wh$jk!0eSv|a%y^Tq5bLX1j(RnQ6o1! zoF#MsjV&Z3fh+!h0RQHiHh`wwm_>DvorVMy+mYHJTF!FcvVzBoCk;pD0w@2iTenc| zNSNgAv|~30Nwxnce|O1Z6WNOscOpGZ*#DkIbh(c>Cq7>Y&+#PtH>i;5%&l*Gw$W;y z$F578N(HzR^pO$dmt-qiwOogjaL=+kMKwbUm}tKxM6SD%uA&HoGhR z^J8QBynK96Ykw{(qQ!uT81h`9fcga378Jlrse|R8qIf@{qqvUGYBvtAz=ZhNsH7GnrgcC zgM=HlVokJw`Rqa?7)H3EdW94sz(v7%P*`4GpAT(g1ib1Rpndl=HG?3t1wc0x?f~`l z3e3EQyiM<0UyvzAz`P51tKW%Xk|?U%#@;@6`1$fZ3e)Q*>*?OKD(=UPL_e}`A#gJ2_8D)?4w7q zRAIUaJODuk&(a+J_;9&Aa%}S=q?cli(>K1v{K^O2<(<>JtZJ&N@5%|6;VgIAnT&+n zuCAja_ch1f0-$zh4H*$B39}4=G9;?p9`H+Dk>~%9@UH)a*5UmfqA@1;Df8=8Yu6q6 zZT6Rafp+w9aWlOQVO&$zwgQpQe7OfCNc5AvW4sI^`wgQw$S6G%6c#=*)29U=8xOeN zc0ZU{(cF38ov^()ocnB37>*4PZs@QwGJ+$Yu*WUXX{H^HV$*)Iw-0Qxmd~Ya9)vX| zS0w#5B}t~r<*@v1S*)gcvwv!5#DlhoR+MSIZulb3aD{;_BaP#rP9l^FrJDa0Oc(Jw zI=T^RY2YR&AnU@0=Iq+N5R~Zrm?_?Q#;ZoqRz|L(hqqspkbX_9<4^` z1l-LSdV2clKVQQS#`_+=dsn{Wu2F%l^mw|fnc6goxm0JIC!lck&)2|LQ^{4u$Fp8E zTlw!VuQB)x502ZM?beYofqL@gf5PCDbBjv;lizyr>c1Gvz>izaapzfQ?Egu~tgG)1 z?`gfq{8roaH?iCW>CEKw>sGH$tnt?6h!Phh$R?=z*oP3kBc`W(p`U+NHRU3XbK!-g zI2z4D%P&T6xiJv;g7mBiv#_@ZIxhX8B2>k|J>;2plb>dDnp&9f)eh>l-4r8z*x&T& zo10f(RN|S%drEipxP%i3LELx-XL>`$A!eCZiNar?>&;()<@ieJn}4Pqj8VUJUcZW> z_UtMb$k#YEUD~S`u6~S;OL5>j!H4fAiCN9^QMObKiq9`D-CTj<^w;;zfce~wUGD&+7Tqf1UCHZ{n_o!`NFcjLHB)|L&D#I zL>O~z4(8hnXQuASr9CVji6w#alcMn4xe>|z-LQI|O97mk1tWjtf57*S%FexiheJT@ zvU+-~@GF_dU4d9Br(+M;`wE;`A4bZ2Fd!--lxZn$b!0Ab5ppWNb4&Xg*-n3pelu5X zt==_X%VhhPO1??uG@N^d%olzp4vB!c1=K0f?L%x88#tT5ZUmwlgx!Nl%j&YpUerR_ z!GZ_5R!b*B-e0HsOM86*=o%8Ofr+B~=r~kN^4Lvyy z4q4f$ziLn!e#U=yU-TteOU{{Li>dE_t(_wyA6N@iIkn?>ZS1eg`hP_E-izlC;#4z1Vi^Z2UQy{n)uU1Hl;7j-WRy;YOL*b{wTxATk;P z7kZr&R(y8}hkAAiF0w22DCLPh{yHnC5KbpMsMNuHfkErHH>bm<@X1@!bEDg=;=#@J z$>$@lL83u)r*+c6TU(}R2KWDE)T*(u(X{JZFL&w^TtX9&g@t`a$fyTK7Er|TdG2rP z^4E3#-=S78J2T)Pm<37{S^={^j+rrF#sz!XbWb)52CyucdGlNidZ!XB=m}A44p;O{ zcP0~3*KciuillszR?3Js$^}$0J8&TbD+gLDbm@Y98hXCssbYw^k2Dj@;VX|9h{88Vf5O!tqWsn@y5YZFSKGnX%djL;tlNum?^!M zXOa^RKZ$6g7;cC%2TOM4tjGDYKL2e&-oE2{hEOma3$sP8{^=T-{gh~}hZ8rbG&Vg( zteq?q_H~46py!Jq5!FSGqUD7%Z|}b=icS~CW4r$GPc2UJC8kGY7Iy+7@0Z(Op(X+F z4sG{|pC`fBg?aYu8AzEKn1zkRYXMRp?c}uonJGfS>Rn4_Z{#dW7e_m5`)2C#*9}?h zeM?hD@l!{8bsr(*c;p@0-vVAx9fD5YPr|!G1%5Haq7B(#4%}T1Ho7yn%R0^QrPCiC zeOdve%XKcja2CH61Hw#ZF!f9KED5dd$ln#%GAdrTe z4T?_0I0A60y;cSzQ|?XwxKITwQUN!37C< zcTZ(;99&(CCA=R}zJO2^sDTMVL;~bTogr^FxQR&#s*P~+YV{vOW4&1q2`emYY;}y% zQ0pJ4`M1anu>VWPm@?mL!M=w0YI3hFGV0tj+|HcNgz+DI-6bC-=uPjiIh>&gT@2AS zOXLvKksmF@jyT|FP~+3U5pG3Z;-<(4`%_f2Bf{Pki{mjhFFc1;;0bcI8 z^KtC^QCdx9>&eMv$OXn?sMt_32gp>ufBpZPVTNGe zLm8c5@(e*1;@eUB!w2r`(<*Gr@weg0LC2d|P|)Q$1abfbU_r}(EW^RUq^W6XBJP`4 
zpnU*cWVq0fi|_w7Fj7ZU{+0@r~^TmYVPfk11w_0Kb4EiRWyHdiq0ZZk^lk$xnt1=LL(bDKfj# znO-t`=Rb7uJADmF6;3En}!Z8l)4|*h!!FlTBNd&!tHe!9g zHylC>0eh?g30G#{Sr)(|^78m0B@snjARrgCE%4QMkq~m{`)xyV(9Ih+^q}WLfqal} z25W$euJC^{AR{3bWKHRTs1PY7VBsRru9`y1qqYy+_|*VH5FI~00_X->*Qo3n9lZuu zJ?t@7P^?3nvZL^;$ zY(V}lB(C9Eh;>18h>|b_y6*)f$^65+1;~H6L|pu2Pb7cFo|j}5b9^n8rE8p)Q+w{N zBRxJ0{Ls5SPTdy(vlEu@^01b!wZ^9XPE%k6a=*S zAkIlc44of{^gxGt3{Ykq?_Z9d*LFLo>5)qnY9f@f3szL>BfY$qpN5x4vx(y)YZ8Osv=x%Ingbv^f z`n7>nKr=!~0q*U}ukz>5zWKL!A^WK>Xm8P?1j5;S4s0~^h*t<|8 zNxtYjrNEv}%|Sy>t+HC}>VQqN#TUmU;!+JMQMx>^*?N0u%D{CbSdf*~3UJ-qw-~h3 z0M0yaar%beW#6N?9j33tU-VV3q})y9&8t%Hi!I074zn7z3AvBv@{!!q+>mj=GSe{G(&XzzX+v} z)^G?dpD`&CLIJ!F19Fi*`9Fk?o0xrUhHA?f?bb**y-x#D2bv}{z3fqXflg~$_grRC z(7su>HZ<1=LLtnP+UT$?ahO4T8{7?0xwIwSp=)n%SFi98fk%}awgo;#KF8TVb2_D< z*C<m3%xp@XYl z!&}bxh{>h3kNK4m28g**cnQ8;^_+ObxET4|dg2McfSHG~^QZ6*@!b%~+t|xH)s==+Z32^UP0Ib9Di2R1Hb?wS_i#A3*5dG-XCTCb3h`8 z#4#!9>0dzl3GyV655pc&W7vwZuBhtnrh%Y#3;_k>ZNOf=4(4=GFw0!0j)sP-?8bg9 zL`K75y%|>pl>*Qd@D+i%!pf8k^RM@N!BQTwgFz%xGL{XMk-_%zC|oMFhhHy!_v8*x z9A{-?>#Nkc&2}0%-@KNcWL-RnbD#hq8jK->o~I3Rdw&BNA?@h6;b?xe<0AF$)vNEY zynygdDM!f;&@jQdUSD5N2+9L+*<`pgoR2=Wqx28r#Fm==ZDeE$a@luIJ#`dl7JV_~ znLIQ9F3jG=>z?EI^kHj|O-P(_Axmz!E9pEOcFW#mtB+bHAnEu2GRoRK@V}3;dai_a12P=|QKrH0 z(2fe7hi&ggY#koyZmn08=9XS5zoIIA4F7Uq-7z_X+M(;gH|&B(Yhp8h-lV)U#yxeR z>N#bAyzt4D8v)l6<@K$-y?t=`(j?BO;p-WyV8+Utq4)7^3$=KEQ0 zExA_gdFSZxfG#h~^bX89Q-}0U=ONdN&>qSqs-X1%)^!nVti1~m{fLnEdpu$zj8c7p z&>D--w0hi=^?2K(Vx9SUzQl(+9W^r!IpEH{67gra)bFtk%$~}1S<$ZxWr!I%g5pD? z$mX$|?RSt>SJv)GYg3rsz;DIIIWrC)EIQ-7hw4F1+mR|c){b~*X9r;GsjQY^h;~uD za0gI<-6~K%^j?j(V7KkxE4|>fOq$(DS3HkQz3V>I|@W(hsD|K|h;oRbmrwyx* z5_M#UI8wdUi3V|Ki^)P=!6UT48d+%wcS6yi)tiP3G};a-zJXg20}$|miYF+)SD21g zm>_61vudtXuL{?yWCm(sT1=S4CM}JY4Gz@R`fyvJh$Fc2puV#2!76@EJd^Y!nn8l- zb>bIbv*(A(_fmrMJ$69oZGhc$S#VKRo;%D8?mCf;uf4B{lSfq2zj;j*x^752KT088% z(o5jxdHPjRckh~*M)regdioVhQpGtY(m#&Re!lM5OFzi#EY8ptwgNT5%&R5!yDBQL z7ne$*o&^QGBv@NIEjawf?`UZB^4QbU=z8!#cnc>4E7|+*g?$lm`lHonUTImLP&Zz< zX>w>f#c8*yf}e+GovY+|^;36=A300v?vN)vX>ZW3jT&@nuM8slH7%w$@nAWxjCk)Q z+o{0pxj62x`!-VXV^rDAHdUk6*e`4gDXk_Zajt5^!xcyq!82qmz!h?Q8 zx4K-zA1xGKW@fI>0ByzUZ@!jSH!d7MhKm9W7$m-&ZUoU*@nXpwt+cr{Yvz?%(K0 zxcu(=vO1}jbdB-(KTQgoy@S(EW=gpq?JrS|K|g2!9|MBGs5CUpXs|dwq%?v%%-QaCm@)_lu z7PMg37~Yo`i^m7CAIy@!jgv=Dg(TZRQq^1~^#sw&THKO{Kdi-Yh_yg7fl(_8nZbzB z?JcmRb1FUHkbt<1s@XA^9ywAvwN;*&1(1Tl(sMmfhp6(;+cY^3_WSjW>=- zyF~s)221wB@Imde2C0X1jn(;fG2a3El2N8tofe(8SE$F49v@r{;yay z0Ot<_>jRA{gh9ZJFxeB~UsvaD5`2 znOD{pm)aL|!dCs|X2XY$;=$$3CJRA_(E4$U-kcXu!^03gY5ud2cvILcCwhI`n?n6j z2_+oa+^o;ckav5ey#k!M?U=GoK*qZe5!Wb1_L3&u50pa=B2 z`U~b!XbGhK6lyv38lm*@pxS9@`**V;Hz5~j-^~5@0RcA@E^&*0C*;0jP;JhAGN^8) z|3_?Oq5pmV%^#kdEstBT>t!>#eHed`M=GhafBCg?P($E_pQ%q0Puf0uxz+T!L#SkY zk;+uiLFvLNgf+nXUxj|~)XP74ZPT%LgfIR!vlsBnJQRED^K16}@Kb75Lc)6$oI$Ur z++%wumt2~;l>D0`WG==D2U)Q!o=dEcT5u%(`z$6ncyD*`i@BY$8*|wn_20ot9W_AS zwEQK?Phc3E(dEahBG3%Z`w+wN=vlm ziZhqGhwFK^A4RjtDOME>%FAcJ6TebqCWB>HeAE8zSZDc|93C5g$50>t9Vl|(w)>Ql z65t|E?%iaW$bYwypqro~&x6RjfBDTBy4z1yPMmVjEq?1v+iN7T`0UGuj0uhWq_n=q z>68M8xmRmn=5SxLByVZ%J^5uH2beC{7qir_Sn?jW=|@iyWw*|+*wELv(~FsLE#wau z3@MMVgg0D(wrU*IM|a}g(IpQTo?uK+Y|jH6PKBmS&kc_Fr<)o`KPb*f!5Bsu(^LmG z%=XUTFd`%wBy4bVdVQ%syEjVV^^?MDyLx>AXfD1d?Vl6v#6giHWoF1LqU^1USoNccl zVr8=5J-|yvMv3`lhrTx}m1vBt*lTwyp%TXpfxV!+yitZw|Ds?+a0o$&C6Tw#FGXgt zs=cYb`CE&)=jz|p-|fFXKir?t$bBW&z%_`kgsW~Rq@=Z1XNu*bhu~myk+1a7^17$! 
ztjA1=^u*7ZgD%g$S*?qwh$N5p?Qx%B9-7aFO9m~**#6nVv*|e6n(pqafE;)Di<&6` z2B?6(>Nep9pcqMcd2bb%ET2}w7lQ@wx-l4R`~@hguU@)*`8*%rnoTnKcU3j*TEGc^ z!$GzKW~L?(PZUFm6J{D5@iroY*Lp}CQt+ZGtQ+56E?aHn0NK}@8y5fs7(pUW;f16Kw^JR)J-2GxxN*@inpgEe7bq>{Cw3z07#3*{2QM}>s3X|RZ>54hUi0k zoMlh|03TBY^ypImU3+qDdB4lP9P_jO1!Jh{$U^T-%-zFqD%4tguF=JN z?mmHK(@EgfhW-HqAHskS1-JfLgkr&@guQ%5z-yk681ptv<)^9Vu4m2o1z zP;o8t+S9LJXeBr`Hz3VIh?CJ41xew%yN+KzE8sb#KrvuYcKPE9vZ0Rm>y!t zMz;14bn=Q=j1cCez(PPZa9TfD-a`?V3zsWq$m{5UppZgMoUz_G*S6oX5T&z%qaPX| z+!0_dzChMN7Vb2(>;w>}AhuN2F~M{eBmtqOo|a!(hDw<^RWO{o{jd+=TXz51(t?E^ z9`E~%!z!BY0wZc{fLQ9N^zx!7Cq%fiRUw|oWIY^~p-C=R>MmNBH}+LZ-Mncl@{pw= z>-Rx0{ZQODBB>V@!#)Q42K5TeCzW&d!u2=HO9=}i!-H-{_O3y)AjBxq`Fi?xtBD7RTOtw9{l!zRwcfgc>1Wj|mQ6v6PLj?PLHAPM}& z7dQ$~q^h=d+>jOQTJ-y10bGT&Nzm&^8yT^}a9b2Z{1{$+e?3`>m68}Hejw#KRLtBE z9)j-G_en1YtxRUxzd+Y5p&ezO+pAvcWa;bLRnVMDX1r7~$A!uTC=36N$%UQv7s&++ z926@LGqf70)U>KlaMbGLFU?rxr(iOEpLFxw2~)dRjMQ#h*z%5tmVJ}J09fZCOV%HR zjP|uuYRlb~hXPOcVB*qwm{B4jwbP)C#Pm?pkOqAP_!}ZJUatgQYc4E8m0j1&BE0{73nsD=x7|dzDAX;*CM=-afDs z7pa3r*LL2EIddh)IrW_^cLw;o4*1e0xX=BnN532AuE0o+r<26yuUS4q1y%rCpyM9L z3Zzl6c@=vN{$1mKHyo*$%Ga+jRcuqFrX~t=+BW|d`96aUuGI>ztAAvvL&$71*Vs7_y+?Y%>h9>$Jl?@~+c^O2-*m-}B(mBw<(*Ab8Z=4d&7@{#) zzPIB<-$A<@1~gJO^r$dY;Rai{me;`sWU2+ge^k-dZUVBC7W!jED;)0cG(G&*^$3ze zpsz)~Ti6`oh*gHC#)mGE-|t^%jHO$WAzU2hFkV3M*KlLp0_kXQ3)Y<13dZ_@JNGk? zNU)^s;D~8U*A%j<*fEAk-ew5vXa<0t7Uo6?4cN%$+9NOw5tT5P7|tmar3x7%O6N}K z!Mj1J9q=S`0mou=n?8?bh(&T~w}0FO9WpXhY2!d&snJ!4bGL-Rg{6MBM+?;6LLCAasER#!Bw zPld3q^fJfxmj~bHE~3X4JsD}5nZC?O)n*gRz{D`|J+NGT>%kk77mR*54}IUagdXH+ z`52Vyx7Zd6%ft9d&>xI}W=|gkpcr*}>PFFb*+V(;GHU0Y(&A1Qo7g=X6BV?cgW=Bo zv_|eh93>K=JYP$=95Q}i=w7{3LUu%6@%!0=EtJ#~$6(qCJFJh0MO6>r)_K7h%>4Q| z`u!zy8H!Sc0QE!6#dDZ5#$Y!9e$;kYq3bQsCs=N};^X5(n6E?20yNAqWo6=BnYt}N z5f4u6YSoP+sDfatIeY;Jk3NXnF2T zVt^k$mCzEIK`*uTo6mLjYqIx)+9L&~r!^1*(qTYJLvxz=g?F{r5`^WR{s;L&*c!lF zuO**wEy(``Fk&eKot6w2I=sfhDFk2fpPx=K{v$0snbV&Y??l!U%OFvk$*DzUu(yjx zuT@M@KXgm9m(=YHK@X`u4q@|Xv0=!f4`=CQYcVj8P&{YD=p!i1@VuY!TyEfc9RBn) zp*Epy|6p(dkbjr4~u+G96<(rRy zacl%qv!SVJl2B?J2c-f78jXHx22+;ZHFaMTUz>xLl-G7l4n-S5j(A{`vH6L0IPs0s zeronDK`#o>p+hd`h1oD9n?(Y7Sn;4vM71$Y5Y4q2m4Rm#!3crXiv~cf`sW{2$BmnbQIk&XnN2}STrT!8I| z*n=81QOVaVwquZxjgXAG$OO;d_EJ&JKin&R)#cMo&$Ex`v{@znj&Euu)U9@d89a{L zTH1_89WkXYd#?pN9FB;!TC6`RKNc9Arw+bNv$*Egx^oorspVVSgr`_#?{aZY&Ksc3 z)@n{GFUJh;`vC)=YckA8%tzwNy(Dqiw#M}Bmv;S(Z=H=sH&aLXI?cRG5>0u6ZXHGA zQ8<(K`X%2?Eq@Ww=N-q}t6e!070Q42Mt(CI*;b&G3<4w%swZ0HhF;fqfI5)*Y{-~{ zDA6%o*gfYC20$rvUrUShGj(;E_LZ$0$8et!^pnvGzcS=)fKmwebE?+XLcjCR7xQPs zMBqz5-0iN!&4L(FxN8Qh`<#GlEZdz;^Vqi?#3h3yd;%B{%Jmo+aqv6QcthqC$T8rx zp39tv;HseU3Q#bC3?1A{WZxkfwFR!F?cNqoRMK_oErKq|Um&3lju|*i8Sw~{B#AmR zbt~8Hg8~E5(M&!^k3stis&db-$_dl|Nk^CQ`VlVeEr_P-2Ssa`fo$_T!fNW$|!(@ zhFB6X6i9*|{wi|e!AU0esW`wxHPxFM(2~=kQMYp(C+EKH^q-#ZjoD@WYW*Qt#e^)m zpY@2S_o3kwWa9BJhkRjqFo+?` zIvlI|(4T+Zpub{s&c>TFgLB?~iKRR8S5fn@-V#mJTbhK%2(@mzil;oovnwJEip=jN zZ{TLkTG&a1PrJ=hdJK*44Fm@K%ua`DRUDo*`kJ5&hWVNSEm#&>(p7-nu7XVv6acc! zuOWo^qIQ%!WbIsq?IzCHaxz_r-=gO#lK6m^HI0;&$1Dg z6ZTFG)#-Ll(jENs@KZxm6A^H;J^IX=HO1Ib}2@wn5Nz6C+EUh zQRhGIy50E93Q7R_bUehCIE@zJ;1Td2?-P9l-(yMTQjXCtbdaj&o2lle(}oH`vtIw( zR?B0hR?B@MA6@8Vi&7C&Ju3E3G7kziJkN#=_a*EkMiu^SP@rms+Q?4pCfjKkD01(q z4U=x!76Fxe#RvT*a(0rfYjr38=DhkTiMb1SY5_xFvc&=+7ah_gSb zvN}&t?DuRYOK+-ZC2Tg>+J&ntI6@W+PX{)4Go1H3$U%AG)cg1J>yW8qLg+5xJ~RAP zDqK_-SG8Kbw83zEZHc#PMYE#coMh#f{(H#)ybK$1*E2M2My+>T%6Ar=nW3egje>3k z&8T=O=OcaYmCm<2p`lQf*bO>%^SCUJtTP8PGwar`?DPYK8bDt;mC!#X`|`qr>tyWA z1n(XzHaVvpm%ZNef!FLKuW|C`nx-b5eJSm_G20i1mn5X0lsLK09v_WnI(332bc z;-K=1a?%;z=Zb3?Y#IV@x1=)0sw>N@3+bh>%|rbG+9#{mWk_wVi=O17YwWq1yX>-? 
zH{1Zw@RO?lL{x<7{7*#1bve1u+BF6|5LcN(e?-V0Nb>a(`{$gZOB?0W7^)Y9DeRf1 zkIheK3i!1!vbAW!*lDL^-vO(^l}X3?;dG`79ocM0#$)k%rHtL(M$uW;=4Uk3mJ(OT zeGRXlaws_Cz(;z!6kpVxC^pJ}>yOsALwI^F;>}=OMBqK3 z^l34uPUs-=(xE8R-h14%5cx>_>U)aCD-pnL#M;GfQr}^F<5653ypXnaQg=yji(_ua ziZAQW^JaN9rZXl^b$tAEX_=F)BCl^iv^zO6{v;_2og_#1oSRdxr`#W^Xj0>_xygNTXLR0T`ZYjGw0Z?;nZph>B*4jL;|Sng%sUQqGyq!?0? z$e#Ff?AJ`DWW6@)+Q$oHH=StSWn9Jf^?ohl9zrm zO;*;q8Y`{~6K>(#`?mYWM3Rlm{Vdy-Dju818O=$nBQ;O6Cy-ZFe6=YSG<7m5Sh@ye zHh)#Ka$s+^vD9cwU_)_#C}z?X3(%r)mhMo8f_zVHtJitY|ER#?r}epo^-L$T{c^9t95W7O?P z9kojiP#8dIAefAKg!(7o9Y3z-M5j)EayxI|v@j+Et13W&1wMqc=!(hqOl;({;hAdd zuXQF7BVnE|Tdveu2aV*^@dQVcdAJIETs?D#BTDvyyo~1i`e?JpvOF^!`($kye;hH;|`y_h4zc4IJ(4%HItow8ZDc)V9G<~%? zt~ahX>&R(;b!uDbZ&g?fJzYAjqm><;tDRQ#aaiX^H^eNT9-JPqzu& z#ZWF3X=SW00Q$-JbiM6iVLzyUQK}jMYf|41X#K811rCCK1^!CZh{4SI_b}hv9xPvc z(2!Y`t<nU2pZvSkh1;;EBhmcImq<-46F#$ovW|* z2H}-J>^S-7zB2wfdUE@YJ3=do3uw9Rt`^=b7l=G99R zCL(x6H6Oof`fj6sl`IQuG9tx7v30pueIQ-elL4XwDWncxIWlFk+>75O^f5CRD8(zy zcFgCoY8AFxTxBSjUs+x3XUesxWp1|19E-*wJU;xkW?lOD@`cjy?x(zQrMgMzA%;dD z*mvhQ6uAyFwXuzNxdB!Y1(VE0Z-{FFeSxM7`!h&0kx@vj-_6~+7=F`R)-NgKT|~qx z0NbCgLAZqO(S|N4z45$t_s3`e=$0C__S(}-Gbjg(IG{I2S-7T{%Hb9urExBCa2Nnc zdplk{3Q|1sN+6JG4#g9|*j^0Mms;EG+0>5Pg1+-7AT8Sv@P_CJ3>sN3A09NA6zx8Y zpi6Q1lkEdf&;p90aLz-BRbXr@n=M^$S1g9VD_1v}>+$}c)b+0nnXeh$v@MY-&e*ck zrm|PP$Ju*%<^D)aZq5blX);5E!N5rlD1{sewZriukzGGA;F8c?{ z<87D9Y<=A5GyFpm#W3U~LRcR(yLw5po5>JMxW`iv*->kCwZ}}a=*oL)JcVJ>{bcN@ zC~H7%Cs_6XO-Sp`G#*r#3wR5(a~A7w#v{f7iGf&&35&&P51NGeoSd-`ir(@uBR(WtBM4wZ5SLY948Y4aBj0hL z7eb^td>yy^>&NTEU?lP?m_ACYT%e3asd^|g0DAgZrHe43>AaLw0>E3jaGk(>V`{>x7i{Tj4U@P|!+6xzfFNiT;npy5WdMeV&CCv?$v%+mcMno^ zL1y$dBSXnr$f%JLeqS^4r2-cQb1*(Y$BHC^5YU_jukHz{5vPb>0E!~M`jaIV%5n_C zX8zBhH%xdYhTj9GFauDjkxe~`W`mXwEDJa`kjdBvn-!83@R9u(P2ojh2T;kdFoq%2 z5$$-CUj{e!(3r<8C}n@}-@$z*9X@_+yDiM~c?)(t+B{M@ErP<>5x= zOuy9rC`Iw%?i^s>o(SNVVSWt`avbC&1{D%`3w!`%f|o*>od}JtHp@Rn+Lg|22Pjgq z*C8z8iuS!a{cM{dSL_NjQlFRhIx{_M0XGV$A4hIn0AxF$rvY+iyV<#-8W0{i=qUih zWIOdT3s4C}Pe3UQ0>d+yqcy3cYCyoDZ9we`a%O|F7EuY32kZBtT?V^02?(N%fhZsg zRh&=L~OxdvIzsKW@tk7m;o9=L+k65OWYoPl^_)om{m5vGTN;-M=79RY_i+0Kw-w;P8sI zYFYsr6Q$LTZSCD;3y*!e*$b1fg8&eXv>mUrttJy{1@QmW%NjM$VIb1bKpF(gVUVHY zzXrn&iofY}-k=4&?Poq|opjY1-Ule!5Z5NPZ+94a$w2Z(osH-WNg zK;m5wLy3x^`FkXhz+8{}_sCYbcI8xtMp33pr(Jm$ZKu(fCM$DV>M{+D?OgjAUcI}i zb|PYgURT97Bsc(58&^%}@rhx^la$0oD zYA+m+PrG1fWvO5|y5bgl1x&__f>}KPF#xb?3$5Ol1V~n>g5cmE5dT6hvIw+1oW{z# zSWv79?v(*H4E{JE5O7DpU&9783xJmPP_PKOdRnH$LaYujmVg2&7JfmldJGI$G|TPp z+TARyj9cUZP@Fp#=BGA8&9goZBm$fft8g^gA(j`0ZDGXYyGTf83rEyp;wMsNg6st; z^dV^XJ(_F>OK@eqLpk?xwV!dlsimY-Y56shP)IkZ*hshLG5eE}IcTR3XV2Sr?~s2D z*qS!ew;=sVW~u~$Z)NXbW@jIkG8TZaapbK7^oIt?InnLXX1X3QEQGB6Fwf{LQbs`4 zHVNnP&6Xz+6)>X*JGykkw|zulK`k2_KjtA#**+Cm@aOsOIlbQJDK^Z=}kanc?}4 zj85nTcc6jZ3868{U6O`{aK+Z+tBE&r)iCv+$ zMvvs0$Y2Mj0NnJezD)bxhyjGF1%9fAnx@y>QZiq)HQ%S|8~{p&VEfT>oB7gXQ9y9s zQkdSVAh7Jq0r-|8;VcP%-R4j=H&u5A!unCbK8$gjKt%^2eI@|oD(qK$UBHREaRChw zq!3@mAcQ{Yi5@xW{h*-%^IgX9?Zp>EfNK!*!xJKMGM-U$bs)K{;#!N>FZ3pv>k zj^O*>oWu_G$VDXF1=1g75JgHnBMt6*q5p>H((wJ$yfohnys8K_&2h1ouLAf7o;|+v z;&*x77oSPDYik_llGuF`ZnJSdht;ch!hdMLv)Eb2xJ!zA7haJODy>QZ0fKRre!Ymp1n*M zyzcS+`Nwz9EAP{Yx~D(=Ip>P+>N@;z`U*2MF4Q%CVN!?hVS^HYS_6ax0rH7}o`Noj zg!;-zLBACq;y{o2UW2dMvoZm?7HUnKybC9e!>GuJ3JGX+K$VO}Wuj*QP8HOG0sqd_ z_8yeXBQOk$qj1oP4smBdV^1cN5h!Ya7C`-=P-qG zd2q?I5H_gvIq6H32(z~lj%L_`M06H}-Dq07}w#d7KTB zV=e9Jpzc>r&SN}VN?td}f#pfTxB*-l_bA#v>K{fy+Z}Of4 z*kH0O`voD!{})`@m8*Mf03mOWhk-lV1;EuR5I!}-$1=|-nPrFXBwIZgxru-ur~ob9 zDp(&YW8t=XpsI?F68T`$lI1DDVT<4#+6D#L4d9dk&W-gt-0^o*k_$CL+k4_UYM298 zqt6xFW@)|8fYDpb&@0AjDuSnDL8 
z$+7giW%#!WWSRSxcc|$MH+>|r{6tX=^F$Bb*Xm7rcQ=&VZ#LhC-uPj2m*dw~kwkWR zwbH6-SW;G10>X4lS@c?XIalZXO*2)rn-!NTYc9q}K4Z>$PyOb?<*mq$FV-|{!O`C@ z7gxJ%<>HkUPCIsGc*LHpaMa?-3tVQnImQ@(r(-VZTApz(v*QJE93U8HQA|FpDq$G7 zECMoJ@IQ^M?HniYMM22OJqyNBkT65^*G;vlA-JY*#|obetM4%6Dei)Jr(kHJt~yNL z1vU}Y@W>BRytnvQ>SI%^bw#^mJ&VKHywk7cZ;3lx>E%mJosR6~Fhq9>wl=UM(SULW zoMb@`0VEDXdm0wqkE^602!hy`FA&)>VE2m?UW^n-N}cCf1%!D|qLFxpOFQsOk`4T% zRpo&b97Vt3ChOQs0Aw4wj@#ViXqZExFjLAL4%t`CF4Sk^rNakYf9LhN;+a*ZjE4f z5I;2mFNb{ae{BKZ|HBqAg53I0R$yw@BxG=8c6s{Ok3 zQvF-+LL4J^g`QJxm7OJiZpXOgPNMBg;kTXZ23&p7i@#!tqnxF4+B`WoA1qDkmD`Av zPPxm6QOizsO}A{Si4iu)z2O#$%-*QRo7eX8{$}Ho;Y1Qu5Z&EpRVBes*YdTvXxHR% z4`+I)+5^=Z#WdZqTxH4Xd!|)pk9c^#uBx@=>#DKEiM*^3!E@aTf9kQ{qGtQe!*}%1 zQC*4bq$H>64CzSz25TmE!a&eLGGfS@L4Zb^CV*Z9oC?aCadUky#tBGiUy<*G;!{yk z%^X%BPah&g!HC&Zpf5ZG7d!)By)AH?_HZGi@p=t8Q7R-JDG#uJQAkmO} z1msm)9i1Zz=28l&E8FP@QBu*aL9reWa6q_F|Ms5-JRDuepBzTCRPWu+(Wxxj)jGr+ z>Is%BF~k!`b)@syEgPAj#>A$u;v5O)CGFcTzu%f?{eFKDJKvi!9NqbPwtT=^=FZQ7 z1xthO}ge)^S*=m zd1cIndvv1xdu_$_hQ!2W!8=n;>5KOfj*rZ?Ch@L*a?IDxofv^(BeT<&Jqci>(6P`KQJ*kf*itgFPfiHFo-T)>2C-!*EL)- ziLs>?vAw`hWP5r;VP>F`k0!X-JEJQ8TTfn)*U^m80C;p?eV3Of3p;TbcHv`mYcFl?0G48aU zoJGv_n82TWA7Tc7TqeiM^7$1AOu#_$)X03yv&WV@`TTdnTY{vplaAcElewHOLFK~X zO8eV4w@*1RwZ@UuX-VrisXesnY4~NYTeEQ(6Z@w4Wru(L@!@x$Qw1HI$j@c=8Nt_e4u0JVA1xRa5x7VtiZxj-RFL%c zX*b@od}A19$)P#VbW({j{B+}wIGfWK0%ineB(j9CVUh_-B5@iUD^7htV zd#5tW%%0g~D}?OKGK;Jb_xbMg`F-#2_xJrB_c)IG{_l4kITX2E*ZVcz=j%M5&&L_i z|5uIRE}}fbknuwN=Bjb~1@uwRope!agnyJx=&idQHKQ;d+ieWJvqn0?t+Ek$RM)?L z!8bEPg-Z(R8r& z444_@wf%0UTFqE0@dG)hNpq5`)wpRu)C_Y!AwiPN2( zcDtI2N9spLdoK=usVOlFt2i9l8V?-(y!%VQH(LsmUoWZkL-M%EtAYPOZ(-5Rc<12C zyQ6Qs?plosGe+3AWLeIhFe*H{{cSC8Mdtl+8Gc-NJIyK)gXHOUbyXtC={>pf#pOSV zdQxH5%TOs1mP*c1c=JlkAbWD~m3N7J_1L%#u*)*?~* zr@i8cDzt9%MBU6%_=o8)J$=wA?iH_(_R|~N&2yz6pFDWdFY=qK)@`+ad^lt4eVWYv zXoE~-TH&??v%-Fmf}+hW%MX_qT-A^uAE}H9{dwD_F(J4b*_Vj(5@`K<`Rfy7L!`?l z>b$&7M{9+%=e z1LP4|?ov2thv-Xa7d;~@3MR`Bi+&RpT@Ya!&X!6XOGo*G#&u7}`a}Kec={=B8xEe` zv67K|Srx5If(;++96nNJ7G4M>c;q~q`fl?VE$Xw!)1r8Tu4fk4o??S|4=+vN=|HR8 zP`%upk^07vy(O`b6Thl&2~~a#ui5ZVv|{c)wC2jN4mlzklf6oyEKQC zkM-cd$Z@%uFSgJAdsk;0RWECmR)+d8hNn)=Cd_tCMe7iy{HENnGwndcc{S+Az}}16 z;^m)H&FU{E_ufH`&D)@p3o8kBSyzK3RG!tNffqMx(2BG6oKK!up0;2oiN;kS{A`mK zZPO^q^`#I$c73PSEHOYm1jDOenbK`0s`(aF*|vpR$+lMd$&-q8p}Uo@G3VZ{yN3%3 zHY#*Qkkx9rvI&B|sYZK^67RW46t9Y?ADBjqy%#$QytfqyZn&quJjs?Uf~(c zzVAjN5C0r7F=#|5`lt5t^v3_Jy?i`{n7Bah#k+h~Z%7=*Ck?cwAk4-Bn=Oo35QQWT zl1&F%0?|_dPC9j6NnW&G!M>H)G$MX^-HB(SSPy#)oBFJ+| zJny41CaTC*9|w16jv*h+&&P@RaBrUZ%=5$CVz#IMrBb6$cwWLirZPcJT0NWJP7^m> zoeXv`ehD;sFKgd)4T)D`xzOYOW*Z?)I?Mu7^Z3J!4y+-*kO1;!>jhQwa>x~thEzAJFn-~CmN17@hP|29sTOVV1?N+EvSss`)m_jKD$l&A4l z#Z`T!L1nT#?-i0lKJA(LRwo}!Ul|J@3$-+Tf7{lHwlXO^>;c|Xe(ezRq~G)HHj^?Q zqfRVYB3h~M(~m!wn~1M6{N|jH@Jd~?_gr>@i~iUZn&Jt@y?f`tiPU0+1EKl+|GA?; z3p%#_KXVjticjA=g#_ur5u?8c6AE>y@ z?RbFOnh6pO%+d15io<|>AE^QTCszO{XA8T8Ij&r}#L4OUyB^Hj$46f!UPp>X%SUBo z{5cO=M1F^w#e-cj0m^V$q=fU`63*K&i@4}~8{znQw4&9BbWk#ztakEmHSU)<@q#0x zm^Wyt9>*sdN=Mv7PUIxF_)v`Dp6Am2X_>AEQ++cj+XTDBdQ1I-N`~( zQ@GxZz8IE~iE}r%OG5cMo~pAFpk=ij&R14BF)_hOdBnhnAN$y?fon|eE%qh)S;QfR zobaK}Wm}wq9Grnj;{AlSi8Zm0XO(UgoB$EUk z)^10Dpsw7V9vVBIjw0!*a+v7{Ep#*gsJ?UOs2wD}r~?O9Y#7mO+;A+w-2E%O%-86N zZxT|Eo_xh=*k3=vIR37~^`MM-2pf=0UqJuN`}K_h)~`#BtqZWRBNm+UE)*OsRrnB$ z4P^SDo!VLX)3s0-V1w!)NNEx7&9*au0gzUzgUwbpKwm&Cor4k|(nYJrzEWJcU{s<# zC_@D;h7o~0(zg(iqyBz}q=HlJYeT?iBl7q@C{~g5J2XKY)p3u`8x!? 
zjPzARQjQ3H3F;Ti9jjGe5CtGoX@y)%7E}wozI)EhBj&-z!>RB4AeH!NE91PrNw*?4 z61V_q8nB?n!VF48be}=4L}&~KT!3gcE)auj#J$dsqdn78@jKBp+Oce60;K}^x`oBP zLi~7I%`1mx&W?vTw|$#Qj8jxyi`6>y9k83^d56c2xuYIh8-#|fHMlaB4UHam{NQJa zWsT009q!~`=e`+#4ZXNCuCLzCi0U8{wDDaySm$OmqF(s4eb2sW7?WiC$-k@eDP8{P z0FSi&$BmZ2*#{R{8I;f=@~t1MlQULlwCM8RC%cRUbUIP>XYm?Ttf|y4C#=bOqZw=_ zpGoy}Y`E2Jnjc)m?XHj*J=t;>W4UE)c+lZYv@9C6^;<^$J3CsvJbCV_z`dhQPj}Dw zC{Gw<#EpBRx+P+ues6hF0IA~#yAb^m#4jsOGix*omJeHBqK{9yNeTa+I8-rd&uMd z!w3<|BJGnw5I?@=CWoQ76QBjGczaFcQ z*VAL|yO2P&t<+`$5tXASoWwu6e!1KShHSBK_>nIyWv|0jDb0(T0oimEq?5+YrU!{=7ctEV#+yZY6$Ah zXpL-+L`1m0JSe}PuU3VX(GhVa%Gy;;FIRt5w8ozFMytJWVFX|W~JKlrf!662+fgjIS~eqZ3#Pyq!$Ku8AhJ*L`xMEq;&hWtJAZ;s(q8x zxtrdmsK|bmGxOa$A{%qz-8=&Qmx^L96;W-Sd$-J+a3!jbSa-3i50m{_Q>CqMS~^#e z|0I{N-BQ!M$W%4dI3{N>({ba;{qd!@J}_t~aiA*CLi$~$Di&$wF!12Q>ml&d89`g{ zNw*0*EX0Fi#3Ro`-S&O&o~EW-+d2bO1kBA{GpIo+zzF+3j6(qAbxXkl0#w@$@Xe;$ z9s(ceXP<+2tUSx9r=UtevZGXu`=?=h#rx88wUH%Rd9dE|Aw1LKqH6%NkK|KmTg*rO z6TFF_O8zPF3(>=av_LlvEYEd_Jav%pq6J_bA!NArXv?f&ul_rD3-!brM*2-n5w!?- z_HbVSoXrc$#2~a^a}$L-guRnDEKqQWqC3(czo}uR;OE`5D6?~O34~~&p$e=iZxtPX z9qs##ZKFJNZ@-x2(cV2>=Hwx^?xh}dIoZGaaUYzH?w72{(o#na(AB1cu8!DmsbJdZ z;lnnyEI2cB-5nCFaSFi5_{+6PJkvy9WeeF0qpIJU(3 zvA0vPRrEyoDi7}&mk!%FkfC35#M@EtlGw&#aIJ9`#Y@BsK9`}VR?t&-YSfzaDe|ot zOnv*x`DxkD^kO~g64O_ps!SqQ>OIw!@YSen>@GIOEt6+#xsCEp43Uh2x_T^oK8vot zqOHTV%|Y=b^K9zHMd|$R+=4tt(jqiv?)#g!wQYHwaGRC{74R5m1r;nLOY0O^S{21w zUx;80H{b?}4$K`L9UCvW*8Zq|<6>C9bp6$UjZS^e4#kR1y6{Sk!1HME@%>-u>*;*| zIjK+D@_&@n=bJjIX#kgu^MDN1CWQD|-#x)Kgw#wrL6?7x2nQIY{Z#MRqKMG7I8RoVzDNKfv0Y3IwY`C^7eota2f)<<=XcXw2O8-tiAlgVV4O zZXwLn$Zl-_0XQVKfBGx49>hr+sitvv&}0!K<3y@)sXeJ{Y3S-h#!PPY{(d_PsYcti z{Y2s+rCArXe|m-!jX8~B;v=C7}p zY+F{|IwmAy5~7_Sy2-zhtxsd4y=d=i@*7ZX5^W=Mrm2?sXNsKa<69DNR*Y8 z*<6iF8BA#KW|c)tlnFzK2yvB(?w<-3g}c%*IbnruB@rqjE#dxb0)d5Ivnhdsij|NDJ%j@K=V<16O( zHP5HE4!BI1s$Iuk^t>3*%a85cB@isQ5@ex`RMjs|@R*S--O{{TEF5`ofjopII?p{% z3**NltUI)Xj@d{?2IRH?ajZC=8H~|m$D3~qpKjPvl$cVQ%);s zan#v$Xe1FqzO6=aAz}Q!dK*Vdm~oPOLqzO@)Vn@S^b9sY>+DJ7w2*&_bguL$(4g!@acr|VSBY}A5EA5DS(&XV<&TwzCc=PUXt1GV+S#A+w>@EH)S6e95 zTFlkv7D>q{)X0W&1*l|Md@emNAdIzvJ2=gGo~A*4^U_S?^JxY6u~#PSQS9X6e4qTF zK98EwxZdAxhGGwYUJW;=u1xt- zeTGC%z)jALcNWu^C{EZazTsMk#Q z&?M%fw3VH2?2^?=>0-iL#6zzPdkUXS$}%};sCU{{2v#NHDG#wws(jM(znVf4%WSS@ zVh-2(8kvS#uj(l@K39pf$EJAQ=`?t zv{6E9!a^uNuw8!_}1nSPPz3s3onHnpCfF#IF0KXi`nrdUsLYx&30T(pZ~hD)g5&#hv$; z!{hE=;7~l%QQ?EJYtlby*n03Y{-AsgJRCsV^w2~Tkv=dRZBws$#%H(s_2=-s>P31 z1CMzd29F&MPUwHH#GZ)E4WvXpQsQO$slr1}oZ;Xqsmbu|iMPRN=i;j&k_;`CD*pMg zu7sQMa=~jM=Q#;U-%iTEsQe$w|v3H&y#;l&=jLQ7cA`*aMLU>j~2IU!||-8P|UG?tHS=*sZYz7xPGe4_)SuIZ3u4Q`IQ?H#dYV5<<8xdr5|u-9SG*tNPfpDyY*8@ zmL-Dpw#n^{CgZgS=UFW)Dfh=|gSrHWswi4x31aQA&qOGn(agXJ9eqv`bHz|JaLD7P zdin@n_YT!GCjk}zdcJ;{<9(g1o#V$a13yxj87)cr%0>|}<2)IRwSvpqTd6n^}=zP z1I$_6K>oAwkO*V|y{CPC=ynKFDT1RdAKaYSXi{5!`kYWf>Lrd)y4@VXlPc4xj(=c9 z`Z6oS@w%iC4P#s{i?s{0g+yr5K!|^INV3bVmIzY8V%8=#MZbXk>qIzj3VvC)SzzHQ z;3(Ho3|X~1>RhwojKv{AC9Bwa%e7Kc8PW#Mx1Ei}ChL-JC0LYqs&&|sGiN9{>RTk{ zNa#VECzVf@eQsjxiI+*>4Lwf&jW6PBrhC1M5y#bpwF&+lKB3x>V(${`(oXb`(~KYSMJ&&M z=tfiawmc_q>+NbuFl|jFW&WV))5pa`$%#%WQsHK~`1{?G=d~P~(=YPSJHubxi8*&u z+^Fd9WLQux<&Wx(Jr8IfJi0%8`gBiMm+*Ja#t3Bbvc)3(VW7E@KDX9^8xT2w82TW8 z!fvJ^9Rh%45Qs8h7!WrGWZ{4gB~U%XLj)}H&feamXJrm!IOoKgKo)ijw7!VpvZK%%Pff z*1j9xbv)j^;La;rlG6}_$Fe?>B0!bPGqzG@N+{%LQN41&>bXie%e$5U8KJ;h$r={@ zn+jP)A~jv79`jT5WJeE<;4ISNAuC*8XS-)DksxtBc|lqXH_x`3E}$r0+Iq2mGjVgg(w zHlFE$Zn5&L?4b$#_>p=m*)Z#+K|!*5iG`U|0kxix1aHwYt*unQCZtcMfIB#=>FhhxAIk+v|W^7jDgc?M@+Yn z9<_^_EZQ)`4HV2(uM{nH$~nAj6MX|Qv+Zt8I)AtZvlldD2uKwYExOzx~0dxRqCI{#bz#pYU&VvW2Kpwkm 
zT|j00fWSLJ-IiNW(6)%VX4#K|qh>*CN}VzF5!5y;L6(dXbB_WFZ3bdI2Q~qM0|F<9 zZ_i>2;D8`yW?VK_{q0pKI|m0E$}8hmuMsHD?2ZQwG&7xbOoCsM0XBk2gj@SxbB5HO z5QAaLAJiYh=n&}`d5ttx^Eg$RIC-l$d4;405Om1INo!mv6Cs%uk|xuhQ&_Vw&?s%i zsxnrsI!dlv3U$%AF6qmnphvc*`Qx*~{-mohCWl-uhZAawJD)UIGlF9*VoK$8VtHz$ zt}esAej8>O|-iRTDYU z-1j&+#<%3jkEE{S(3gslT%U9+;HOyIbf*}2U|g#xOm+qKG!{B;EpHupg+9|w_e^i9 z@Xxm4l*?t7g1L8#3iT^>8x1pD5MQJ40SQW|38;w(>4q9K#2}4*0d74Q4CsjR0YtON z7Xd?zsA)lNj1Y>JK4g4Az-1=1VbD*m7&`R=^`{kzVh9-vVMsw}4oNr~$Y|3)1+^kV zxH63te~%Dc;CNdD^cB#E|1@Us8(K!%U3h^C5^1Y|0H=s)1rT0c2~cW4j|=Xz)>%g* zYCK4Nv4V1Zi0{|*UPA{c2$O}KS)uJf6HnN-6QGGb#eQ!Oy#u-z=p4b$LuX$-izi?= z?sB33U2vxsST`3~ELd-&Ukc53Qx7jjtD9pKAma4beiUb>5oYE}#G3}$fa3SD+T<2~ zJY!bWLV6P+eK%XeqFHAj)L7%@05!Y?zhBm(yz7eTyG`u8m63#|MEuJC_gwhj745{o`pf@RoCgW z%9M+qxD+|5ETMDbjQ8t8Dc54qrOZ>f4tJG|6D*=%$poy8>vbl0X!M3tE!(_ti*B$z zI)1rx!2b1~VW^8lUo`h=#4!UhA@;Q_+`Z@P2VM7L}ZQg|{5f^2v{hb{WE-o(EdP&dX>P~4vtbw?> zq2gcyi*6D!jmUyQ+C&0EE*%<=he|RwefQoI1w7x5#aH7LR`?nxWEFpq($xja>kT=q zL&&Hkq>?C2HuTiLRdLY1xm+(;n3!vqTGPgrLs?~`m1gFd_%_W*r#+XfFeL|=o)z`) zxPdfo1h=P|*QQ|)`%baxxh)&T6a|h~6t3%6biZZjAG%v*m5)h|Q3`GbHBtHYJDu}0 zq4y=F@+oF`QsMKuyG30iQ(3Xp(cr;j%-vf z4eej`kK{kP5=fEX=hM2#)Cz9VXekTXr$#v$Dmuw#7W#-4l;nZCPkZ*`;nzvN)AWH&W;zLe=V+b;~TnLDf%_3Iyt7{4X&%l*>T9 zB#pKR|NrDInBR1#7Iou7)HO4Q(|cI^Ai`+_jXgP}Tm?__8;hETRklYN??UGGI+^69a6H6Mi;e7}AQ=xIJmQC#YE%Q)0B2UF+3)-CS$ zTBzJQ0}0ilLdD&HCWcc_I#~pl&=M_sa)5!!jyEx4~D z&XQ93>s#EeUPG^8)YhEu#ji3gxRJXGhls8fY6j~SxTV>jyYcp1(`*10uygw0r#ql%3!p6Xqy0_j3zIo^4QcI$2z!w3 zSCHAf7>Op?1#IT%husp zK7yFlGu<~buKreQI3NpI@xb14>*gc?9ly2e%e0j;uj-ul^kVCliV%h1gYhx6 zR3BT;c&(h=0u0N+DqyRcASrMxD2iTv_v5Bt3DS7)83kv`)Sd6s^@x)~FRgCu+upO5 zmX>Wd2CpC3fTRIMS{B2H&Q657D!#Uf)Z_6U7t2UToQBCCp>YMb^S<7P633mB5e+`k z-*#mg%%FRtD0xW8A=0j#mQ~p@2PuT{74jK>uHE5M ze|iscS_EPG`LjR3gy*^)2j*c4WDyG-_wy`l2~5e@y_l@l!Z^RgC{+7z<)J4vHZ6P? 
[... remainder of base85-encoded binary image payload omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png b/docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png
new file mode 100644
index 0000000000000000000000000000000000000000..732c42ff768e9c3875abde67d085522c65e616dc
GIT binary patch
literal 144099

[... base85-encoded PNG payload omitted: benchmark figure fig_1024_1_none_nopoolsmall.png, 144099 bytes ...]
zX{JX^I0SBX%NaFmN2`DL_M#9I6N9wx%#1vJAc;EyX#4Aeb6{dZ3$nhqg-Rn3YwZI9|;1+=C5DB4o^?phK4Xf0$Em87CK|d20Z8bpaJ`I zXJVkbQ5|A+d*Jo3(r_`a5A<(A$41A{5SVC$5ZN>B)FMKEbMT+C0I?QCgTXSx51#i| z#`9he9#A&igtnJdo#USKl0QC?J>!2p({Fq=d;3?LuPY6bCE8{?;_x@_NH6YTt?OC& zY_%f{2Z`VDCAUTi(#2@W>s#puuavuX^c}y+h?!W>D$L=aP8t?wV~yF8olMfb%Vby= zOjM*!_2-X{S5V#I-J#{FZvFW(L$Mh8Rx9MuttGK99G-0z(~ZOq{V>J%afM$R`?ZtN zVM|vWBPB<7Yr}!!AqxLeTdu#qQB+RHO z(&8i8QiOc=v~#r%?4CbvvV(&yhS|BIQX0@2?)m5y?M|z~YZ+2JB>o+H>crGQ$qcP9TQ~2M0$0K?7!9 zORa&h=?e5y<;F6VjRJy#LE+&=M?7C&j1js471XP%tFdrI7Q1yxqyA)7%emvVUUSBg zqaKFh*(xi*uI)?WF76J~uuJ`O&m$JS*Knkv_G{J@WL4R}U^M zn}RH?`AvKBVp$U?C{D>K#zO^DHs=QqQZLd#fTx9KN#J!I+Bs8ES3j9^THthi zhgN3Ljm2d--3HQFcXhNoaZz`70c>i?bl^*051IpEq~Xt=$UOP`6lO?&GnOv(y7Q`z zOOY5E@4xJK4H-|__dkHMtz{t=N-)M?)A#D0E(L% zIcsV9vnLpH%k3em_0G2XfwrAy#6!2VN2jMRczJ6|Tfxae`uEBCS^qsjxDjP(wB?Rg zQWA&;!wQm{%8~p8I`aM}N9Gfp?)XU^+@+=(4D|Z4`=)r>;q4TA0x;`Zf710eJJ={XoOfT} z0=gw8CZT|zUAe1m5pc!Of@ya(Q%F5!^7-d2FQt=Zb-%(-&E8cJZLv) zpaqG~GlBbmrKo1$!&tPvM)xT9KK`ng3^ZBzPo#DeA@v8HJs)0DfaMO$Xp2zC54Gn| zrM6D%nJRl7OAiy#3V!3e84b2`gJHd4!?fN2Q+2N>o++d2$XmR6V&1>EAe{qeI@IKEH+2IC+j z>+S4gMe0EXSYHiVCr2i z=9VGXHb7c1A3UJMVei?|-b&k-bgMbr?i=J1J{1*oYhw&wTz2HCZ)aWg^!#fW*=S3R%BdjS$wU+nG!poew(IXn$GwbMcGoeuSS8Q*Zmkjr`aN zlXW9SD@_UG-^aal(Zro8hM#C6{d~KaRq3lW9&p|XTy%RR5&n%}pW(6t%WfRnIo<7a=qp`}#=@{7J~G0U zYI8a2Z^s>StLf680s{iJG%-y}KBS6;K}9bNAh(f`5g*vh5t)SDj$obc0|{I|k*zQ3 z`HRZ~I+wl`EcpfP&=V7*L&t4}_xBFIb`bFQ3vN)%sxJ0@yYW=~bnV`4O3Ga?!$&n@ z!6L70FDMCe%Jb$AnU)ex8Bh}ky%jahszob%zeah~SWp1Wq+fCSA||x&^5D0c0srxK3*ZC`Lt5Le#nXL~EaaLl)< zPcq*6&PcTVAsbOacU(yWRxWpe~G2$_4J#WV*RO;{lqfw$7mfx%$a7m^B>g+_gd9Rw7{|H#X2F8 zhxVj6!h+~K$AY(=2{ZOgOOwXhkALjl&bt1}Y$E1}f01q2Wz!?yPy2oSgRK)kJ`Rzu zH*EyM7>5*&7U$I}wfI7%SznIk-QSO*(r25Ym{S&Iy?5^(9;`^H6Et$~^78URuFTfv z=9A3RowQnQo%;C>!SJMli0$pibGC{ar3v0aJJOw9FGz=7FyKm|6*%1#hDQ?I*5fZJ zBSXQ(RbG!_;{EpRHFFD#UX#ZNrUW4N)=Uu7;D{0Q>Ukz+YPAc3_$0v9Dfx zaqb5Ao$0u2dY|%%oAt_eB7%~`2yH-WSlth;QSx3?HA6i!p0W?GB@mCQ5XPERy<619 zz{YMG98`fLV4CDDU@*uXCxU6mX!mmV-j_ta(D?W4!c(VoiC1f_r*O@9a+CVJ4D9T6 zXy++3a_)EHbA2Ma@5(96X!T@% zeq_NoKE~Rns~IF1u0BZ!iQx(>fx1O%IKUQRo$=f9vWcJ<_oY5JOR zTfq*~Mc@QY$GB<6p8E<)&}#wc@ZL3fpAUFG-`Uv-eD@A08oWPyu8`HYxwZ8PDzpL7 zkynq0T01ex*mg9FN6o0;mlj)`+|AXL6*64j{N61EC!Nq}g&h?TJPrN*(V&(?l7MEf z+6V~=%`Gf^R>_X%cjJJV8y3ni4|jI}^-ckM892US8yFh4&JfbHt)Dxljb7GtG9 znwomQ$`GO4Wzr7T&TnaGc(b}{j?h_Alrq<_vC{ysLuPrL?vGhP#1)U@YAayL--?P* zUjo2(2=oO&&f75NPetlUhXE_Zoi_9cUUULM2w)?LZAHS|oPn*K9kp$ zH@ukesWd4WH63ag708RGzV-vj%3tPX*!d>)tiiYlGTh+4` z$mFX3G`obyBlBI_#W;0t{h6i2m8s@0TNBgvyF=Rq?``5_f9bK%(L5Xcf>D}Z^KRmc zM8^1dRtd~be+Z2VOg|=*z|GYxYi?-?fF@E!N-p+&T0~MoU`K43KC{wGqd~%t~~|bijk4ge6%&Jcfe_6BR~a;+4D!hVIN+}};4#uI z(O(_B2*@UQHC$riHH7AgU`?;KOL1V?&TTxcWr02NdMxU;7Bp2mFFXX}>-m4^tSC zr53}W2{&jCOLN`D*#>e@K|G=7(fmCF){*;DKK{|a%l9;tl$3s}%%hIN%O6o2Oa*l9 z@Z>~aPw!1&AO`$r7GSv+Ta3l%c%1>`mxn+0J|-qII@&9POg;PdTnoCVfa39@!qHsWm4D1 z6x!R%;k$M1f#_2C-X}rK?swGK%~mrACo0Sq4_AkcpBagKj;0~l5J(!TstZl6m__AI zs()a^OcNUtLGy!hvG-t`K%gVEiG7C{MaFfn!L$8#4tkw;j^vBy5|QQ3TaTc%Zthiv z@83K&_7NVV%1F->&-0BhY5D2jHX%{%qd-8>4*jZm4T;H`DKuPSe&iBx&=ZN#PGx2} z1m3~C!jqU+Z((-AZ)R!PEyYyx12Audc+Ax((F2Cue4_F_Q{8Fgd`IliXgaw>j(U+l z41E|cwSXcM9w&a(Hz>k<>c0zK9Lef?EPhNau*#|=CLm~AO!j*2?98<`R%Seu_kv!% z@JG4F`RP%PRz)PdU8s3Y26MJcgu_VqxoqdIrHY1JrWKy0yIv=DreeIUj=a>`2Oiyc7MhUrBj^8LxRWg)jsH9DBwOa)5TNJ#>k}P7ooj7y-?@DofN#H=8h*kM ze}|PpdTdyva!H{le}OInn4z{o9`UO2f0Yq?f84`GxC0cuJF}LZAO)Nf|(S z-Eh?Z7)*{_D*(W>_KnlDBi7q_w*s9(82 z?gAwV%$7-2qFov*V*&|J_P)`JYz9g?x9bE1pG+QCP~VQ>;_dk&vx3suslgX+hYL$X ziHn=NK(`THG?X|{GOeMf2S-XuikO%f!PDVF&Cw{qtkDkh@j?B_X@&l>{>_!cqa)x# 
zJjL*SI{;nI_@eSBz?S3V`*gbite{x`^#bKuhE;e=W6!4deKTQ=DMJPh=N5PXLX z#rs=@%xbWDm|_0`Iu4qGEYqz?SB70zeyBHUQ_*(2^lLyrWWP3gCtlj$&+iK4CS`M8 z0@~dCF1_0KEahxO)Nujwqr?EG#=SRyH=GVk>9o z@&e1DGh4xvYElpWw|2Kl_-v*~YRpoGma@p5t;l_?DBrT1+TXcDPRuLXvcE<0nkWf5`6q)CGBUPvsBBZ{R3ONCXxnZNZ`Ld@$j}?Y6&q|G>_SCLrJ; z9o=g83t!&t+RUG|Mx@W@`Bt{uW<`{^V&q{AAtQ!D!-~ zCfc-$-d_#(=5Q32fBP3J5o`?Y*$iEW_qUqwl###9zR1pwp;pm{!4T*#9e3*qhdS=G zxBu7-nfvw%&yL=F?LsS^HT!f*+V59?XSvS2>z#I@3zhYq#p8Ah+PUqqshj=*0i0Y% zT0}mR3GXnSia!i`W2LnjHZ!`$f6b*|F{hHiy|b979O~RndnJt+x*s+EAws`_35p~? zSFPjwj@_?6+gp3w&_2f*=HYZy3K7j!-I=nxF!Y#leb+BiK2pw-zsRR;veV#^ACSW- zPR!-z=XJj?!twWm9%EvCVbRB3+eJ?(8*MkQ56dRKaY>#myI)mZod7(yHGo;ru)v%7 zJ~VVsR<_5~fm%wkCnBjfiB5I@fdmgdJAipuk~ zoTD_+Vdu$X5*KBohlj2a9}bfhx$)vd03=-?;5`mC5o1$bspO4F;aehXI_djq6!S*G zf8IL=o-?{%iFhRc%?~Tzyl8&pUd6M+-)GuPxMb_1FE&UGgm)Fwx{*bLMqc!bPZc6| zcXu_uzjY*&>)#aWswvMK-12Zof7qKDZJ@o7)^BpI)2!RsF8S|yv4#EPUmy+n(cBEm zvjEZ5`sQYTC`AzNPufx%msSGz%9UQI$QDOSuAx9FX6+a+qIk0SVCA%Lq+0#+u7AMG9h7nSohkSO<$MYsaps}j^n4DUX%89Gvtf4h)m#=V250P z^n==5B|lMYC$%jarfKmKL&NvmxRkVPow*j-6fW76K4ye8r^dJy{%8eIQBm13-iiL1 zrMT#}oLZn9ZOXkbMHqOMt@jo7W4n@OGXq|YshJlImm8zQ^6Z!rk%tUtoCr}A`;JjJyKvqr$xHuf zI!4Crg9Glk7i!%@y>P)rJ`d`MjY}XDsS8G%L;s5_ll>5YaAovX7hj(U30;EQjablg zT19K z*O>UUsUf)~`5OoJklT3~G~8PJf#yF~HfRL=6x8F|uaZO6tm%|0Hyok;Re&l$fQ zKH>RLtk-E^3+Vex_6%%Exc04!=Qqa$9~d!EEC0Um`pLPOd4G>M7O!PFHJjUIk6b2N z#<}KXMS6|(4qe~L$-shd@KX-auirMFQEIDG9dR)bmW@lH#JRad4Cc}a2nn^CW<-4W z_)!Y@!@$5mV9}s82Q6^@#Kirhkdv={FQ`toIapbrW@NNJWwd&_G8hF?nnX&Oq)A)a zf0%|@*}s^Ebv%hk+Ni1lQ8ZP`x7kCFYU8o;6s!)7-l(Ng9&8)df?nDJL%&AFO?Ino zZd)*_j#Ctmv!FCH=Fmd?jap`w0Pc~I=8;E2T=S{|Eywe&4Nb47UG^IM>7G3^e`+7o zN|p?BaZFbP=Gp%lz>2S4zcqd5-gTPEWPi$%a@!wvQwzkg>6;B5YorRhvYDOEhcq|0 znVzgnGH**+{{c>h{aw+K@e$Gq8H#<1rbGC}&UFEz0LmK&Yxu4{KbX?(hHu6{{pCl0 z9!V&rCQ3g@F%%yEL66@NYd{SMvdM$8sUocQ9rC4>jPMOuqhS%#-idMCFOhv_?CSQY zyji}PGv8RZ1-&OBI}VdMp`;62c9Rdp?*C!^Mj;thi5I`z|LEO8`lnhoG+X8abN{{T-PCI$pI2XZ2;2~?x#VK} z$?s|~CVILtA#YXyV?KS@Z@l_b=V>wf$Q#Kj0-YP&q)u)JCyv3vW`@3^>4j_JgzE(9 z@gE6A4muW5m{&&coMb8%?32Ff+})R8r;wNdbuB_=LR>%4x1ccr`gRroy7ByP%t^(& zds;QebPsg(Cr}YxDbL}fD1Hx8YB-<|3F?ebQJ0)nHgj>lOd(BpGl+bcVMQ|PRM`4K z_gvWR2-Ao_Y##Rg`*-yFVw6z?W+$qEXDlwZEDvOdl$@Itx$c=#O2<9fS-`%y`x}XX z2ei4|{fHH`#n_fkWti8m$FmvVfU!-`7=qLdLC#8sw6lvQ0E9y%+`v9GJ8Cb z8@HAeu6lkS{FbfXXV;lB3A|1VbE4=6Dudw^_7bAmD`v(=d8C++7wNGHKsOOMF7wa3_%l+ETGMXLjCvBQk(ZB)Ne)E^q(F| zxNhf>Jm9zB7ins_WNp3K1eZr?>A?mctZ3g+kHhHzm^?s$_Y?a)On3y0$OrEy3Lq+= zG5XwVg2aNI?61u>-P6e_>Vo&=0{Tv(1aAOhjSki)5H*?m#n}Pqj{3`!pQj6FDh*b; zQy~H_;`{d{q@|xxM#(EEJOy=I%^D~lp6Tl9mY_X8bxDL_c0!211H@1a8pY+=!ziHq zTCB39yVl@C0e9IZ8tb3EjE=li8it30Y_l=)4Fh=f%2ACX6{_LnsF6Zhr>c?RSOXVl zy92il?j}g9@Nb!AHFP#y!YlenLAgvyyH2NF>a-tL<$<==7boIT>`nToZ2%2V_DBC%Z;^a3LivG|@eA2wJ+ zUw&m2w$u-q$cZ1W*(andslGL6S_0R5LPiEPfI2|*fKmX3iHRvEhnDI|{|a4s!VcNe z5)loPtOzArnqiQtwPRACpGcLPz*Trz=>s5m2B+37U_lUZA@qv4q@-ag*%UB^bbMS3 zWQX7j1)P|>d9DB)D!{A)f}W<~1#T?>?Wh0zX$3k}E>p>TtP~qSXaG(nq@)@f8&Qpo zjR9pwR7cR*rhx)YJX{09w?gSE^eilH5nX74hYHjy_k4JjQ#uf!Cc%q9NnxJF7 zSh#e$qH}@pWq<5>fT=@f*nn68h>Fyq6>j@KH-XSMf*ARMyuP9yfgt#5@$vCsfqMgj z#C~T|tH~44v0i0kD>6N(1#AZS(x6`dJv}XCYs(7EHL!M%Vms7J%?Y5lc&rV>sCU4` z4YTK~`G6O*?McnQB9!s5wI6_Z>a%_fPc``?v9r_CpH<7C&rv;Rih8I@9ZvJU)xdVt zaYH`-W*&vS`%Xcju#B#3k>d%;*-%Ry#Z6oJD-NGQ&z=*tretsXl7PWm$ zN`NmDs(jukr|qUdrdEcIG(VB+lS^ke8Ylcy%WXU4S2W&gMtgMjA z-zAn={57|OsMwPK&inSQq#i`>@haxy#}3ev0*U%rG5tB1p2Cxp!$3a+)*>J}gW(L5 zS5ff?=manoAf-WfGDsjcK1j~U7z)aLtrstl#Sg%?0q7^;UEv@cx65rP+524oMbv_9p0m!UWIbfbqNq zt{m`1AWntp20!ZCn6-y9*Nu9TLKHF$01OVk&l>}T)-O=e_}{qSzB%1^X}H>j4aiZL zBbFjZeF%K43wSC4BaUVbTfn&o=5VLIJXZfd}S^2fg6P0lJ~cYD&QMVBMnn 
zfE{rV1NFBErk6$F1ppjHnlUXcEfhEbz^da5)OBx_lNBg!-G|5;VZkOJF2PY5UZT%d zd2)kd#TE{1hb&@#cj!ZR7wHXb@eFJgXX|V$Fjy-vDqPVTY~-E>=8b0S@D z8yAZt7n=8KZh7CKJV^9~h*y=7OhUYDol1ErteyCl_`nd+!Aa3smqppW(=2nd;~l^7 zk1FnC#~(Yy6|f7G)vYo-pNlK&z0WG1?cpL9-m|N8I7@EH!BynC;|x~|3JhDjJF7AR@@qU3 zj`^n5&j<47b5E4rNXBgw$(Q!eP_SH4n^a*BUa{XceZDcBr^fqOckCUPss1Oq z+Qum})&5}UpvCLyB5~&zn)a_IKc=k38De_l!y9~ztFebD3kX4?9`5Pg6Xx;U)P~lP zmKWJnvlA`)S>*?4?i%L~Pd_UhEm>pMD1;VpvE!DR5um7H?IMCXdh}f}%6ykc-77=+ zctk`Up#7H19spG+8%V2N_g4cmGij!$r=w$yLCO)hy1K!F-_F8jCNfsK(O1|+NBw@r zSORvHC*1jIP^ke=i;&9za@COi7FR=hzNjD;9StEb&m&fm8!NQ0^PR(yqQK z+L2C3@P8N^6g!4MH9r52Wh?mvEL$aS+{dq_R?g7W@BETtDsbA+2D7Uam>ck6(l!u~ z^N7#zXnVe7A6Tyc;5wQ(JW2aE)6uy_<$qy1QqHAsK0Vq|udt;6Hw_^9y*cV^vmDp2 zUk6&H`I~MdKFl?OJW9mJDjxX*8LtCf50P~f?SyoHfWJTP&6`g^fv{2?29&g^^;B8q z9?3+<87zwKK-C{mnP!0{?9ASC&v!VaGg{lOgQ~ZX7RP|2c>sOZQn{!Mcb;OD5YLzs zPj6t52v2X@)WTFUfgm=Ud@O52Rk(__CHKQz_BIadQSym$=z3meeml9&z>2|YReME$ z?Rn3BvsV@ces;5}m%=uj+xDx&pCp3^qTV;}ZPP9)&F)pZgtuFiXs4SF&06NU+#kvx zb;##0Fm)GlPL;8@w>P|Bqj{1yJ*rYOKSI5aX0I>9-3VtuH147wPx}6^W2f) z(EC9R&0hxv^0aFt0nDL-R4pK_s$J}${YLyhK%2vR7&sOFlAfEM!hE-acy-Og+9t^_ z@x_IPmnwpB9W;Z(6%_Y`QqQ51{nTp$cNm< zP8Qis0g@(X9TMhNQK$M!|9I59)qKoup?>7o{Qh zMN<_6 zo~YjNnDVZdqUOWz1#$fCOp(k}s%b&rbbm78W+_Xit*#wD|9QU-a$dv!9-DPHgp5XsQQ%Y z?fHo>X;#0F_jXzhBtLKxNf;R!yE=E4ryqj+!}l0Y>Hi^{r&`;h{(s<>y4B1{#oPa1 zxusOP|EGMOMBx7hpC`b=f{aQ8MKOr!?lNfHynEM(4}4w%K=xjK0`ee_V|&-#KlK^% zsYp!-a?cDK+^624^!Syn$}|Z|IE@W`I82AX>7s%H?~6nfWwV=3UjXKm$H9H593ijZ zLWnX7d2$Jfi7Pc6fV|@a62kWw3Ah8TTe9bIAY8GLP^Ci_52CD~egBTB_V5h^e_Zab zka4#D;B>mPqL5FYSNHtGgKJ?pYwu|TjWIft3R*^ot|&(Q@jAp*&|M@J4?)qqhktaX zc+V0lq<71{c6Iv2>M2V_If%&Y@k&SvIC z_DjD^1Ajx6P9R2n6%7rvVK2uyK!-Iu=PH>N3ENC*rK7Je>d&7)gkky#Q>k~=)zyK1 zR^=u64hqM3ZabqA2MC+UFOY75j1}9lO7PvWSWQ+F>Vu>*x71uuuK}807_!%}&|LQV z{HVuEWo!}r@PN4}YnFjQ57<_@l?vsMPqcv9hLQ?uTTJA#ID!jvW=3z;*5aUdXebt% z%$NH5iffaokV`>$U^Z3D4+E4l3c@tQJV2eR(Iauw_e7XH#V_CyJNtX^ozBkB8)}uA z^gaSMau~$cHa0fFgzWDizC>DDdU>33oG3!=u5Wq}M_CT*mOE=c>(;8JYP_?hzOvUW zQC<$W_J%;$!hU%ybMlQyw5^Z7V{x9i-RCp*p0btWKO((+J2U7S_nihOoF+gW^FyoMSkVB}c&2CbSCcr0(GT6P%4B6_E$3aF-x8TG;ZddYoFLwO zQ2)V9*#{7Z0LDe60=1eX_QrI$0W3aPC57^tn)>?#9oHs5DR-!{uoTp;{I&P1%8a@Q zOD!kffr52(^aBX*@(P4a!zrXMOedZ1S3k1-`wB?QhXRMk8>PHQBD{8hoXKi+m9;~!g{f^n+qJrZ^HRl@k^ zmg2T78WmcUW|EXtP)zmM;tT(Z8^RA&$CZ@e{@ub`qFId z>b=F0#}>z0^It0eCWa7ealDew>qkQ6SK;#2gEau5FQi~***x9p0bIcs;4|n$zy(Wm z;4%MR6(4X)0I_Xz$G*xYYv*3b3Z6y7pZ;Eju$AI6nrRiHi&8q% zO4h-cc)ZWc+!C`L9byxl$Gy%0$NzFnej>Rak>1nz)be${AJbXq$*1UviSmGi72G-_ zLUBo&9~XL$8x{4x{fhpinpwU+FS7K6)c(c|hkXz~SAbh%WMl+7wA+|h0G~~PQ;oCT zZ7HHHlq{C?38k!9q=9(kRdgRxaSNi`i?f~^>j5oWgn#Q9K+I5cu$RFfw$2ICb?7%E zI+Ck#rpYBLSFGmvFi3`Wy!vM>gPlqGS+LPj!E|hydZeecb<?@Y1}Hl3aMsSd;?n?U6;$w2xt5+L(IFB zcb&##!HPz)SoClkcceg!z=h?FQ=#l<=07a2`p{3>gocAj{2B_^k4Fp*4T%NZbhOk+IcR*w(QSMB_C zC^IsYvZ(eCKKxaJI;S84B;?QRcGs<+@M-1%(n2KS5aZWu`O%%Yh@NW5I^^Eor8#r7jWfh zaguM=zFSlEY>h;`Iail=2l{e&O5Zic;^;r()cwU46*u5#P>Wx zx^K2OxEfz#(JT{4>Afb;b|v~`qPICo8VI~ z$GP+Jr{gvs-b zmrokTQs`cHlTA0t)_;DbFShsn%k8vp3x|aJ>1$&C!MD~#H<4n#(A=TL;{}5<-rbMQ zvUSOydX}zD%7iq3%DE{mQ$WBA%P)$>&MChv`SjEs#(#6!%%Hi}kWf)kL1*^|5FNlr z3kz>EYLB_dk9$Bh3pI$X>n2m?BYC<(U6Ej{C)s>SY;VndX+R5A3thU1i$T zRhD9e-D{H*wk>;@MrbO&KB=)k-Q!^;yRK4NWB7Q3!GiR|sOW@>#J0-)F1PvRI~v8N zE@N*eMy1%te{C?NdS}1)$lx(85ACgn1OIiQ**B+43Fo(>RhT)@DnpUdG>&d4e{(*N zX(XZ=wdTtrP&sV*Eqpw~4&}48-8wY7`p+@S z!4{Iyv56L0jJ``jDBWRTVwX3CudC^>yUvj6VkWUXDgApHla|(Obuyu-xLsc&ORM`x z^>nSJm+1H8qM#W*H+yhvuE2+bQgZY68kGZ4r4Ce1*@Kfc9tPHXrR)O>xjns`Se1uE zg1=-zu>HLvS@*!bJn8y66RzPAt54F;IlG9;w;|~qg-5GK{jTi%iLSVy;3IkN#PL)+ 
zlH5};xuyASP_&rH)^?NlW!oqeIftdEecqXBCnscV?73z6ySw%xWH@37~ z1#{bzndJdgL^huBhZ^OEjL!EFget8e&1?%o$kMADjW~kj)YN}Mse&Ql;2+yuM#A;B z!9hg+t*YaJFlJnitN2LnmQHPTGY7ixJW)Dt;%dRSZ{ffPlxtij{0%)lFQ`sYkf1rl zafL9b4*L#&em(?-n<&~F{aaxn64I5CBeA-^-VOcUpI>QGiEGdubo*VuCoY+Fdg67s zIc?g3oz}M`M=PQsb(7@k43#pp{~#C0v*Z)G`oVgBv*5%FWhCSxf`GIm+Sb76ou(vx zzkpuZXz6G>Tfbc2P#jjRgTXiaF5p}nm9(#&+^r8}cxi;1TKi@_KgH#8U5#QM-C|YI zRKZd|rQ~A@`{I?>)1;lPSMzpa662V;Ny(wcw;VT#EyB*a=Ynn(({u?((*0$frGl^` zoG{muXmZ4<1<86s;8#QWH-Y}=q3pKqNlDjc(e`%6hf(prJ~weeP{PYsHjAmnNQW=h z^Bj z@gouRyg|mJL!*|KmVm;vg+vxNXG*Z`enZRx ze6OP&jnr1_8k+$k#|iV_t-ie@NLx_#f;};ly2UQYOdA6 z+Z6D4YxpXlpg+c^gk8oMIwHh(tbVb_hL4rN?-m1oAV@9(Oc3JCm?0_%gao~&LEkaMSs<$F!h1 zKs;=Pg{;sG{{hiBJY}$@b#t*+P(ou2O~miz<#v#(K=K#`JSN2cm6GycR($H?JlTI@ zRbwz)twKP+sk*vm3#bDSLLX%JHcP)KK=&dFeh&!$0se7zc2*cpR`>}x3KAfYNu$_A z2-s;9`1|3z;m=krMac?0eEc{XXk)0ey-v4aO88=T6L27~%`b0)=zQ;YRJ>&pGnIOI1f3_21#yeLx&F2w%~Ko5lEaFU4zXRwtXBMJe? zZSy>wzM8_iG*xP$IlB*d+q=1$S8IWitB#&Osk3)8jTIvwC=`s%LM`=f_ z?6BO0)siB$IJHfXzDY0fU9pQg#%n@B_xbT@r^1}a{!-N2DST)U2>jxqW_9bcUsgg` z#zF7n7$C{@TS9N$zb^yTE2N46Wd`Y9LC_{pP^5D^xDr4T37Lc|Jp=Fb3?C6yx^C+{ zwrn&4DafKrM1ncD&Vt-pSUDFvSC0DUl0CMirp{3YC_yTLkee;dO8rV90ltbbusU!a z4r8-`8U)#o;0y-qPG_=UGQgCPNlBr}$;m92JhLljD!4I^DFOZlRQ?(cjxh{Lp?)v9 z>sn*u24QBoWIxP!)**QipVdG-WQ+lv|NUXG&2JzPIG?mXSNp@qVsM}#RztU0ssC{q zY_rbY&NHI?R0|Y^$HS(fbHw$}EF0#!qY#q9(Z~_oROf5&R$~Eut)TTA#;v+7QOU>q z%edrhZ00(N-e+@dV3;NBS9NjWh7$oq%nG1G08<1YKp;bgM6Pm2hRB_JwVzWS5TdwM zT>^ti!{nr?F|Y#$SNoR6s)}+ zqWeB;NLlwc1AGOmVYaZUiWlAI_1Ve6?Bb#rM7RRF4adXy;2_Wz;1h=eu%$qMucr&i zNDmJlMP^)v$L}U5xuyCM5dFiFch&b{)x?eBNtU{GQ5R;`^%dS6lWKP;VKSCrGtUA z&r&(urT(Z(SHnBzs>jz2C%2u0?#3_>_6wwEys^?#2pqoWvk~v(9_>P`=yOB8Xo2gw z%YE>M$`21e&d7($6JqEBgMvUr1n_4gFn4e(Ud6nk)ixpJdWM`7v{H)>+^-R3gEc|E<#})y>15mTEZ{EcFod0#q_V zhg*Ld=vB-otK$)l8m<|jg7{r`kq|U2Vyw7YL z$|u%K{2;wS`fL=?l5_#D?E+MqwVfSg!DiPFxr5XM-2bq}%}%Xd;9rne6~^*Ugp^Q* zq~?t}PqYFZZ-pzug8l;e!l><{e2lhvi~nked<=g3aWFAMKGx`1iIv=mqPmFr@$4hj z2AoZWZPL@a?0c8wTz0=dd`XM&IW$E3tVJm)-f+u-U&ToJ?&0AfVwob(OjupD67RVt zlv^A8RQLHujy{t!5S9-?P*{*Ms7LJZ>zQ!VXM^LQHrJLqYE zA8WRNh!SHcf>GeufO1eVmkHQ0aN?NpJ%6#6bm=LCmo>@tRU9234-PxRbpaE6_DZ3S z%Fo}!FX49HOkq)Xj{=`MWI39#;=o&C{J*KBQ;iAr=Hki!}F&C&J2oVy_ zhv^Z^1ik@LVl@_f=z%&t+S6k?JwJEDtVOlneHKv{h*SvNlVZ{ZWK*VcKx?XyvZA5_ zG9)A8;@*-=K$Qq0Khp7uiC)tkafu^N^%2l&qFH(rKJnNY@4lylMsPu9<(5@p`?cR$MWVIVmE!$ec|f=tNH%ZZ~f=u!7^Jo zUHyUZAHjQb@0vqbYpMHUwkAe4hV4!9(nb}=*T&;xurm-9cmzsS`;>6Lc8*kG;+ zC3!&2f5R@3u&w!S0O!#dF~QdGRNH zzKA>Oa_fktS=;vHV{yB^#j)#tB=)qo7WH}GbCk8G72BO4;g5uO)Jr+`L2nJ0ga zkddYX?;0A&61a}M<`zlOzBf1H-CI=W;;Qz}N#cTo20k!+p5njVK5!dRC89q%7n%AY zODo2M<4RiX^F;U8aKr?gkHJfwc7gM#O;QM6GBF*DJ~*yziMyfCoVj?4%ZiD?ANQ=O zHklxTxW09!L!6ah!3TYs%KVCDWiXF~p=}7X=g{g=G zJE6+)h7_LE!hT%G7h_6YOFeBf-Ws8q*gkIaF{faTTNnCIP3bv4tl)o$)8{nA+>fHv zTV5Ui%>@NE^g@pyW8&`$HYo{-zCtlG1J(EW>$u^N=-70!*lc8xCX7k9jF0bmYN7x0 z=x&lc{O9&)=o8NO$xya7p%6#MKfJ+WisR3BscjL@g-XrCo_$MohUNW7){-41Bgc<{ zx$zdc7Lv5Z=0QDkzq^HZc|E<$ig4v5(M?7^7+u;aAmGRJOEdJ*|M|Dh7LFrmsX>ls z2wfg1;65q5YVADr|DmXB0!;@?L1W39mPT2UZ)RqrNBQNypBu2E@BT5(`lbtOq}&}>**rTAu`8szGxk%R`(|Z6XgXi!ROC!4iBC6^z_O-&v-y*fJONDEfm5J zBvi$F4t@Vh<;Vb~mrwSJmPRRl&{O%740A+4*;{NOGLE`*@V==Aup>?&DE^y#rBD#@ zr=*~0YL4-~vx4#y2F)fw2M8L~H&Ar|NnD*OV5+F5CT?xb41pc2dDpL9`wh@jeOnvG z{|qr|$2q2>rHuor7swGkYb@9au|Zo)$-oc-q;Vl+(aaWto)bBC%fhli>Hu*bNSq9) zAOS&vT1ywQE1}xyA1VE6$H>4ie|9hh_bNXaYjuG-h5GrZ?&3sHTH2Dy2@-Ooq2FzV zKz_LGu0o~R0vV>>@FWF}=J0?MbQN2)SA>oWx?xaBPAa|k>bz3uzU#rn<_7938^GL11}8M@z9XMxn`WkJ)i zpYdO4zpej))*+v2jTQGUzUt(Dll9rL0~9FS;M_MYA%x}}(jyua8?$Yp{o5@bTb zWh53IiN4x|hcq-Y1f~aSdU|uY%Y-07c?SYn6ws7gJ2@HmXUGGso!0FLcN-%8gu?U^ 
zVt|~r1<21Q6dSUi0g`~iGz2gWScOQ&55ydx!q2f&UwW~ZKp~+AH!Bha{Nu+nsAgb^ znva*?D$(%JysVKwR0Ilw*=@MzfnQ;!OyTF}-{3i-zZQ0VYgq?57k#Nz&qbgdG$2Ml zab%{&AKiVmnz1gSEAns!ZwN7Tis=9RS>d2{ zP#;V$EQr2*`QX*7SFm1xuda4Ng9qtB#-L_|=wCe}qh>HoLsr3e3md?mVqowBB02&n z1Cng~ZFV={Eyh8j4tx*f!9tm+UjuTu$bGRSBAHoO5UC^dz+h;46A*9}T4^Y`Ol?5YXJb>q!oq9ZO{`IB z-Ue*&B_wwrR0N2;1EP4~&;~)19*o|EJPKcUg&MP&%OK_k;Hso#&*Hzumq1|J3b8$h zzkP5JYxqx-#~@LQ0M6~7h^GHH32LOqjS~U22nYz8?a+{{{kv1pfG{D*-1GFtwzfd{ zC3!hH>oIN|2uy_A9*H}FV*wJ#fJ(tnHU^QXJG3y6)qyypYmb-dCOr;bK$Z~r(G4L` z14&5%+z-T_!tgL)v;iySGPqJY3Jp8pW3?Kvg7iWR=EfmXXb^`&sui^I^-!hD>@1Xc z&C1*)q4?Jw6xH(5n@p$vwP8v&N@_&%M1X#8yL&&ZFV(3FQ z#Z{{qf3^QG>>)>Y(ihGdc$uK)h3DmmD|i z=-&|6XHd68##|hvpF!z>EM{QSK|gdP(VPs zyF|L~GySi%&)(ZUEZab&!b5I(|HF4 zNd4o})5W0U4i9sG`BHf{{Oh?l312Y1AdIM}EgY6)S6<*nFbr5vJ8v;uM4V}U{z(Xi z5YM3h`TqK1=L<42f>yn>Iga(3nwf~hPS4C_W{+1%jw)Vy@!|#GSd7%E+DAs@K%{|{ zSAJ8|!lxy({l2U$END6r3s9u&w+Cj6cd#kfHB3hl@i%z46l8t{(G1nvjr^wm2Ju8dYW!a84KY~Y?>L+9PU?mod4?ZsW z@+IQ>`e*QzJz_Vm1(id^!_6FnPh-Uq!t?swyNB#9vy|@^wFrBqc!ZET%gR0!f7;Hg zLG&R13jM&ZA+-Yv4`OKfjw{H57a7v7s=qUKPmi@#0sr@`GJ}bRv9a-;vb}RqFkL`+ z5GLy|5NWyk^xin%e-!*{r0>njjhC-|owG6YSiYIiJ5zSgKZ1NxYh2{#0#;a7=v5+B z1oa@4*|2>#Wy-#`%PwE{LM=!0dxm#Ci3zZGazXz%69CH?ahMM|EQV%Pu z=8L@;xIv@Zj;)&lM+GIizu@4P5eD}+u_iP8;}iZXP4jGw-_}cqZ@DD@^$Dhb@t17f zcA_lp&4-`viU+s#TW79Giyd-nai3OIV!tM-c;UtUurPKQoS2$B{XgJy{{9~SEk0Lf z^r#gz2D7Ahy=^mIYCbrFGN&nxM1#Bvz#C~vcQ9ncJ_3aGE;eQm^|gn`;Bl>o4twBd|?l8VlSRjsaxL80xXK|(g zYDZ#o=*yQM_Y3Q9+OZ?(rq0j6$||SRD=t_zq5ED`k=}}`ucxO7yQHU24P4Lv3m>^z z01;4Fq?Eos9W7tGZ3I+QRh4e_jgCGm(9#00oC0NxkB`sQv9q>m(~Xeiv92 zhf($ow_o!M3s&v$H&dFR~tMH$7}gdYQ)W7DTe?-13+=`;#lXk z<;cHsS4#Z%7EJK1)1)QjupJpP1PO5#e3i42S33IoJb;XE+_(|S z01pg<2qzqjL+tE=M{oZRf1swl_iYFk3&rCbJaU+GCc=-?WpP3hZf_;`sA<3m(?w}= zTeNd3$ShYI$Lk_2b)qnRuai;!{}ea0TLP7mm?q*4kHY?4+~ozvCkRnL1%=uX!(!yQ zcKIVEo$q?$2Q&e&n1alfil4ETAFC2|13rTx23;vTyxGkq5uTYNT zj91(_p%H`^W-H7WiKER>Y}(DX+#Cy4Qn+EJ@*_g}AGR58G10c7`Ud1>h*bkDMh%fb z1@#v;F$V#&c%SHovn&#|BY0ycZnj~q>9)iz^tephOUyfL>N_ZE*O@C8`Y$?@JyXd? z+ozr736A1w>(<6Fj{oSZPV9}>K9OFvu%nGtibtv!4&YXNUArs;f-jFqZk{#XRl)tC z{<3_ALr6u(`gg92lq=m*!*%=m<*q3|vkPFXiGFKomDQy?WGzfY%n&sEV}lLRwRBr< zsDi>-Zv+6q(EU!ZzI-7O@TS_NiHKMFj9y+~t!82?xi$W-Xi;Ht)MfQTc6}~eYTx%I zYpc$5+!fSA%pxq|0KRpd+n`z+N7rYGty5$)8YUOYnlpw9ZFBVK;epj3v;W=g!E|kj zEp_4Ne`EE7p~-gXdtb)jLwx4153#Gtdbwd{kt7j6^}vc>_fv;+(|4|aWctB$v5R*l zzoNJ5Y!W!U8kxm$>>Ix?=F7Vqu$9!lKGRgYc_B@|i_eASah4qw8N4C63!_cg-)W)s zL13Lnw1L2e4Y#f4|41FKZ7l7PyV>G!xY8u5CPI2>!G4~AE9Oe3&zkH0bMnE(muTOK z<{y(ZPoIO}12xn9JMkw``@^C8|1)<>#5T5*dFbNbn3j|^+|EHW``#&SQjtu4`qcpJ z+R*#7I-hO79}^Egq{Mk(RjtkF`Er$-gz3pMQQ4FJmj4Sy4lXoR=S7(B|d?iX}c+|j*Ml=!QSZR5A(_udB8u$KP;dyz~NE~VZ zvZ~tmr2l2U;Wn-BDT(2k9QCR7bG_dLoSf%x#odaZe7baO_0)#9nonHNnx4ZbA}`Zt zS*2dAv9eP7mFDQtTOk>K>D}lg18Kj>|2ho+osuB>t1PadmD+q>^3wH!hE4s#ndHKl zbKJ+OT&t80A@5oIgSk8) z*-+em&;E+w(ZL1;h+yzs7I*<1Yr){@gTQH^8&mVm7$!1!Ir!T5t(HwTwg_sZ}$t0T_cb2|zxxm!u zvUk7b*1ghuemk)(2#gi=RI};#1)q9$|5qbjG@k6vi7l3OEuN?U7baFymW1u}Ktwd} zf!!T~y9&P-(8IE#;n$zcyaRB#{0Ahx^Y6si^azSzs7ku3$J8#o39NiF#^)W#YzN)Zs? zQ*9LgM&VcW&mOt!X#tPRYCBxQGXJ?xD0oc%SrSl6cHHD!8E(! 
zbr)Vc*zhK#rsjmtlA{l8-uiXxwBQ7QuHzWiv&dlh2^2nrt_t-;S0)e|0!pAbKx#Q& zzzImzVSWq=gD_kK;D>;-Dy6jx1kN9E>LT!ATyqlr0t~e~fzidM#hwq>#*qjH(h>9a zlPDte)TV#`)`4r`)bwRv#TY$UPJ=l>{owRnh<#zI0n2<^dU`M6rk~c8VEO@K!kK_y z4ZPH4X-Y{(Mh1Y*>x6{yXPu6lM*n1RuA_hPuSHxv%fosv#On1ys^(CS%`22mCp=lN zx!v&9pMdH4KfC;Ho-?nSba{~WuxP>KLAU#sxz}PqNBtGbgJYeig7SAXWb>^HIBlU+ zQVVYV{{35a?2!Hsmd5yS@(DH0Szc-B1qv!fMRgo(4}7{bpO+Vvi<1p!U<%aiE!(!; zkUc4&01y`EIQ{WQX>jvmuHQ+!hNAnoNDx>0|6svsfqay`Fq-whWx?G+n~zd}2zXFW zyiaby;gkL;vv3=G=3x>P6e?QXh`)y!SEhpC&#&i>t_Zi)_G5lqg-vw@!$#N zrw*08RR(aN`?TNWoI&;X_;V8TH|O~b^Z9KhgM0GcjD6>w*cDz*^Zk%)^&EY4VbC!! zV)KS{jrf^yqMye+A7Po`qd7TcTimX<+&80IxsP?t>-S41!Fba)s2_{FB~2!ApTmuC z1|qC;-PNmCi4g!?pdWd8!N>+T^{lBW4`L`lfIkLYPj^)%_==#O@uqz{w3(S%2K@n8 z7Knx1fGINIHPVoSeU_5K4|vhs)>a>V4HzrOV2lp0h9js+;f8}rP5UQ7fFnfgMQCl{ z9iUMz6r7)dTLIC7;l^W?pHxu^NF+>B!0Lca6G2c#7RDjmWDX8DuokQaYY&Of_j5pi z4XbQoZHoeva?_SApAEvC-S$eSB(QK{T;TlxE9{N=q)e^AsEWD&@8|g zS^$w3=ZE4UU5B~>{ur5E^m?m1O#fY8cye$v+h$Wzzi&T^yhobpIK0|9GRGsGta^9m ziZ314%RHLNKXUp~{H|s$vDw8*?vI)?!ma+#s;|XIelBCIX*8?et+hqQaUn}w|9|GV z!1s-X$HWRgOPYH_^6tn+k-QdieE;}8>eEaPr169QM# zn6xfZ^76&M^`&DEV`fJjMt}rD68a3}mG6l30Ayq=8v&tDC)|y0#pfvM8vnl}-)YMl zAyTiaqeENNBj{i5jtHeZ^XAexf+{F{Il`BgmPlIz+B%Uv;&tmnQ4i1`N5V&q9Ol=R zb`-jus{a^uLU73n8=M7I)BoV!EntY^Sq_As5J-`AXO1jD3i56cL!_T^>NNl1J6}d7 z3g5xD86L9Z{)8-OU~Gt12AB2kGC@#u((SR&L2{0?+YFFsAweT_eOX?yk0Ol~X3Z;`1<0v)@v(wUZzY>iiNkxNN2 zSvzcSvBYqe@5apd=jC3dbD+ThqbFg%{&}Ka;aw_8$!#1$a~n21*jH(yUmYb!p|9^% zb;I*-MI?YZ8~W_5M8ydVMgY;EM@6Gyh|WQ)IV~dMKDNc`tlMJ_3nTUM>>ytkm%u&% z3IYU8;6+D(Fa;@5fk$rHvIS6SuE|P}v>p}%EPX*|I^q|(ZrwTx;(>nrQ;s2U03_Vf z-a**_N#Hl~P@8%zg+Q7mFF2amnz!d@D85xx42kyr{{>xA&7!`SK`DF!m7hu80Cwc*y!KQuJUE+32 zb2B)7WX~ty0NV9d3!&|&(pCRG9VztLyjEn@ZiyY&XYP7y-aE5U{Op4~>h+vMjw+5$ z+>Q0g7Dk<4w#4si@SW=ZlK1KKumscfK#x=VIfeoZCLG4bl>ZFcUyS5M@dXKfFCf_B z%g2}ICEOzQxcqsm4(vo2a`^?)+t@?ySJ7!Y#c85^0dHG72V+e<2>Yn6bh3K~PxusOz#5L^jgkAmX1C zubzz^eB9T2us;Y?JopuHh>8XykrXM3G5Z`a!!}|$r_p2qXOu!X`5QHBq=vDHIXn*z zmW8~~*4CEnM3}LXy**LaBQX!z5B}I71o~&`JQH#Mf$Ft=;y!+y!B<944ft!SxYAx} z9T1N`DkpwU&jApId}9ha*Jq6l4dys9T~?NEZ)6iKCH51@ojA2H?KOSNqT}l>@xgyO zxPltR{N>05!*$!D+}bQ(<&JsF6V$WdkK5B0F~6BTZQ*BCKL;E19n1M@$>X}|?$k>Q zT9I*X`OKky&49Bdn)I*fgWV$y;&F<}!>fT`UVb|xqx<=AKskh6C?)mKq7B13oM~$m z?tRs7wDj)Vlb1Ne)x9t@4mX{}H z0s{W5x_@7Om-iNj=g;o6uPQiwZS}dAdy3qL2YE`(^2##H%Hntq9^Q5E9qa)fALKl! 
zc>40avwKhOJ$-Q1mh0;xD!s9k!U1C4*WI4^YMAY_C$=<>v3CPN@!44T@nGw$4}+PQEk zn3#5>(t;FV^^LZtN7FvN*cK;p=+GnJbL8rX%{xXM7_r_CFxs4<#!ShL5#Xqe^;#$X z9WSO&=S=h6psaoTuK$C~pRO^9qcbHL!-9!aW-$UY{LXinMFuYq4?O)@U}^$1BYN(4UQPP^kCRt!$`LL5IBG>b3&xf&%OQWh*^rPj8qj5T24%hrn| zEDmz~C(ZX0_TmVPycVOFV%ic25RV+lgx-p3)UNZ<0qoNHs8S=@GJ=LM0VJer!gS>~ z`IVBtjd7phkBIezO9I>w5Ap@f@c|^Nz_L8YVX8C0AawWP!y5|ukWNbyj`@vhAA$iN zhSC^R%(2Y%+M_JhQHGts@G-MjyLyz#y#QMW`F1#qSxrg6fQd|+>nkWRG#_~Klgi%e zGIxItcy2J{;#?uwPcfsz=AX}SwCb{;pXB~yIVG?aWswFhB&))Tmjsn#HHKimqmuv}RL%_;6M zAOG+@UUO3)uhU5x85XTKim{aZ8$wQMX*K;8xq0Ze?+Ejy%^y02-WeZPdVK3P!$A1a zrB?Muamk)TGsPBQ?-v@SDq0KifTqJ(;^zRn&|}TZf7wpRGI-8Px)x z9EzCv!-_VBz9Y^t4;`B83`WD|0K79~#QE+s+k8lt-~WMUP^xUR#uI1H)P;H-ogXd9BC)yS5rxyd>&m2d*Ytj!dv_|u!Fm;UphjcL zZuw{*V3=T@&Jm-0TDMOo91kB#b@nIT7P4L3#`r{lZqRE#zcq`v_?sTW09#-2E5Zq~0X{qsur&rUAH988>U)WTyv#i%+8%ON?Lm!N!f)y|U)P5f^s0RK zWf;2gOIk5ZwE_~>M3E8e3#RhXys_4;X*(`PhfLNPb!M+@zV4Mre`{(`sn)-+Lkq1R z88|^>RA&^aTcUse0c(5rdsoY0yQA$Fl^Yb8YuO(}8(hj@34i#oG*utOV>E%`Z_Cs5 zZ9waQN8e+fGiT#=Z}@w&)Mg{u?g}jESIx0(p*+)jW@#s9=0}~7D$%MR>5nm zT^(?Ky-_UB?Pp1{nTC}x*S+fXT@RV8Io(IfT-3{Q9#vp60?daC1OI008oJuJWS^_i z%as#t$0jBWzt9PYN;k(*d@fw$SF#%=D*h;J`1`xN%LG{ydah-TEQ++>35q&B7M)+1 z^_G)!PoZ}~Sq`VlpUFK(D?{F_R8p8W4qj-P%#Ra;G@g|CMhi~-NnauC)e3+;`h9})gci<=Qa1-7JeMGURC$O z#=Ip%9Ui=A&vsvrptz3zmQ@biXJ`EEo_AM_HBACV|`q;6fSf%hIwhN#$3vFbh&u%?qIXkPHTFva_o6)4)?1?uauOt zN0UY0E6tdmK2y@wcUw>3=(fL~_x)im26bcsK2$goO`qRHrQKq^?j4=(&(C*U+fy8_ zS?f`MVe!(g^_WR;7}XUOJ#g2c)#}IYf>T$5Dpyy!x{bDd3%FQK`Xru{(QDs~wdI%RVa;ld4di^AYn$X>G!pR|<6AH5 z#!t`tV!lmg;r&)a1~x^WCAh7<1bNjKOZ23j-EpJUCM9hWev{jOj8A`%?22-Y)M)8w zF!mi{!ADSwYwH^^zmS#6erwFn)(V+muDYl@?Hv6_*cdV>|3I43qM&k__(Ud_TTxwo z5^1cv-ioUK^W6nqFR@~5pVav@yxI9HOKmFztw{UwiHxKX!&445t4!T(U_Ve^ot#c9 z3KTG_1V_RUXX>Mc5y+VcvdYd*pAVD1F^7=gV6_jql*6_n0M`g41GPiVyUQFD7>Jf! 
zk#kdQ?$ZZ1@mq+<_UM|gNNo!M@duo59vis_bfCbZ=24$s-gE8l`IAqu8AXPO(T;7r zo9eo%Z5LAxv#IO*D-WvHR{n_2e68uW!x=V&BMb4J$DF;|w<-=@jU)lM-I@-S`5-ILOx{CDB?45L7f z!qWK{;sDc5S^NwX6n7~5e%aG5&HZ$DlgLiljhawVrWI!;(B67T06Gexa z-3^TRc#}ANNknycx90E_MUN~Dvq1}Y+pMhx!SE!MBdrTfO z8wj4(0SB~--FZx(7Ggl^B*|uC?Syg5cjT=AQY;2nZvZ+NxMGlea6*uabN1&_IgLpq zQE{WZ1$IkxYsBmP>C*^&;oKmQX-vG6axCr44&6xkXGEdS~IXR!8tS5XOGHwT) z3+BPom==q!;_vXJx}IiQL0n4fjP@NoSXx)_DbU)#;Wh7!m2RSPe@!$!VoPkf9U-_K zfLDM(s~rcZ?d{650DNVZp*F=zg>Qk0U(+R*F>uG4nARTdoK{%(9AW=dMJ;+=X z6cF$g3Mew*0E-t#x)q3VFOapE=lb2%S{yqd=rbL7*PG1?>4>K+mdP=PKQ{rJeM4SjrNNfLMd$>ebm78R zCE#N4nAJ<3-vVAqkY1n-9~C)?zu0rJqrwo8p{~_Vy>5>+NjR6}b@<)=6&v?Y1{Qw> z`t}XEtQ*(nE&ir)oh#S8iRSFlm<8D2BlKH7WDyk!PwQ$V@1e`+q00+|9$!Y*jO z!Ijs-g&|1C9**ZKS+?Bd!+~2s2p*vH)u59C*nKIY0$+a$pQI=Fj!QWXHl);tnQ8!x z0-uX*al(esV0;3t{1*_b)x#Uk9X>`~jdgk@+p479=TO3!)w*{yw{xiF7Oq)e<8gG6 zZlAP2mTob`Gb(%JYYOw?j|?jHWkFBIk*1dy*ii%mrbDwDa?GJP!ph)4-G`Nv4i2jf z4HXA#*<^3_yRBZGaswUJhA@<>%{Ku=lU)eN9R-_RJ0&5}fi4U(8xkRZq?*XcY#@6b z7M;Jb!0nfoqmxj&Gi0g5)|>Ik19*;8z&`F2_J~Jtj0I)u+@~r!UpF_m_4M?80J-7} z4_Qj)f5IbdaJFi(Z8E8iQ{Q<9JPRTlj@QUh8{%f9A=F{~1fn*_dAf?g<()_R!tUu} zhSrRu8O1Ddb|lneV5+jvV-q5X#LA*oqRC4IFAopNyN3ipcB)`s9$yRoq;=6%c|fay zc~r@z+~|wj|PEPp%{rHc&dCcLnYTq2xvbY zK`zl4T2NO56V=B|BTlQa!#sU@4!$QOGQeG%gnoi923Vc(&cY1EQoaUAp9bIXBYR^_gZ&-QyNh-l^PuOI=om zP2BH27dc|&Y9hev0i5)x>Tsw#JMSn2zPOkEJ)H5b{a5G3knDl%=666MKCzwxc^pA+sYLpFA)4sYuZChLHmUjYF%|vto;l7S3hHIrtH&X7V%5TN7|fyW*skeBNy0cmW)sA~ zSLxFi%A9h<35idI5 zA5Lvl=E~#saqzRp@rQ#&aLGc-~<|Yh!G|UJ&hiFQum1h^cvc&urbt z>KB#4GQC+!ZN)3eGwWc-ux5?1=Xid$71K=XF;+9x#?c$DM-b$(I9^Ckd`mFe-pmp# z?XgkPB2bl2PDDdP?&h}nT{eTXpLHe<|6SV=!$_Yi0-JRQY8$Z3AZn#|b#=upo5Lm@ zfCyaB^$I%7H?gn`Gyi9hXb!N+g%(Y`&L}t;pljR@$QHEIaTMi8^~$z|+I6mniL*+o z(Y5!IqrDC|5DABvH|Nh55F)wx9-Na#BEe^*1e0}*iw0rr2WBpcAC*sGKO@$&4!CB+% zVa`H+x#YJ`HXY(c?M*V2BY)ZAsEM=BD!X9+NrlZMql>9%=}+#p3ad+vMm$%=w#yf+ zgx*i5FF3GnbXFjRd9YQSS9>K+^HIpm8oIQPB@)NnCR@j^MXZg^y4@A8tFbr#*lV?U zk_S$X0JMU8IXQjS&|NY7HB4daIDON1uuk~ehzzTtIa>rRdU#PxT#M{=Hi%o zo#<-)$y|^H!yv1U&(s)**$pR_kh7p^KAR=OwU?e%SfM*p(J%X4 z1mE9RBqMZ0ohD570nLG3og342H_rN3iI(a0SPMT=S>TpzQfa zq01yY9rB;UnNM#imONvXmXdnce>Cn;s!^Q|_8A6scc}-L34Udn(>O!@(Vq6ovr{$8 zeB%u3K04Nw3)*xGs%pCUFz@_%Nn(U$-~>%rTH3gZq}#Y{&g_ta9`;0%t35noo z;O~r&ONFavZB>ZLrHVdAUGsJnxH)1pk@QKgUj=d$SL0)4f$Kg1*megf%QAaHI{z z=Ow{x0TY>rX){8?w!-f5W?WGLkNgb|zIk(T%XTHt@$tJo<~MK;$ZdGF>L#|&p@L7WJ06Fmuv-3Nz1 zST8NKuD1Q=>~=jU_qC~_@K+5N~(5zX|hxfVN>oVa)Kz^w^U z`h;1ltW4RC!Fq|aRiW#(SW5Ur-(BtFIemuxOIy655GyK^zrWV+@2@S>@2Jpga`};F zUnRHoXytcNi$lh9w8pJ`?bQ9E>$&_axs%t`>|IWdmsXo)q?KYjtaah9Xddzisam&mFSy>WJA{#22 zp|FIh0;zKUnd@LnhCm9fKaWKlq<}|(_yVM%$_Fxa2FN6;uJjvuXkL3D@;t!BMO(vv zM9zmqfg=(){mW9BgA=|NF6cDq+R@*DJS|3qja5}+FBZhX<0I`D&e;j@&ip?}Y|c1w zkeu-EZ$_w|g_z!X{ll*|@`bd2v*#L3J=Qmo%G0=@7jaU5!z+1aD*k?2z`5pI z)BMJyE*|KuSkY3Q-dK68-?xR_0->2q=!$gH2eA1`OpL7 zi}497s-@r7-?MT(TD6_l!|F`$;*jO(uA|4ew3L%)rsAoOn^DA5jBn7I5n{jo%w=<& zxkk^6U^v>%eX-P%%LPJ|Miv&&i_?&jpONz_7-C3hl z+lHQlFsg7<3d@I9Z{~K1yq>B_abVe{V!a8!_pemb5}k>IVN?KIy=4XfJn5t3$E^M+ z?5E>P zOL!)TuSE{*QLvBpqAG!~1@*!iEWN8yvK@#;z|luIWTu$|`;{C?4-DZ)G`fIuqaqfI zvlj4tfgW%VNCQ}RuK-*;BlQR*-A94Pz|~PX)A|q%BUgTh`8fJ+K4{|z2_8c?B0Zo0 zx9iVHlRwIrZ9Z}Fsgz;Xx)uOO+qHC6yD7Z8HCz2xS?98%%QRN2Ipye(8f;uoGY~@JV>T8hmeYV%-oDFIfaEtLQT}js6&1Z`K9214x!$0} zrV)a3FCrVS2M}a0VxuiRdDgtbg2Cg`1BdOTZ95ACLlKa0gxY5#zZyX9<{)m7UvuE6 z0n^+IK5d)Jf+ya2aGC(Ql?Yzea}Dr`G4?O`S-kFS^R!pqjRWcIT<+h56q# z%M97cLwZRrU*9^%PRr(492WSQJMnm46gqv}sxKvD`vv!USE_Fr|Fg(im~bWL=Sdtk zd--15%(1-2QKPUPxNIbP0DKOL#=E0{)V^X~gA6Kob!C2%L&yx!rnvT$z1ZkExVXMe zIROnTGTn|Swjv!1x%ltErHtz1XQIG 
zZDnWI{GbcRcp(5z-Pj9=7Zac?uQ1ojos{0L8=j$I_>M4na#&(@l)!`X8|ZJvu$%u( zXmG<1dz-tmYVE(#eI2nCAy_H82{I~=6kfAAL;Mhq0q@^^*4;rGZ_~ZM6x}bJyVo5g zzp_!Hk0!LdETNm`;nJJyZa=)ZD)YjWPTJm7d+eRM_Gf&wN?%gNq!Po85TWvCa~CXJ z{%C%P432UyDxUTexO}^rX?`lt+jnBjVkUEU#Eqy+C5j#^^+PKbR&GJOj{Hf6fuwQ8 zk!9Im&0G6E*~_$LYr=J(#Ouyq%ZAT*v0>Vmku)HRd))m?CvZRPFyp+l^?Y4TG5-Ds z`Y2jzC7|3d*rc`Z_lOD*+oS{hOE2_^a6Rr)C2%`w85sg@0H#{3@WVrsgG>npH}NhY z6L6UES)xPrdH$S42?@I9_K!8{33;6e@`P=kkHpGc6}d}5^H?@|+;VnsnAc+sf2iiT z4Z{bi6G}JL_%~J3yr@!lcxiE7W}1Jyo}RMR>9&)yg&XR2Zew4k$uG6>n`&Rj)eqIm zJUl`!?1{X2r&7nn59Eh_HH@cl>381lP4A_5W9X8&%^$HX7nC~da`?=)95AbNNFi+9u~f^d^$ z)hAzXz2$$#JJti@r``q4C6sDV$AinZOGqf=Ta5%H)yPmDc4z@47c753(`kYlEuJo5 z2#n*JkDHu#0T-y#Pt|lc2L}}ensS^&g=7{4;DJ&Q3Lgx&*U&L<8{h<+7QH9q&14)E zixXb1-57avq1MQ|*#aLjZEWU$DRaPM`M`J~{AH}ur*%d-{S&^{Y%~i{2;OWJb<9i! zbee!q0gWq|>{{TzZ;!orHjbCfct0GC4wvP4a)HG=*!TMMGWVz8Y5@btO+95KlG4qe z8&BFkC=TN7itDiKk&X7@&~&O;rZ&5Bt7@YWW!}%5r2$Ru7UkWJ18gk$eQPE9;@KtS zJ2TGS%XWL!YA;+EL|0NX_;vpNl-_7fhj{{IdT23(T*dFYzj5r}lFMFb-afQb{$?Qv zhR3$W)Ws~)D`Rnfz|8W0%WgqK3Bf?t>FNWacK2XG-vLmxd;k4?hKR%h2jndvR3z_^ z00P3q#0mR(>qXJK@FK*#$FgE2SQF1PV`;j{9-=t4ayU&@hAsa&fZ>U!HWT8D63shK zGxOif2QlUqG7aEKM+zRefBeR-f+~oN1?=VrqL0wJff}EzQ?vodtm5muW}(-VXm?*7 zzr_f|)+#m(-;4rN4|4B)Y?{)2n}7G-`P4K*$T6TKQ8caynm3uGktx&Q&)9M~ftD|T zk2D9ZxcAia5R6+Xs8FM`w6j|G$oR;`NSYVC0{pg~Y#S-^+Oc`?ue6^t3;uYr6 zrZ*Mf>stg&t0CKdtNi5l(`RNjUGCzez&!jlT9A8gD|fUNDuj|zNXA{mCikI#3$gM9 z{z9#V39}PEze2o52sX$`&}Ft|RYBEQh`(J9Vz7;=FKs)bTg<2bjj29UWHj9kf2m4C z^*d`WN^n<`HntHjNa9kre6sft%9?Kcwa$sV7Q=`q!)e$pDUw#lU!~PwIR&<_@&3-yX$2SEASA)o0rjXPeQTpFtriWhj^=Bb1cB z?W#gUjm+=|>XSNe9x!@2QJU|XtvkF_+WGlZQpJTgi4@N*x&>SursRynoS$AifA|DF z`w1JSlMQb4=2aCV3D;>m<%^HZWMvqOePW=0vUVml-D|zcflb}&e>h)7dWVL}quN7F z{<*|}SG&V;eCFaR+S8}!YyPqe)Eg^k2;Te)eU~WMk&g;HKt*_tLAsK)w9+|zCls&| zuy<&79Lv%Bl?Hwm+88XTW`&T4rA0VRaBhoD?%8eLv1?APpuULu>vVbgUH7_UniMSS zmQybrPnul_v#p8jW8WZeqFwWc>Wa8*P4~A?Bf|kRYwC1PY}OJM>SX_XHn%)kE5tBL#{6`g_$}DcjSd3C6)&f+-nIX)FpgA;;n#yg%7t z9Ld8=P1!VSzTBPS6*sV0-u(4}TMy?t&vS=47Y1sEox4naDCKyy%_Yw&i#0HP_j$b_ zd2IIC=&YT78>N^KM}WZXoUf0?I7`iMnCr&w4EUvec;r@U-ba^Cl_SnDs7p}1$4>*G@Kp$nmpm&k~x)&iTdhrFlKHDiA zmE4TzXsAE?r_ZS`w>J!4Uhb2M*sw#8_t%tD*+La%J4@!lUMA_GnL;x7*0 z<&OU`dTG_(oXd;b7~~xrZ~Y#xpmBCKIyrTI?sMl8PcEMQQq!~)F+37iTx4rjdr;L3 zd~>lD{H@V6Qp6)LVrwou=_ka}7(cSnT$qnl-n=21J>;2FxmD9w)4~q(_7suvXHUA) z+$7C~Sp<`l%!PvmM>YoAic|(W2Tun};>Vd*8FFT^9*bs7mdX*b>E@8o$dN^ z<7&<}zjMLe4L|d?cC&uL-O#a1?y!3~(X=|sFbPh*CrlRuzkT!MH%!{8nRg@qe2Z}5 zIy&b~t}DHIMpv;Rd|^cn@>k5RCZ?xtbCyx5V1n!NXo27krY*u4ojQFL$Cw)(O*cwO zYQLTC1?bw}Y2?iJ__xR8QJ}9i)@~e{dd*DKIfY^)Uw{OIBSZ^6LPnh|TcVSeIcT=#wN3?y_q@T#s z;n#KA%I3ccv?oe8Ugu_H`FVjlp@{c3(;E50$A*dBi^>H$1u56_U*xdFFj)E-NL1N> zvp4M)O;e85O4U*}VN#@FjG_CeNE_pO)j}mz*Ap9znG`t{ zV;Bkx-Rt~vxE8)YuG6;?yKbUrj4waATcM+s+ADgU_M`h<|2p7>>3)q|YrMEtz8}xD zTa4z_JswiK!>J=~9*sEhJ9D)H` zqW$EWk3Z#VYV=m`BaQStEHDyId@~Y8moI`!>0>dQ+nffuY8h^VW6ey zUL9xe)mUJ-oKTmboUdP`^OZeC-yn}I=rv#j&8)McREHO=+L`?$_Fbsl! 
z--K~zqLU)DTH@;L;cW;#PRYJ(M);9}qnT=vls)gdznxK){R* zMK9;f%*nG6TsZwzBcr29xFZ;Kp2&70;|gRi^BexS4+{%(yyBB6>Ifkm|B=UVkU~c7 zfWK{7p_j11v(NMr+f@3bsvJX;Oj+e=WefAQGnDO&iZ1V}=3=q@WzN(p$bEi#-LOYL~9BJC4~!a#u^bqT$zV}&?WCeNo_xAN;- z^I`6M+%IEg6GpDMO|@5U+&3DvjCrspTw(I__uwA)yV0D=6-_hI4eSHWiMqDRs%rU0 z+znBw=@iq4*Tj!%E$Y9ycsBMiPk7mEcjDVNW8(3}+otJdFHxym_=ZlImd34c?W9yU zN9`1E)4BjaiJ#2cDxa~2UE;X8Q!Z$qPU5TCS^nK0C(atxvpp>8{Mwo`R6g_^8|c}` zHjwv2gjzyshldY|NSY{2JqioNW||A3wZ<2B0SrQ03LJ7fa_sU!iD?7@>Rfa|!zYuS z0IEQ5BdLYNpcwxjNn*s*avug_cTi!GG*-CoV+2cB5~L>wli>=x8$<*oQyAKVNc?NU zG$GRoq;I{zfQpNe1{@7wr8-P&K<#-AA{$6r1QUTB%`2>D@Rf@BXzMLN2YEzb6tDm{ z0vSDF+FT04+67o;2?g74_AZa6xZZ5q`Q=fw!%!?L+?e>>GGx}55~3)U)Bx5F&<0%~ zxWtWw%wsz`E_VuNq_i!$*(HuxC#W15-Y83(znaRJk;YwHiz1Y6P&@V4W4hxej2GUF zCu>PvA3k{X)|;uh=?SiEvs`g?lWoURPik;)qf?HhcB^0Y=A$=jtKzxJLPm$u%{1=CNrBz2Z)ZR(KOAp1p%j4VqpSi8i`FsB_-hJskJ!`fn}I_tA0Oh!|<#$QkE9;j$uZ= z0p#FykTQ^|p{OWJR!b$GIid5xElY&)2{kwtkbEd(%#Fbkcs;;yHH_JzaXLcW0HNWw z;1A$CPxUrBlE!YaLJ;?`)P$ANk7z@ zA{uL?$`7Z@1%|gLarAk$^CwdoY@K}Tyzt9OrNL~scj}&Ug-MKL8j|(}Qay@!r`l%_ zC?G+{bboc*`L5!v>u8M~^$wtCIvWxEm!$LjR)nQWPuR|1MyWb!O&4;^Ow(DQcAlH; z`&w3JNf#AH0MqS(|nLYG5R(}N|~3980Ev|nTc zLsZ3w24VC{WaJObUQgI|^XA)EDY%vDAWiy)i7(j(AsfO3r6e^D6iXIi>vvylAp3dV z^X>47*$TR+S4ZBTyVvzu`?q(Eq@CkPQ<16C6)C4fYu2=$Fmo~%k7Exia!0SvF;>F& zbXB?O5!2m2IX}sEOaf(YOc~$=C`mQ=^GUd24b2@g`i;8WZ|CI1BmO}WpT66O#1BDC zwHI_cv^XTs3*i04@85gy2@t@@XFIG+fxZiVmU}=`fpr4n6nKTHON#zm7>bZip$HoI z8M_k$txj+|)@WgC#vEbhMH@40={_5!68k3b8B4IjNNy)crnK-EAV2oSi_h47*3;2( zfAR#w=?FA6gcb)R5YQFYoHrl^zkyfh&Xxsf_qKYa`VM8 z)$dmA{2@02RBi+Y>|1`RAJjQ*(VqU+Ay#>UU$p#Bm$uzDb<>G2K2BA17e&X@KN|9s z{T9kZYg^*FLd_n=T^qYV!^RdM7~=A(Qv8)ZZ+>3$Lbh7{e5$JJatu$-?SE@>*wnLZ zMsi<1f4&JxHwnsI6vM5@%D9(Oj1#kKn_NJeT7(p>1jgB59*~g(vYLue(~@10&{u&q zZrk<+Je>DR@ob>WuOiAdOl&$ZS<->+HXPh0VeORww{vKf%R!r=P);*t1z2b$=SXfW z#As(RQJAfAT`t6JeRaYlaA^PiDv1@qmNry`xJx``fgflE0_Ee^hEzF@QW4&W2aX*1 z&HxC^V-k4;#G8xFb<8aRH-Ntz&;*O3C^RoU>KeY6uX*Jq)v0fuN{^{(mY4*C z^RT5l=Y>|V7^2!Yr7d6*Au7k2+J3)D6tt4Y0~cjeshJ7z3a#Q4=saXc$tO zTE;JTlx2sSxkh{cTzDk&AvL@E7Nc|{mXdURZ9Fvr_UEP=X;trD&dWWaY2;APwds$8 z>f~u{O|N_54=ym<59>{RHu_2<7T@E2mZ50C8a5F;I=XGeRT6=}Y~)vMVrqC2CCB#0 z!KQ)^D#$(83Nc~B4D|dtf1JOO5biX8z<#7=?C1Y0_0}ZV*Z%e)Qs>HClQTP`{G?K& z^?CD36}HYvZr;vzSFK&&CNiij`{8F;K3psjX4BO@kxM+`|s06{dydgAdXwL>9gedLB10 zZCPxiEp-lEm>mf)8>mZB^6oh|ZE?p?z@mLuk?igVFE5N;eO!`cJQU}Q-M#@*gUF8~ zI$l`Doj4b{qFrQVYt{=MkrQdp=VxVd=P(GMDqXV3{235MMSI|t?UhQi&%F~gHbcU9 z0`DJR=6LgFMkjf$!&Jg}r&{McX)+z@$fxPC9D8GtXqNi8H1(Y8&mwj73~FwKV~Ns7 z5GQ)x%5v5%dzDqkqX)id3uEU310{MWwZ{5|rFhYCz54-^njL+NPxdYN2F&<`oyy%= z7(lJL-QM)xS~ff8P=#>j^SLXQXWi6r$Grg7df$);`-)}AC zJM#3j7o)+nl`I$If$opV+gdBldmT0UQ};Q#7Mk~_D9K7tPggmnFv+i?l9La$)H$*t z^;GV>%+2x_W+6qL*`1Sp>=cX_bOVn&zs$5*tCRS0W##vF9yR558?3B1=%W~H7dwjZ-Zjxrce~6fC@{j^@5@yC=jj-h>3ux@d8;E3y4sqvB+RW zM1*k35CWPYAh!n8MFS$mfC&gL1>~kHRTNP6IUmJ3v$M0aKlaZ~&kqI?a=tI$xxMdm zp659=VlTbTulwxlSsRmXbu;7955ztNoi<(lpCpbHm?)VsJaDeIV6S{zS;CpQ-}E;W z#&k?us2B6~;PjS$zq)>Y!TM0CyzO@NIlDTk)iw6t7SS(V&$}4BB+jPm+sc~3$I{9# zErNOPe->OUy}anY>fOuK0KZJ%>-#hhK2PhZSjwN0Vp({bJ*Wx&QF zy~QV#du`0bK6?ulZra8%9g*GzUAO959{I@5H{MRQ4d{6??o{-ieh30%4Uc^ArwPAP zCLJ%%+liRExCBN3RG(+&i8vy&|GG&4P^;JgkYc%g=36 z_a{`JavV)vA$8)MQQj1s7oVJOeMQW^{8ihIo>km$O2#%~%yV?zq7%-U$@_D(3L_RK z3l?QrjQ5#rZ;Z4Ux2!W!NHtzt3?w^-@sB!M92jRUhmHKFkK$NtKtovYEkj> zG+s+OKRs-!A!BXhyJ||2;_9VTwD~+1@8yKQe>>lSJ~O+gaiB1uenfNhaJHc(nQs;W zJwOgUEvxZ_oM#&WF%iPHqV#*&X3nqP7`wIVI3-w*tkG7Frw`AYGmQCSw?^JAU%xYb z^%+;Z10FhZ2b$^mL1i!HJNu)Hn&z$+nuguT4ZO2&sf}C<8L)Gy@p}1^lV8f`ZSM}e zzx@;c81>T^>2c56Qv$o&uhK;)I78k5=1u#SvA+0oT+;=q 
z9-L}UxgyE_pzi$aB>h|R&&=J%{*gGZR)oG5#1gu`F`}kfkPQ>n_Vd}_HFcf7;%Na& zPmWU=GqUo=jb;9(HPKl~K5DH#0_*0|V}Cet&F*Ztx3FX6S;*gCrjL61u3L+nrb==eV$%L_JyAI}Uz1eJ?zdMgHY5Un>{a^B(mF}c^X^k}Q71U04UGinIa_>rX4=kZUH z(Gh1vk1lS`w(o11!;<3zB2M?-+;aBNDmDE~v+y@Bwzdkad%w@Md(QiaES$qGVlYoW z7j_Z-c;91|`9Y?%KGNM*F&gGcg+V-1k5Xf$RZ{NGJt48~GP17ab3M_82KrVwF!2wW3uF8mEJ~GFc93QB2{n%?3Lg0sL7%P7zar1>liif0ruzWNKgu`9r?Jb5xMv8q&@Y`{bPvIp{qflUY z&Y|(={MpYBhf7YaBn(AjQ3UF-@l7N2@i=c`kD5bgFFdW5u3VX&es4C7|3$V-erjKF z(}5bFhWX774o0P_7ZBL&%J;B+C%Xy>s^XV*dVHe;wTXsnf>e4kGq9IT#X(>N5b+~2 zE_mjB4(!25um#V#eXFRIP!ZOtLe!BcyA=3fLRGPck#X#ANY_BZaL2!~c{N`A3$~+j z(g}GUTSI}+j`EKZf(u|95(1NK1KYap@6_lH8L{`uxJ#@;qlg6e2X$A63^n&b$!J^H zb3ZsEkmmpp>8%md1mmd~)X1E4i0WlI5o zVsCot0}yxMf^CN<(E{BcJqd%_YHJA>=QEyy^vfdf;c{YR4rqf})f5*yJu!U113F?H zJjjh!007NvKY-3?bZ$lZoZ>o8v-oQIHqsnK<^aw|Ue6a;$hAQ%s)LiEG3mtot#*(e zi_DScbi!t*d4M|wkODNjQ86*h5I+Pu<|W+tScWb6Ns)Q^?Cl^a+=L}C;Sx?kmOB-K zfHV-fblfkZX}=|1{@0G5>6NrM-ZW>8>v0HHu` zUWgPFRHQvsI=IqDmAFuZ_1^{ol90)KBB9-!m{>W)fV5+eqVmxjYp&~kDr~)--P23Z)Mgeg&9vFdII}&2I z;If*i8FOL7CTf04eMi+MO2mr*$_0^?`=p;8vj?FW_-zt{oL~5J0rPMP`;)+I*9DOPW=-o C|I^cfqkWiChR8ZG3Z(Iuzo;>I5v+Hl4v(NV0 zPZq{<3l$eiNlD2a*uVE@DJf}1DXDpbUoQZ^*}r(OLQ3kFeFyd$9`dsvV#q#DJdBeN z76ffyD}DUVvPXxV)-Q`NG1~H`+;d?LVHL=xMxb{>ux~ul9bka!KTbB&z}opT^c5sN-)VdS_3zbLce~ zeP4sl@T1F1~UecXB4}44ZRn~-l z91jbG=)chJ-T?jBpeGL&B_$P5@!$4xtC~z`(Fkd)Xpkay7E};~7301Ho-dmt5p!sI z?eD_8dT&{$hbqAb;#>33%p|gSB5N>&#uN`TeJ0X4;jFib5@u0X&2qF}TlN`hBZfKM zK8}=(Aq9P4y(`y@;Txu^t;v!0W4GyMm%>(NU85O_pMB^Dq zb0)vew-K5UW{;#ftnaJ#f2?=^c!`mNIW=2 zW5}-!{Df~S5b0UB`|gIQ542B0lgns?$fUs#Qzry~CI${w!R@8e(qimea7KHXuG+Xn z1W~e4lucDPbs*w=bXf_WjWHBq@5ltdyU%}2LRV@Z4H_@3$Emw8NZu{egJXJIdz>pN zDqD{{>?dCzGDinkk-C1}nvNa1f$WZQph_fJ7{(yUpGzG-HBwy*XSU&dzC#&{C75m{ z=3892nOM85>y)_P7-_;GNT!PdJCVXNq!&HWL8Ym}Ru9uUJyMi~@fW{6mr0Mb^JRFm z?KlC}a1jzAU`~dhgk2rOg4nIJ{91YvGGyYK zCH5|cKbqy;XKRW*dpRYH_bdNSwJ?S0v0UBc!Z`iMdP*7Fwa6 z=u#;YDLi&n$j!n}JfO87VN<*L5}rhi5{&p#;sMTkuCGzj``0=CN(S**?%?;Xuyrf#IP)&g1f^2X^q{H*#)x`0Ay*Y+r*bsj_3HC= zFX4+%@(tavMm@v8!aAnSxe5F;d}>sX%iG6yFnpwlhLp2cKx4|};EPInU!(79L{Qdm zrl7_6xRdX8a0C$8>2etZT^5odbvpN+CGOlPS9mNI@Ur5O^f7Lk8zdwa`ba(M z99*cyzo99NTjHP!Bph1={8A5ks20|yUf8n)_eN8dUujIr7lliv!U4}O62)o)hA)#* zqEC?ayqbSl;DKw$6(Wcf;YP`_RAIIdeW~B_Ts7Cyv^uLEb$55X(BNCX%UI``qvG8S z5>DbZ6cy4;29MaoIWW};BWU|cI`=fyA=1gk;f2|DB`)HJCU`q>jS--+hf`Y_94+MJ zy=db%C?((kfhR`#hLa;GJzJ2DHL!uRg2gFo$gi}KQp#dq&92f^GFs?$2Zrn46Bsi* zH18qVmgtK@2(jV?T~m>Fg`qXO4Ek%krnSbWmCka>ypk+Tt8-R~Pc?Nc>tbE@u6HF1 z`8o4W*52M7$YZ_~N*X^PAq6g&bp4P0F;H%MaVF~gB!|(bwf=Vb$ zP+i}r;a=67F-Gm0;BPXO3yEP-Zjp!cTGK_TDMGKS8o@v+!E9+8z%%~n*gJSP-#BN) z1n-u_R3{7*l5`}Y7;$+wRfAFt+~UCpYa}lKOR=GVfxB7G;gmR{lhN{EqHr)y^(u>V zxDx5TrJXQkI6cR`xOOdq<$kgKufnJ-?qCqXAko<$xXPB2ZIaY`!sZC!U;=VaG%6bD z<2DI^!iBY=awn(JV2R&c=Zoghx^5&|Pkm07iGk+ECgLU`A^>_h_& z89U{l=#-u<{6SZ;ejUzE9qA?!^C-xvDSpn{ciry6N{Ab^XQ$%tI(#p=+VMCi{GJBq zRHhjdMWv{eF@+pkQ^fWD!4weGB`U>AP8sny zCwua1DJU-O!I4L|xmt1W7gOu1<9O3e<=IzYWp7KqfX&u<&Y-dFx?gzye+RogTa@Qw z$(~lqufHeEo6DC~1VU@7?3NM3+=}yd(XOIq~G!;hh z&0E;U_2OFaO9iRzieL@T7L9M;VdLpN@y0rXIbFguwe)+wv{6-9s+tOH>j?97MiRfq zK|TpTo}wyh*t#f<1CJsjqE$AYbynw|f7PwRD;N(ZOa-(1;?YfTX{kdm06aaLEOV{- zc8CqSWVb=@7Rv+HcHW0u01y&zV<-uD5uISb0C7c-zH4CcA`T_EZ46tIvwKBZ4uc>O zu>lfD^!Q0y>NNy(XGhvqr@an0U`4#m&4%0qTLa(*nR=E?WsIHAvE#Xp2Ln1~QL#-~ zD5RhNvU(V$eiP!|HLsosW1rqY;Oh?{UJI+UgeEiir`Scvaia*Sl(rJ#CM5rWU<{zI z5s9Q;JXGIU!fV&_W6q%2Dx&ZJk7TM|c>1)wVsm+@Qc+bkZm{fg`gABsg8 zhW~s8xj{tgt=Bb>@arv6q5*4}tkySKFsT#Y1H8Un+F*Z8ldW(PEt%}-_?9dYaeQH3 zd+-;8bb^S^kZzIHDBfNoc$1aHB_$e&0LsnAG+_9>F&Qiajz1fqTN~c7sCt5Uj1kxu 
z*RD}KmdF`uN7&?@KgNrTsf~wcm}-=0s-ei(Q6It>oDhYWDuwyv^kz5+j&2zL4hVLK zeycQ0|3)2BBFwyjEQ%mJ;(FtUPN@#cJFbxg;HJt{Eg;Ov)3U_0&=&lebbWx9iIZ{+ zFlC(2w^J0!G-ZU187XvVS##RKvSQvwf>4OXGdt<^<6hB{vywCooKs6)cZQCr2`y?m zQ4SoS_Co(?2tqSx&P@;FCZCwF4S&h&TU(#r8`x^rMckp@JP5v#Xi$Qi-h(`44U-b&tsyMvfr4C- z#lj)cyKRvy*(A4 z$emQuI=`PVZcKP>hY>g4k9Q?Gjgss{0x7BVE|8h+Y6yIBzV;RGeH${yZI%P~L-y~NxMC3La-C^paUU6&Kl`y0NufBF!V+vG}! zKL^~1eN@=c%yrl(uQ&9#bWFpqRFq%1RGBB&@8?{_a(Sn7N|GC-G++P)m5 zh_M${)&r{}Pdq|Sz+kju;L~?bGeNbb85% zcObpDezOpuc*SD72AQg&07Oz96XjN?!&+poQl~#w<60-g$?<=>Nja%oJoz-soa0}Q zAvL3*WNTWoC2hu%JDa;On)cktm__!{lsG*?`dY%4qhdsCXFj1RC+Q7t4<7N;3wTiQ zr`$R$_T9y$5Z(Ll1p7!ZAV)Iu$Kju0O6RXgg)Ee9DH*pcl9}l2)gbr_-k?NpN?YgS zo>3mKy4k{#`FwU~wyQeazJd8aM+#)eS%@ zYEsV}J0NKvon#Z!SCmYXfv?Qdi1KTgde=;d9|?GO;14$o0>yt(THZI2>`u4yO{>uF z+ac&0zm^&75CUlN?KhLe;~6mf7!AFl%1~&)4n?}2ws<}sm)ZK;PuNOrwnd-(>ej5{Y|JQu;Z09lYN_u&N-CLbaULC?9B!p0VMdNk zK&iV_-&uONVC&mAhTS00SwKnP#7pVcJQvex4n@+z2R?SY3rO@Zx`fF!1g*P$gNMXM z6TksPTm^p|Q>(*IIk)mt#3nu<(M0W>$c`y_h;gIr;^!#JtoCZ?;I9kU-}wlngBV?k zq%&C6es0!2W6|Gz+~)bIcnz7Tp3PXpcAS@fD!Y^z11=XuSFsU^{VSy9bZq3$xGn=&jCm z>wKQeYsdH^)xMV~t=v7U2InNWGnD^EYChrP;eMrZpnYDdLfcBpW*jPK^s~>Bj!y`NKUZ3yf zRdHmK&1Xo)ecL@llG$ymx-r7J$#)%2YA^QpZ1H8{ z;<2kQyi13~3toONxcBV5E&h#?9Vf4d8GcIdU=O$EXBhC>a;oU}^f<-47=GQMdo?$* zfk3CUfq=|eYZZB5ts&t+KjQ25Y1Z{tJb7wPc#Uc{lL`I6>nW@u9J%w7K5r%ZGcwO& z1K%0XPId=~Jl%V+$=fo^u1IS&eyY=)EdR`pCZpkM2zWCu%80WeKbaS)bOAp%1%&(o zzSVbW$=#x6leaC=r^V@ieCgI!YY3sI12kVWeb185%yxYDWj5laVt@J!(f^f-`hmH9 zY73fu0zDP8j06lb7rEDAE5#C+XYV-KhLTY)>5UQXmac}JO`mw) z8M$5(cd{3ltt}w}7i8)BEcF!z5Cw$^l(ME`3K)LAsEj5TyEgy{JTvhVkE>|}hL;5d zD{a3vjt4#vS@@yZx;Hpp2g54>O5rD#uthxz;6s6&-!(I_W`+}QOT2)|&wBS)E5Uqf z5b3iF67?)#Zej6*uWzPk_-HV$0am<^D4&_UG^5C^Y$R>k?Y%etJQJq8R=9#GRsZRf z#F6J}`fj49PG$G%iFr}Y)to5iiZjqlysj_trh{fza78X7X}gQ6ry510x*ySYY2?LB z^!oy>vLspf%~!aB`HW!TTN$6@xaGbK`fb5 zr3PTU7vAg8bd?-}amA*=uHeq~kuo(v9~5Xmr%70nY<_Z)+n!a7V4DV9!MuL!r81!g zlW$L%%mfN=aHhk`r3W8=a3pPCh(g+YS%uV>AuBSJTnp|D4E6>7S?GUS#ksEJ9Yb9q z8Ws-V1h0cnRWI6>=+$^&5BlAW4W`kl%eNj$1&4&`U_U%P$`Vodmqxu=I14i2E*d$v zn42gX<+wOmZXaYLtQ z(5_T(Z?Lk*eiF1lDkA}Do}KQy7fWTbl*IO7b1{4XBpu=ikaRqukN0_kkPT0ULiRV1 z8LAosXOI|j7Fw99v2SO(=c~3xT*1o(qL)(&( za91Pg7m(>VNklx5F?C#&Ne6-0%aT=(=&3h)jb&gP;_KD=YgvEzlu@=iu!HAv@2#gZ zXeSAlpFHih2Xk8o98Wwa1E@3p(yFKaoU&JgVIV79pIe#h${>xfpE}di%V5Z2NcQbm zS3@-7126KaOlGa9g;ry@jzIGkH*WTRdotKtqdxe$b7tYjM7)3snscO|lLN4ph9tlUethPbW#Z5L8xe#hZxGW%QDI2&l-HM2Cm`m3 zW%#L8C4AtDu^+gef5_`qwT0^ghu2i!-;=*cF4JY=&;`V zw=%im&g54s|I3K3wk1>qvTu5F;`3tO`|hHc5s-a=VDcBZd8g*%fb(a+Ip<~_VNjf{ zWhnqTK+aMLL@3}f|C?U;BafSNith#Y^3z!Gr8T_=BR)In-08bBVZ)*gigfcCSCr;d zV9vBB-9W^jHPyMeHusrNBF`6R1L0;aI{iJ(-%R^k(>_LQf7{Lf0%v0h&@!}vG7L7_ ztrmiFWs?vEH4|BdZTmx3Xjs78+{#=rzaypJ0nsej{$iYU>PmKRJ`JR)2Re&t8L^H_-!-cG*qG9hb z02=cl6f+lvW|ilQC?Mk+IFNwK{6yevV^nxiMkO}yd~>5gBC-q=$~`W!&)~wJFu}f9 zNjhK(RvA$4rGDE6K-wv7kewxl^hJZr3K2F^X3o0X44fCh)OMd>>RL5`5wOaPV1UPq z)RAG29u)=}i6535%BBPUiMG%i&iywfyiRpznC`%Dx%G(Y)b4O30y;kpq(8DP z+^J?;uzeV0DGbZY)sniGBLV-Gs$UHL09Qdt=-;SdPKp0U1^=(3f=AJV9KY>}9wRtf zU((FsOxfHSmy>7x%&yw*{u)<92cNkuoH^@h_#hGdH1fRhsP-4HSy#+0iEAZpoD}TnNy1^rXbpr}sN9H!TY}9Z#ttW?d^Ji??0NP#rd{}v~ zkL4|^n`I-X`@p8*7-Me6r{fdNFOIOmqIjJ_|0L0Pq>xBg>o0cQ-Tx4*-+ONTZ|;tf zt|(BD-_s!ChX3OchW^bF<|oqUDEu^e{LsmLiHCqM4gQSTr+Dk|0_?ruVTg1nH zLDT*HEtw?l&5YDwgQ1gbP}F9g&+uvOfcEYkT26%z!te&U+CEhdPX~qNV1;Xfl_N8Yxz!`35^uU)jD`NY7o~%sU@5DV)|!vDgi162 zbFQY#E5eyr&wz;ss7D$qZ}VJiT0m*8OuedK$3-VJoP&w=rC7Vk!ZpB+M0~@lu z9{`9TtNUOsKb#BAvroYpg|=glDjFkvSK-V2I+JUeT8-Ly3$}`G>uIhl8V4$^*L{6g zBXl_{-8{Y#y0GSyk$7a{i;3r)wJ3TW$p>~cN7Y=oe{W0hTqq5_6Rbj_5w?DiXL$uj 
z0OiJvl0GVFa5~6mKKqUT#Wa1+N|}f2)Hare4_?n z)u#>tKr8U{ql17?Qu)7V!2b-JzMtfO7YR>)KZela&D3hO>L#{A=dN?E^)G7SAIC8^ zY8eSE)tz^Ri;#Bo|DzA z!OSM(a~|U}>BbkJ{+vC|My>xq6+TV|X3tgUhW{VTy$n>E%*OdooX>xx3m+nZ*&2hn z;Xh3WKc@bJjBXA}%!=qg)9??3@TuGS*E9*Z%ekm9yHWeM3jM7@|B^HMTZKOHp)LRG zz0eyE(P~(_Rb2_XrER9<2tDZK3YE9ax#79o45)zQ6JCElUd;@mWMOc!r3Tcjp=YR#o6DwuC$wCYi~`3* z<^AZN@{_^vb1r=@^(z;ly+2;|Z1nl`gR>drZ0v<1d~QHP)-|Y*1)gEkz)b#xY8$m~ zalWo@j!i@1?~}fdBcB9`xiI|kqFXn2f!Y*NEhs?*!_SK1Lz<3@Y*s_yrRMi}-{%9L z`|KY%?yTc?SvFI#*9R&f<7uBK;L~ybzoh+7^vq%7k9qJZ39~Xg_`-0{=dSz+HXdf9 zEr67DerNccl>aO|;Lv7q&!@^by9NC5tD$S<6@RIU+~D-`*X2&#<)KdO#x!RjkbU%KF9)mozm9Lim!WP4FDKcIgxJ8`Y;#21$J{(jaeWU7nrcTLD}FK ziEWEZ%^`t<%wvo%WaaCw)_*foi2 z-LY}i+MNQU)&dl0%g~X$W73t(?|XvZUIEQWB6DyY4BHeX_*uZVB;3zZl(l6#b_qL8V8VqSK*|XioinXrk8bb*Yb7Kas{@k zbozZB1>Bo{CJi&Q1ec^~&_y|%cMHk;0ogIt4QGp@79cXS4tHZ+JYO1~G6VfcHW<9S zW@!d0R53guxO9n11DEZ78I9MLq!OWX-I75&jv_ukqIAL?_lPub{{cz*bv{KjJi?zC zr27&ApMrz@(pG(5A!ylhDCO#4x8Qg=^if5YvnSw$JE)9AfyM&h<}i(l5_3n`&~-U> zMI!N< zQ_z&(2yUd)9)JtyrGbTQP`{j!(l&5$FsV+DqmSeDxdXX}bj3GyZ#s?ZkXDVIAnbIT*<^Be2pjF!*CH8nMTopG3 ztCqDNy)=GzMagEhW7(z|9VEVH+BlRyv)$ zIF=rhjhN2^t$B(tc+d`s4K<`H(y;O`Q)>SXZ4gjEex>C~2WB%(VZ>k=UNm#Y7$3Qr z?~npRnTq4@*W)L10$Il2%*Y}#6Wx}nroTC_h}chfA|IifKlV4y8^^Tmg1o ziQ2|B%ct;p^cSDM6-Zc?QomuKGJDXyx za%L6`puZT&@+u7++msP*+x9fIlZ z`;v|&blxsoR6LlcBoERxbwJR-#6)PIQCT}^3gWw|Hv=(oj~%EgOeX6>{iH_6gXEOb zh2ud7pG|~vCge~yTIc971pQ9XV~2@rrjH_Z3vwjM*A*dMUj?!4Tte0(1yF%J<3F-Q z)06oWh5?aq+ELuv4H_4~+WSDSDpy`SW|=##8wV*lUx_|O*v^IX&twI>d9Xktzh6OO z*ePgv0ep)ZPz{~{I7e`a;8ikIe=fP-^I3;+n~Pq2eITQ^oG;)$Jrjk;rTDq5k$hba zHB?~GVEUs!=nDE%XtJPXNd(hw#<(F{c`!S6?AFYJyVEPyqe~{5#xHNuUt`d^gf_z5cF}@idL$Bm}5~B3a%F zZ;ZKcr2sg*yfn};EM`z?2V_6l2MWpVLUb9nBl$_Z9d6ep3t|}L)Qsaqr67|X) zqG-qq++}7LX`$00vwrmq;$QrWL8TbycnxJupPZ@NRc8$4=Pa~xrVANV1V0;??GVbn zs`Z$f9Tlm@2yO&Buhmtl*mVlnJ*_9@Y(f@LGGJst8&p-E8=it!Gf#}yI3RY-x`%mo z;#yGvk zW1`Z#lbN zSlc-In(>4x4uBJ0OLpgAJi8_PU(_l6QSIvy%N6{ z;j3$qTeeEt#=uP>Oa8Jc;QF*tHsupfLuO*b2%WAduQ+xZ_!kDc1?Xe*1N({ls|Z#s z$?AQ!%qe^3Z}zRDgv~Q&Ttbdtgoc z;uVjup}u}lgAeu<4eUu?d;n+&#h@BbnEnjRwKxlXC`(PeCnC!s7VinDnI-7j96?Lu z6_@u?R@WiF(v+0#1{dPdB&gS14AH+Yjtw<8;f>FeQEtG+Rd2=zE(U5JdP%5yQ&@}R zfVLUw#aLT%AsQ(6AIg-1!#(OqK3E0C-P{wUmolfUcjgG!(YSPJF=3cN*pGUnTJcWw z3U>6^UYD{Zsj7FQ#ZRL5H~h5y`uvlxRl~lGe7isH;6Fq=BysEi5q(oVG4$2(uOsp{ z8&@qn?K$m`9N4s^34Jc)(W9L?M@enhc+I@_krKSHrdgCXh9I(}5&fcxX8WOROgpa) zW3k=&thsq;^@{72#t7@GHkHuO&_2*{PM$vBH!HHf^&EAhhZ@nc`5%1s?}+i(x|o`c z*lO1I3qvJK<9720vnXM;YgcLZ{TdQt?&+CR*d3<#d_`pL8dbjS`f&dPcV5%!aP*I9 zSJXG(?m350&HjcPQGW)(ve_Izx`s&ItogH+meyH>pnu(%aI8LfFfvr9&`JnygxxL+ zh8c&wzKHGdY<`tu>~bgfM#rVK$lcYw*p{5xL8#H@j`gga3qzL;x0kdcIKK4;yjXDi ztW;UgXIqf1RI8X^Wxqa~+oU&_b~RXQ&%< zS)R+U7@73EJnr;c)n$jfIXSzuJTBfSx6P(n$O4KFPfxzwWFr`OkY$F)li9<=PpMq} z5pa~868zhb2yWqZK;|(`BDy&@xnX~4Pfx0WzpSk6Dc``TvQ(#i-~;FU{It7In{uaO z9@^xb4XrJ4rf0s0(7h3kRbK~K6}^S1(X4qa+V)~v1WVBK zrKt~tFyE}HZkQV}*{}!Jd?sSZW6`8+p0go>>$J31IqgZz#-&`g5rR>f5zpa+g%|8C zy;cM+slOGRj!9@b*$DJ0=743W*EMTv>-eCc1)hR&_I!8+IMo`11ozqH1u%G_uua%62ygeu1-!nK)FJOj_-)A-;M~PA`n<5 zOAAt@9$KQM-onrg3oY7C-x@NFQRaJ%?O?usebedOxoisc>Kg7{W_t;yHdqz!=%7h& zF7@u3^``j^OBrqiOro|hbl()HIvDyIfy3d( z&RR^~nORAdSTtSx>+-EN0N10rT<-Ibpc`>^8wLR;I2F^Ye#`c#rkr)bPmpC0$JD@Z z^*8{b#TY=C+j##$D_h$H+trQj8$fg_{N?oN7xvqFGHy7y4h{~Uny$Q(a{qp{{ZA$) zCN;oaE4GeghxF1C%%>7KzW{q}1B%zlO>Q`Pq2|E1i9=?j4p4Kg=&GuyxKkt&b!KY; zE;z4`Dt>_PJFgiG_~dcOLG`0t1|K$^CQWs0j7 z!CkxF%I{Wlb@ZX|B4M5 z<0qIy%06a5aQvqSi+&nMGZ` zOmVSCW3}B$|BL(8Z(r;`?T~FBWH~*QQ@_@D;0M6S{Q;+XzFM1d)}^iBmDZ8oD@cFN zdR@0k4QMYAztZ6RvbQs^h`@Ns6V_9hdbAha`@WPMl0(-70H|RGXTLx(< 
z%Gx@nImf|roAr&HjEst#F){IZd3i4m223SrSZQ^7B2e#rC0NgV9hBoDdHI%#gZCdi zFyFH$@KM;yA47FqfJn?E3r`;laKjef0I$WV9c4Kqh~Pb7mIdEu$c8pIAG11i=yb&? z1vNFbJXwn%^QEQN)R4Q^0zvsaS$n^vgrNzC$6IAsYzTY3X2sZsoGQINxBJ|`0X7=R z#|rX+hrEa-jrHD6P}fXPPdBr&iW1q`C(EU0#_Wh=_4N$}gs!NP0=9fVWck6JEtR0` zqOT<6?ml(am=4gIRzPCG28V^AyBz9bFJ7FtYsZeoHYh8rQ^##=%8U|jRMnT4m;W+d z2yC8qddI$-XM1Q=*7-|VQUt7}XYZXS1PNYm{q36X;$mV}XuZ9>h`2F|SXEH-kn@9ph9xwPguesbv8v=wIYJ_KRRseU6^fb~p( z*pooNQclKA5B>HJ8JNlelnM9W<|Kv<&7GnbskH>xQh+nKNn4k52}cB-hl`TyE7|e((~uFv`+qX25};v%hiS zZzslF3A%3bt!8_evd$8%zDrR$+Dj(b)nmM~hkL4` zBG!39u^NgE)vWWCHPj#XBypn3ZBCs^X)bV2De>H<(WMFZK z>;Cw(Wb}T+N@ZnDKA)dXBDH90YQC+o+}jZAj7Gz2Yis*n-?Mt}#lWUv?r3^LY+~Z> zef#$LJm2-IvGEt8dyM}#m#>fKxH5EU4902$GdbY-LDTM~=!K#8?rovHetnt{b8k5C z-r$+WoWjDw7wv)@mT=}6*ri3wLR0MZdM$#ASmp3z{iy zzI7$2ZESc-IoUE%x-F{F@xlH3*`bAzkxN=TXaMtomPn%)@Betp!=rHipljd57Q@$h1uLRq( z9kPe7lwVuAsc(1B>AN{7g6K_Vl*=Z|r0XAkAV~A=>82jBSlqKwL($`pq1VRlJ2tY+ zcz*b!%KeDj0A7GqAbzrTyqdft{|r?j2RH4LbE-n#%HH0!rKKgZ=>A?vj9}KsE5DbO zTN7H?(sEqyo7-4jUERo{-vHp4dFZy|oa{`G^pU$W?Kf}eE}Z%CUdn&ZkEfr4oz+p# zH;1j`fXF?kMmu|4??oV46dxV2nbhcNNzPYN*}rN%mrQlT#-7V+#5OiIR;sW`%AsQF zJBP0>aHfdMrEVO~UMvQMezAj@Me4Rmw{PzRm*cM%g3jf&;9mTBaLADF7Oy-FnwQ0c z?wW6G{d;fqfqOTy`8d!murIS{WG5)G+^&ipH5`h~Zx5ihfqP41VG%Cq;&(xX?DY5{ z2S>-sAx06shsCOI;}bcp$swTY*tRr0Ab>xeTlBE{Qfl)Ac_W2r7oo1 z0c<|K+~u-_&myL$q?F;mvP6n<`QtRpS`gM;E{=ko!7HsjZ=<6X#y|mRJm^0}SCF2! z#lk*-y}@R48=(8isic5`Ev=YHeWSeHT3Ri?^d|Tlkz=&r(Ja*bDaC!H#vf!+GA`Em3+a`Bqfz)?foi1-eW)*Wy3Q8V7Zb;Dc ztrH{Qa0jTvom>3SG}y&1kCtv6MdAcr1U8kr&fq%;&{y!_k)6ihg0lA_<%1e zW1#I@MczEA=%62^fqLW~R4Gn*2lmo~29vPd17lU2h;Lo8N+xgP=IK5DDbQXYY;L9J z=hJ}?sKXeqJGW#0kLhRpb#Pd08#x$sjIi7`fJqHbD=u#9nEmyFH#4)&7Bv3o11MK(>C&Z7PNm$X?i{#Ty!6SrH{Z7o-f$xJc;vZM zo$+5>Tu|_KvY@ikIz*HdjugLr-PB~1_mx!iB|tCmmjjz;{x^fvu>bu(j-aE2?}86& THyMD1Ngdc{u{ZO_6PNxE@)`-6 literal 0 HcmV?d00001 From a399bbc0370932910454029fe4d49229212ac6cf Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 6 Nov 2023 09:59:09 -0800 Subject: [PATCH 256/386] More fixes and enhancements for cache_bench (#12041) Summary: Mostly things for using cache_bench for stress/correctness testing. * Make secondary_cache_uri option work with HCC (forgot to update when secondary support was added for HCC) * Add -pinned_ratio option to keep more than just one entry per thread pinned. This can be important for testing eviction stress. * Add -vary_capacity_ratio for testing dynamically changing capacity. Also added some overrides to CacheWrapper to help with diagnostic output. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12041 Test Plan: manual, make check Reviewed By: jowlyzhang Differential Revision: D51013430 Pulled By: pdillinger fbshipit-source-id: 7914adc1218f0afacace05ccd77d3bfb91a878d0 --- cache/cache_bench_tool.cc | 112 ++++++++++++++++++++----------- include/rocksdb/advanced_cache.h | 16 +++++ 2 files changed, 90 insertions(+), 38 deletions(-) diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index b75835348..1aff1b8bf 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -63,6 +63,14 @@ DEFINE_int32( DEFINE_uint32(skew, 5, "Degree of skew in key selection. 0 = no skew"); DEFINE_bool(populate_cache, true, "Populate cache before operations"); +DEFINE_double(pinned_ratio, 0.25, + "Keep roughly this portion of entries pinned in cache."); +DEFINE_double( + vary_capacity_ratio, 0.0, + "If greater than 0.0, will periodically vary the capacity between this " + "ratio less than full size and full size. 
If vary_capacity_ratio + " + "pinned_ratio is close to or exceeds 1.0, the cache might thrash."); + DEFINE_uint32(lookup_insert_percent, 87, "Ratio of lookup (+ insert on not found) to total workload " "(expressed as a percentage)"); @@ -101,7 +109,6 @@ DEFINE_uint32(seed, 0, "Hashing/random seed to use. 0 = choose at random"); DEFINE_string(secondary_cache_uri, "", "Full URI for creating a custom secondary cache object"); -static class std::shared_ptr secondary_cache; DEFINE_string(cache_type, "lru_cache", "Type of block cache."); @@ -200,16 +207,19 @@ class SharedState { bool Started() const { return start_; } - void AddLookupStats(uint64_t hits, uint64_t misses) { + void AddLookupStats(uint64_t hits, uint64_t misses, size_t pinned_count) { MutexLock l(&mu_); lookup_count_ += hits + misses; lookup_hits_ += hits; + pinned_count_ += pinned_count; } double GetLookupHitRatio() const { return 1.0 * lookup_hits_ / lookup_count_; } + size_t GetPinnedCount() const { return pinned_count_; } + private: port::Mutex mu_; port::CondVar cv_; @@ -221,6 +231,7 @@ class SharedState { uint64_t num_done_ = 0; uint64_t lookup_count_ = 0; uint64_t lookup_hits_ = 0; + size_t pinned_count_ = 0; }; // Per-thread state for concurrent executions of the same benchmark. @@ -313,6 +324,28 @@ Cache::CacheItemHelper helper2(CacheEntryRole::kIndexBlock, DeleteFn, SizeFn, Cache::CacheItemHelper helper3_wos(CacheEntryRole::kFilterBlock, DeleteFn); Cache::CacheItemHelper helper3(CacheEntryRole::kFilterBlock, DeleteFn, SizeFn, SaveToFn, CreateFn, &helper3_wos); + +void ConfigureSecondaryCache(ShardedCacheOptions& opts) { + if (!FLAGS_secondary_cache_uri.empty()) { + std::shared_ptr secondary_cache; + Status s = SecondaryCache::CreateFromString( + ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); + if (secondary_cache == nullptr) { + fprintf(stderr, + "No secondary cache registered matching string: %s status=%s\n", + FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); + exit(1); + } + opts.secondary_cache = secondary_cache; + } +} + +ShardedCacheBase* AsShardedCache(Cache* c) { + if (!FLAGS_secondary_cache_uri.empty()) { + c = static_cast_with_check(c)->GetTarget().get(); + } + return static_cast_with_check(c); +} } // namespace class CacheBench { @@ -366,6 +399,7 @@ class CacheBench { fprintf(stderr, "Cache type not supported.\n"); exit(1); } + ConfigureSecondaryCache(opts); cache_ = opts.MakeSharedCache(); } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, @@ -373,19 +407,7 @@ class CacheBench { 0.5 /* high_pri_pool_ratio */); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); opts.memory_allocator = allocator; - if (!FLAGS_secondary_cache_uri.empty()) { - Status s = SecondaryCache::CreateFromString( - ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); - if (secondary_cache == nullptr) { - fprintf( - stderr, - "No secondary cache registered matching string: %s status=%s\n", - FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); - exit(1); - } - opts.secondary_cache = secondary_cache; - } - + ConfigureSecondaryCache(opts); cache_ = NewLRUCache(opts); } else { fprintf(stderr, "Cache type not supported.\n"); @@ -505,6 +527,8 @@ class CacheBench { size_t slot = cache_->GetTableAddressCount(); printf("Final load factor: %g (%zu / %zu)\n", 1.0 * occ / slot, occ, slot); + printf("Final pinned count: %zu\n", shared.GetPinnedCount()); + if (FLAGS_histograms) { printf("\nOperation latency (ns):\n"); HistogramImpl combined; @@ -651,29 
+675,44 @@ class CacheBench { uint64_t lookup_misses = 0; uint64_t lookup_hits = 0; // To hold handles for a non-trivial amount of time - Cache::Handle* handle = nullptr; + std::deque pinned; + size_t total_pin_count = static_cast( + (FLAGS_cache_size * FLAGS_pinned_ratio) / FLAGS_value_bytes + 0.999999); + // For this thread. Some round up, some round down, as appropriate + size_t pin_count = (total_pin_count + thread->tid) / FLAGS_threads; + KeyGen gen; const auto clock = SystemClock::Default().get(); uint64_t start_time = clock->NowMicros(); StopWatchNano timer(clock); auto system_clock = SystemClock::Default(); + size_t steps_to_next_capacity_change = 0; for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) { Slice key = gen.GetRand(thread->rnd, max_key_, FLAGS_skew); uint64_t random_op = thread->rnd.Next(); + if (FLAGS_vary_capacity_ratio > 0.0 && thread->tid == 0) { + if (steps_to_next_capacity_change == 0) { + double cut_ratio = static_cast(thread->rnd.Next()) / + static_cast(UINT64_MAX) * + FLAGS_vary_capacity_ratio; + cache_->SetCapacity(FLAGS_cache_size * (1.0 - cut_ratio)); + steps_to_next_capacity_change = + static_cast(FLAGS_ops_per_thread / 100); + } else { + --steps_to_next_capacity_change; + } + } + if (FLAGS_histograms) { timer.Start(); } if (random_op < lookup_insert_threshold_) { - if (handle) { - cache_->Release(handle); - handle = nullptr; - } // do lookup - handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, - Cache::Priority::LOW); + auto handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, + Cache::Priority::LOW); if (handle) { ++lookup_hits; if (!FLAGS_lean) { @@ -681,32 +720,25 @@ class CacheBench { result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } + pinned.push_back(handle); } else { ++lookup_misses; // do insert Status s = cache_->Insert( key, createValue(thread->rnd, cache_->memory_allocator()), - &helper2, FLAGS_value_bytes, &handle); + &helper2, FLAGS_value_bytes, &pinned.emplace_back()); assert(s.ok()); } } else if (random_op < insert_threshold_) { - if (handle) { - cache_->Release(handle); - handle = nullptr; - } // do insert Status s = cache_->Insert( key, createValue(thread->rnd, cache_->memory_allocator()), &helper3, - FLAGS_value_bytes, &handle); + FLAGS_value_bytes, &pinned.emplace_back()); assert(s.ok()); } else if (random_op < lookup_threshold_) { - if (handle) { - cache_->Release(handle); - handle = nullptr; - } // do lookup - handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, - Cache::Priority::LOW); + auto handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, + Cache::Priority::LOW); if (handle) { ++lookup_hits; if (!FLAGS_lean) { @@ -714,6 +746,7 @@ class CacheBench { result += NPHash64(static_cast(cache_->Value(handle)), FLAGS_value_bytes); } + pinned.push_back(handle); } else { ++lookup_misses; } @@ -727,7 +760,6 @@ class CacheBench { if (FLAGS_histograms) { thread->latency_ns_hist.Add(timer.ElapsedNanos()); } - thread->shared->AddLookupStats(lookup_hits, lookup_misses); if (FLAGS_usleep > 0) { unsigned us = static_cast(thread->rnd.Uniform(FLAGS_usleep + 1)); @@ -735,12 +767,17 @@ class CacheBench { system_clock->SleepForMicroseconds(us); } } + while (pinned.size() > pin_count) { + cache_->Release(pinned.front()); + pinned.pop_front(); + } } if (FLAGS_early_exit) { MutexLock l(thread->shared->GetMutex()); exit(0); } - if (handle) { + thread->shared->AddLookupStats(lookup_hits, lookup_misses, pinned.size()); + for (auto handle : pinned) { cache_->Release(handle); handle = nullptr; } @@ 
-769,8 +806,7 @@ class CacheBench { printf("Cache size : %s\n", BytesToHumanString(FLAGS_cache_size).c_str()); printf("Num shard bits : %d\n", - static_cast_with_check(cache_.get()) - ->GetNumShardBits()); + AsShardedCache(cache_.get())->GetNumShardBits()); printf("Max key : %" PRIu64 "\n", max_key_); printf("Resident ratio : %g\n", FLAGS_resident_ratio); printf("Skew degree : %u\n", FLAGS_skew); diff --git a/include/rocksdb/advanced_cache.h b/include/rocksdb/advanced_cache.h index 77f1f5ce1..e2aefdd01 100644 --- a/include/rocksdb/advanced_cache.h +++ b/include/rocksdb/advanced_cache.h @@ -603,6 +603,14 @@ class CacheWrapper : public Cache { return target_->HasStrictCapacityLimit(); } + size_t GetOccupancyCount() const override { + return target_->GetOccupancyCount(); + } + + size_t GetTableAddressCount() const override { + return target_->GetTableAddressCount(); + } + size_t GetCapacity() const override { return target_->GetCapacity(); } size_t GetUsage() const override { return target_->GetUsage(); } @@ -638,6 +646,14 @@ class CacheWrapper : public Cache { target_->WaitAll(async_handles, count); } + uint32_t GetHashSeed() const override { return target_->GetHashSeed(); } + + void ReportProblems(const std::shared_ptr& info_log) const override { + target_->ReportProblems(info_log); + } + + const std::shared_ptr& GetTarget() { return target_; } + protected: std::shared_ptr target_; }; From 2dab137182b626cb0f8d80c3f700217cd8162a41 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 6 Nov 2023 11:43:59 -0800 Subject: [PATCH 257/386] Mark more files for periodic compaction during offpeak (#12031) Summary: - The struct previously named `OffpeakTimeInfo` has been renamed to `OffpeakTimeOption` to indicate that it's a user-configurable option. Additionally, a new struct, `OffpeakTimeInfo`, has been introduced, which includes two fields: `is_now_offpeak` and `seconds_till_next_offpeak_start`. This change prevents the need to parse the `daily_offpeak_time_utc` string twice. - It's worth noting that we may consider adding more fields to the `OffpeakTimeInfo` struct, such as `elapsed_seconds` and `total_seconds`, as needed for further optimization. - Within `VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction()`, we've adjusted the `allowed_time_limit` to include files that are expected to expire by the next offpeak start. - We might explore further optimizations, such as evenly distributing files to mark during offpeak hours, if the initial approach results in marking too many files simultaneously during the first scoring in offpeak hours. The primary objective of this PR is to prevent periodic compactions during non-offpeak hours when offpeak hours are configured. We'll start with this straightforward solution and assess whether it suffices for now. 
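For readers skimming the diff, a condensed, self-contained sketch of the cutoff adjustment described above. It mirrors the `ComputeFilesMarkedForPeriodicCompaction()` change further down in this patch; the struct fields come from the new `OffpeakTimeInfo`, while the free function name `AdjustedAllowedTimeLimit` is only for illustration and does not exist in the codebase:

```cpp
#include <cstdint>

// Mirrors the new OffpeakTimeInfo introduced in options/offpeak_time_info.h.
struct OffpeakTimeInfo {
  bool is_now_offpeak = false;
  int seconds_till_next_offpeak_start = 0;
};

// Illustrative helper (not part of the patch): computes the file-age cutoff
// used when marking files for periodic compaction. Inside the offpeak window,
// the cutoff is pushed forward so that files expected to expire before the
// next offpeak window starts are marked now, while offpeak is in effect.
uint64_t AdjustedAllowedTimeLimit(uint64_t current_time,
                                  uint64_t periodic_compaction_seconds,
                                  const OffpeakTimeInfo& info) {
  const uint64_t allowed_time_limit =
      current_time - periodic_compaction_seconds;
  return allowed_time_limit +
         (info.is_now_offpeak
              ? static_cast<uint64_t>(info.seconds_till_next_offpeak_start)
              : 0);
}
```

Outside the offpeak window the added term is zero, so peak-hour behavior is unchanged; the adjustment only widens the set of files marked while the current time falls within `daily_offpeak_time_utc`.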
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12031 Test Plan: Unit Tests added - `DBCompactionTest::LevelPeriodicCompactionOffpeak` for Leveled - `DBTestUniversalCompaction2::PeriodicCompaction` for Universal Reviewed By: cbi42 Differential Revision: D50900292 Pulled By: jaykorean fbshipit-source-id: 267e7d3332d45a5d9881796786c8650fa0a3b43d --- db/compaction/compaction_picker_test.cc | 4 +- db/db_compaction_test.cc | 114 ++++++++++++++- db/db_impl/db_impl.cc | 4 +- db/db_options_test.cc | 107 +++++++++----- db/db_universal_compaction_test.cc | 138 ++++++++++++++++++ db/import_column_family_job.cc | 2 +- db/repair.cc | 2 +- db/version_builder_test.cc | 46 +++--- db/version_set.cc | 20 ++- db/version_set.h | 16 +- db/version_set_test.cc | 58 ++++++-- options/offpeak_time_info.cc | 73 +++++---- options/offpeak_time_info.h | 22 ++- .../periodic_compaction_during_offpeak.md | 1 + 14 files changed, 471 insertions(+), 136 deletions(-) create mode 100644 unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index a5184c956..3241d034d 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -86,7 +86,7 @@ class CompactionPickerTestBase : public testing::Test { &icmp_, ucmp_, options_.num_levels, style, nullptr, false, EpochNumberRequirement::kMustPresent, ioptions_.clock, options_.bottommost_file_compaction_delay, - OffpeakTimeInfo(mutable_db_options_.daily_offpeak_time_utc))); + OffpeakTimeOption(mutable_db_options_.daily_offpeak_time_utc))); vstorage_->PrepareForVersionAppend(ioptions_, mutable_cf_options_); } @@ -97,7 +97,7 @@ class CompactionPickerTestBase : public testing::Test { &icmp_, ucmp_, options_.num_levels, ioptions_.compaction_style, vstorage_.get(), false, EpochNumberRequirement::kMustPresent, ioptions_.clock, options_.bottommost_file_compaction_delay, - OffpeakTimeInfo(mutable_db_options_.daily_offpeak_time_utc))); + OffpeakTimeOption(mutable_db_options_.daily_offpeak_time_utc))); } void DeleteVersionStorage() { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 6231a4c27..2d7123117 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -19,7 +19,7 @@ #include "rocksdb/concurrent_task_limiter.h" #include "rocksdb/experimental.h" #include "rocksdb/sst_file_writer.h" -#include "rocksdb/utilities/convenience.h" +#include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/concurrent_task_limiter_impl.h" @@ -4794,9 +4794,9 @@ TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:VarintOldestAncesterTime", [&](void* arg) { if (if_restart && if_open_all_files) { - std::string* encoded_fieled = static_cast(arg); - *encoded_fieled = ""; - PutVarint64(encoded_fieled, 0); + std::string* encoded_field = static_cast(arg); + *encoded_field = ""; + PutVarint64(encoded_field, 0); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); @@ -4942,9 +4942,9 @@ TEST_F(DBCompactionTest, LevelPeriodicCompaction) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( "VersionEdit::EncodeTo:VarintFileCreationTime", [&](void* arg) { if (if_restart && if_open_all_files) { - std::string* encoded_fieled = static_cast(arg); - *encoded_fieled = ""; - PutVarint64(encoded_fieled, 0); + std::string* encoded_field = static_cast(arg); + 
*encoded_field = ""; + PutVarint64(encoded_field, 0); } }); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); @@ -5020,6 +5020,106 @@ TEST_F(DBCompactionTest, LevelPeriodicCompaction) { } } +TEST_F(DBCompactionTest, LevelPeriodicCompactionOffpeak) { + // This test simply checks if offpeak adjustment works in Leveled + // Compactions. For testing offpeak periodic compactions in various + // scenarios, please refer to + // DBTestUniversalCompaction2::PeriodicCompactionOffpeak + constexpr int kNumKeysPerFile = 32; + constexpr int kNumLevelFiles = 2; + constexpr int kValueSize = 100; + constexpr int kSecondsPerDay = 86400; + constexpr int kSecondsPerHour = 3600; + constexpr int kSecondsPerMinute = 60; + + for (bool if_restart : {false, true}) { + SCOPED_TRACE("if_restart=" + std::to_string(if_restart)); + Options options = CurrentOptions(); + options.ttl = 0; + options.periodic_compaction_seconds = 5 * kSecondsPerDay; // 5 days + // In the case where all files are opened and doing DB restart + // forcing the file creation time in manifest file to be 0 to + // simulate the case of reading from an old version. + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "VersionEdit::EncodeTo:VarintFileCreationTime", [&](void* arg) { + if (if_restart) { + std::string* encoded_field = static_cast(arg); + *encoded_field = ""; + PutVarint64(encoded_field, 0); + } + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + // Just to add some extra random days to current time + Random rnd(test::RandomSeed()); + int days = rnd.Uniform(100); + + int periodic_compactions = 0; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "LevelCompactionPicker::PickCompaction:Return", [&](void* arg) { + Compaction* compaction = static_cast(arg); + auto compaction_reason = compaction->compaction_reason(); + if (compaction_reason == CompactionReason::kPeriodicCompaction) { + periodic_compactions++; + } + }); + + // Starting at 12:15AM + int now_hour = 0; + int now_minute = 15; + auto mock_clock = std::make_shared(env_->GetSystemClock()); + auto mock_env = std::make_unique(env_, mock_clock); + options.env = mock_env.get(); + mock_clock->SetCurrentTime(days * kSecondsPerDay + + now_hour * kSecondsPerHour + + now_minute * kSecondsPerMinute); + // Offpeak is set from 12:30AM to 4:30AM + options.daily_offpeak_time_utc = "00:30-04:30"; + Reopen(options); + + for (int i = 0; i < kNumLevelFiles; ++i) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK( + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); + } + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ("2", FilesPerLevel()); + ASSERT_EQ(0, periodic_compactions); + + // Move clock forward by 1 hour. Now at 1:15AM Day 0. No compaction. + mock_clock->MockSleepForSeconds(1 * kSecondsPerHour); + ASSERT_OK(Put("a", "1")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + // Assert that the files stay in the same level + ASSERT_EQ("3", FilesPerLevel()); + ASSERT_EQ(0, periodic_compactions); + MoveFilesToLevel(1); + ASSERT_EQ("0,3", FilesPerLevel()); + + // Move clock forward by 4 days and check if it triggers periodic + // comapaction at 1:15AM Day 4. 
Files created on Day 0 at 12:15AM is + // expected to expire before the offpeak starts next day at 12:30AM + mock_clock->MockSleepForSeconds(4 * kSecondsPerDay); + ASSERT_OK(Put("b", "2")); + if (if_restart) { + Reopen(options); + } else { + ASSERT_OK(Flush()); + } + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ("1,3", FilesPerLevel()); + // The two old files go through the periodic compaction process + ASSERT_EQ(2, periodic_compactions); + + Destroy(options); + + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + } +} + TEST_F(DBCompactionTest, LevelPeriodicCompactionWithOldDB) { // This test makes sure that periodic compactions are working with a DB // where file_creation_time of some files is 0. diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 4a6279148..741636957 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -1329,7 +1329,7 @@ Status DBImpl::SetDBOptions( new_bg_job_limits.max_compactions > current_bg_job_limits.max_compactions; const bool offpeak_time_changed = - versions_->offpeak_time_info().daily_offpeak_time_utc != + versions_->offpeak_time_option().daily_offpeak_time_utc != new_db_options.daily_offpeak_time_utc; if (max_flushes_increased || max_compactions_increased || @@ -1343,7 +1343,7 @@ Status DBImpl::SetDBOptions( Env::Priority::LOW); } if (offpeak_time_changed) { - versions_->ChangeOffpeakTimeInfo( + versions_->ChangeOffpeakTimeOption( new_db_options.daily_offpeak_time_utc); } diff --git a/db/db_options_test.cc b/db/db_options_test.cc index fa7f52d29..8f60f0051 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -1155,48 +1155,67 @@ TEST_F(DBOptionsTest, OffpeakTimes) { verify_valid(); } - auto verify_is_now_offpeak = [&](bool expected, int now_utc_hour, - int now_utc_minute, int now_utc_second = 0) { + auto verify_offpeak_info = [&](bool expected_is_now_off_peak, + int expected_seconds_till_next_offpeak_start, + int now_utc_hour, int now_utc_minute, + int now_utc_second = 0) { auto mock_clock = std::make_shared(env_->GetSystemClock()); // Add some extra random days to current time int days = rnd.Uniform(100); - mock_clock->SetCurrentTime(days * 86400 + now_utc_hour * 3600 + - now_utc_minute * 60 + now_utc_second); + mock_clock->SetCurrentTime( + days * OffpeakTimeOption::kSecondsPerDay + + now_utc_hour * OffpeakTimeOption::kSecondsPerHour + + now_utc_minute * OffpeakTimeOption::kSecondsPerMinute + now_utc_second); Status s = DBImpl::TEST_ValidateOptions(options); ASSERT_OK(s); - auto offpeak_info = OffpeakTimeInfo(options.daily_offpeak_time_utc); - ASSERT_EQ(expected, offpeak_info.IsNowOffpeak(mock_clock.get())); + auto offpeak_option = OffpeakTimeOption(options.daily_offpeak_time_utc); + int64_t now; + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + auto offpeak_info = offpeak_option.GetOffpeakTimeInfo(now); + ASSERT_EQ(expected_is_now_off_peak, offpeak_info.is_now_offpeak); + ASSERT_EQ(expected_seconds_till_next_offpeak_start, + offpeak_info.seconds_till_next_offpeak_start); }; options.daily_offpeak_time_utc = ""; - verify_is_now_offpeak(false, 12, 30); + verify_offpeak_info(false, 0, 12, 30); options.daily_offpeak_time_utc = "06:30-11:30"; - verify_is_now_offpeak(false, 5, 30); - verify_is_now_offpeak(true, 6, 30); - verify_is_now_offpeak(true, 10, 30); - verify_is_now_offpeak(true, 11, 30); - verify_is_now_offpeak(false, 13, 30); + verify_offpeak_info(false, 1 * OffpeakTimeOption::kSecondsPerHour, 5, 30); + verify_offpeak_info(true, 24 * OffpeakTimeOption::kSecondsPerHour, 6, 30); + 
verify_offpeak_info(true, 20 * OffpeakTimeOption::kSecondsPerHour, 10, 30); + verify_offpeak_info(true, 19 * OffpeakTimeOption::kSecondsPerHour, 11, 30); + verify_offpeak_info(false, 17 * OffpeakTimeOption::kSecondsPerHour, 13, 30); options.daily_offpeak_time_utc = "23:30-04:30"; - verify_is_now_offpeak(false, 6, 30); - verify_is_now_offpeak(true, 23, 30); - verify_is_now_offpeak(true, 0, 0); - verify_is_now_offpeak(true, 1, 0); - verify_is_now_offpeak(true, 4, 30); - verify_is_now_offpeak(false, 4, 31); + verify_offpeak_info(false, 17 * OffpeakTimeOption::kSecondsPerHour, 6, 30); + verify_offpeak_info(true, 24 * OffpeakTimeOption::kSecondsPerHour, 23, 30); + verify_offpeak_info(true, + 23 * OffpeakTimeOption::kSecondsPerHour + + 30 * OffpeakTimeOption::kSecondsPerMinute, + 0, 0); + verify_offpeak_info(true, + 22 * OffpeakTimeOption::kSecondsPerHour + + 30 * OffpeakTimeOption::kSecondsPerMinute, + 1, 0); + verify_offpeak_info(true, 19 * OffpeakTimeOption::kSecondsPerHour, 4, 30); + verify_offpeak_info(false, + 18 * OffpeakTimeOption::kSecondsPerHour + + 59 * OffpeakTimeOption::kSecondsPerMinute, + 4, 31); // Entire day offpeak options.daily_offpeak_time_utc = "00:00-23:59"; - verify_is_now_offpeak(true, 0, 0); - verify_is_now_offpeak(true, 12, 00); - verify_is_now_offpeak(true, 23, 59); - verify_is_now_offpeak(true, 23, 59, 1); - verify_is_now_offpeak(true, 23, 59, 59); - - options.daily_offpeak_time_utc = ""; + verify_offpeak_info(true, 24 * OffpeakTimeOption::kSecondsPerHour, 0, 0); + verify_offpeak_info(true, 12 * OffpeakTimeOption::kSecondsPerHour, 12, 00); + verify_offpeak_info(true, 1 * OffpeakTimeOption::kSecondsPerMinute, 23, 59); + verify_offpeak_info(true, 59, 23, 59, 1); + verify_offpeak_info(true, 1, 23, 59, 59); + + // Start with a valid option + options.daily_offpeak_time_utc = "01:30-04:15"; DestroyAndReopen(options); - ASSERT_EQ("", dbfull()->GetDBOptions().daily_offpeak_time_utc); + ASSERT_EQ("01:30-04:15", dbfull()->GetDBOptions().daily_offpeak_time_utc); int may_schedule_compaction_called = 0; SyncPoint::GetInstance()->SetCallBack( @@ -1204,30 +1223,44 @@ TEST_F(DBOptionsTest, OffpeakTimes) { [&](void*) { may_schedule_compaction_called++; }); SyncPoint::GetInstance()->EnableProcessing(); - // Make sure calling SetDBOptions with invalid option does not set the value - // nor call MaybeScheduleFlushOrCompaction() + // Make sure calling SetDBOptions with invalid option does not change the + // value nor call MaybeScheduleFlushOrCompaction() for (std::string invalid_case : invalid_cases) { ASSERT_NOK( dbfull()->SetDBOptions({{"daily_offpeak_time_utc", invalid_case}})); - ASSERT_EQ( - "", - dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc); + ASSERT_EQ("01:30-04:15", dbfull() + ->GetVersionSet() + ->offpeak_time_option() + .daily_offpeak_time_utc); + ASSERT_EQ(1 * kSecondInHour + 30 * kSecondInMinute, + dbfull() + ->GetVersionSet() + ->offpeak_time_option() + .daily_offpeak_start_time_utc); + ASSERT_EQ(4 * kSecondInHour + 15 * kSecondInMinute, + dbfull() + ->GetVersionSet() + ->offpeak_time_option() + .daily_offpeak_end_time_utc); } ASSERT_EQ(0, may_schedule_compaction_called); // Changing to new valid values should call MaybeScheduleFlushOrCompaction() - // and sets the offpeak_time_info in VersionSet + // and sets the offpeak_time_option in VersionSet int expected_count = 0; for (std::string valid_case : valid_cases) { - if (dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc != - valid_case) { + if (dbfull() + ->GetVersionSet() + 
->offpeak_time_option() + .daily_offpeak_time_utc != valid_case) { expected_count++; } ASSERT_OK(dbfull()->SetDBOptions({{"daily_offpeak_time_utc", valid_case}})); ASSERT_EQ(valid_case, dbfull()->GetDBOptions().daily_offpeak_time_utc); - ASSERT_EQ( - valid_case, - dbfull()->GetVersionSet()->offpeak_time_info().daily_offpeak_time_utc); + ASSERT_EQ(valid_case, dbfull() + ->GetVersionSet() + ->offpeak_time_option() + .daily_offpeak_time_utc); } ASSERT_EQ(expected_count, may_schedule_compaction_called); diff --git a/db/db_universal_compaction_test.cc b/db/db_universal_compaction_test.cc index 44f5c3bfa..5c10cdaac 100644 --- a/db/db_universal_compaction_test.cc +++ b/db/db_universal_compaction_test.cc @@ -7,9 +7,12 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#include + #include "db/db_test_util.h" #include "port/stack_trace.h" #include "rocksdb/utilities/table_properties_collectors.h" +#include "test_util/mock_time_env.h" #include "test_util/sync_point.h" #include "test_util/testutil.h" #include "util/random.h" @@ -2230,6 +2233,141 @@ TEST_F(DBTestUniversalCompaction2, PeriodicCompaction) { ASSERT_EQ(4, output_level); } +TEST_F(DBTestUniversalCompaction2, PeriodicCompactionOffpeak) { + constexpr int kSecondsPerDay = 86400; + constexpr int kSecondsPerHour = 3600; + constexpr int kSecondsPerMinute = 60; + + Options opts = CurrentOptions(); + opts.compaction_style = kCompactionStyleUniversal; + opts.level0_file_num_compaction_trigger = 10; + opts.max_open_files = -1; + opts.compaction_options_universal.size_ratio = 10; + opts.compaction_options_universal.min_merge_width = 2; + opts.compaction_options_universal.max_size_amplification_percent = 200; + opts.periodic_compaction_seconds = 5 * kSecondsPerDay; // 5 days + opts.num_levels = 5; + + // Just to add some extra random days to current time + Random rnd(test::RandomSeed()); + int days = rnd.Uniform(100); + + int periodic_compactions = 0; + int start_level = -1; + int output_level = -1; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "UniversalCompactionPicker::PickPeriodicCompaction:Return", + [&](void* arg) { + Compaction* compaction = reinterpret_cast(arg); + ASSERT_TRUE(arg != nullptr); + ASSERT_TRUE(compaction->compaction_reason() == + CompactionReason::kPeriodicCompaction); + start_level = compaction->start_level(); + output_level = compaction->output_level(); + periodic_compactions++; + }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + + for (std::string preset_offpeak_time : {"", "00:30-04:30", "10:30-02:30"}) { + SCOPED_TRACE("preset_offpeak_time=" + preset_offpeak_time); + for (std::string new_offpeak_time : {"", "23:30-02:30"}) { + SCOPED_TRACE("new_offpeak_time=" + new_offpeak_time); + std::vector> times_to_test = { + {0, 0}, {2, 30}, {3, 15}, {5, 10}, {13, 30}, {23, 30}}; + for (std::pair now : times_to_test) { + int now_hour = now.first; + int now_minute = now.second; + SCOPED_TRACE("now=" + std::to_string(now_hour) + ":" + + std::to_string(now_minute)); + + auto mock_clock = + std::make_shared(env_->GetSystemClock()); + auto mock_env = std::make_unique(env_, mock_clock); + opts.env = mock_env.get(); + mock_clock->SetCurrentTime(days * kSecondsPerDay + + now_hour * kSecondsPerHour + + now_minute * kSecondsPerMinute); + opts.daily_offpeak_time_utc = preset_offpeak_time; + Reopen(opts); + + ASSERT_OK(Put("foo", "bar1")); + ASSERT_OK(Flush()); + ASSERT_EQ(0, periodic_compactions); + + // 
Move clock forward by 8 hours. There should be no periodic + // compaction, yet. + mock_clock->MockSleepForSeconds(8 * kSecondsPerHour); + ASSERT_OK(Put("foo", "bar2")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(0, periodic_compactions); + + // Move clock forward by 4 days + mock_clock->MockSleepForSeconds(4 * kSecondsPerDay); + ASSERT_OK(Put("foo", "bar3")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + + int64_t mock_now; + ASSERT_OK(mock_clock->GetCurrentTime(&mock_now)); + + auto offpeak_time_info = + dbfull()->GetVersionSet()->offpeak_time_option().GetOffpeakTimeInfo( + mock_now); + // At this point, the first file is 4 days and 8 hours old. + // If it's offpeak now and the file is expected to expire before the + // next offpeak starts + if (offpeak_time_info.is_now_offpeak && + offpeak_time_info.seconds_till_next_offpeak_start / + kSecondsPerHour > + 16) { + ASSERT_EQ(1, periodic_compactions); + } else { + ASSERT_EQ(0, periodic_compactions); + // Change offpeak option by SetDBOption() + if (preset_offpeak_time != new_offpeak_time) { + ASSERT_OK(dbfull()->SetDBOptions( + {{"daily_offpeak_time_utc", new_offpeak_time}})); + ASSERT_OK(Put("foo", "bar4")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + offpeak_time_info = dbfull() + ->GetVersionSet() + ->offpeak_time_option() + .GetOffpeakTimeInfo(mock_now); + // if the first file is now eligible to be picked up + if (offpeak_time_info.is_now_offpeak && + offpeak_time_info.seconds_till_next_offpeak_start / + kSecondsPerHour > + 16) { + ASSERT_OK(Put("foo", "bar5")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + ASSERT_EQ(1, periodic_compactions); + } + } + + // If the file has not been picked up yet (no offpeak set, or offpeak + // set but then unset before the file becomes eligible) + if (periodic_compactions == 0) { + // move clock forward by one more day + mock_clock->MockSleepForSeconds(1 * kSecondsPerDay); + ASSERT_OK(Put("foo", "bar6")); + ASSERT_OK(Flush()); + ASSERT_OK(dbfull()->TEST_WaitForCompact()); + } + } + ASSERT_EQ(1, periodic_compactions); + ASSERT_EQ(0, start_level); + ASSERT_EQ(4, output_level); + Destroy(opts); + + periodic_compactions = 0; + } + } + } +} + } // namespace ROCKSDB_NAMESPACE diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 9c285a3d8..f7b8a50ae 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -187,7 +187,7 @@ Status ImportColumnFamilyJob::Run() { nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks, EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock, cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay, - cfd_->current()->version_set()->offpeak_time_info()); + cfd_->current()->version_set()->offpeak_time_option()); Status s; for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) { diff --git a/db/repair.cc b/db/repair.cc index dee5d6c7e..cc77ef8e6 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -695,7 +695,7 @@ class Repairer { nullptr /* src_vstorage */, cfd->ioptions()->force_consistency_checks, EpochNumberRequirement::kMightMissing, cfd->ioptions()->clock, /*bottommost_file_compaction_delay=*/0, - cfd->current()->version_set()->offpeak_time_info()); + cfd->current()->version_set()->offpeak_time_option()); Status s; VersionEdit dummy_edit; for (const auto* table : cf_id_and_tables.second) { diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index 
00b4a810a..2ca10c449 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -39,7 +39,7 @@ class VersionBuilderTest : public testing::Test { vstorage_(&icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMustPresent, ioptions_.clock, options_.bottommost_file_compaction_delay, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)), + OffpeakTimeOption(options_.daily_offpeak_time_utc)), file_num_(1) { mutable_cf_options_.RefreshDerivedOptions(ioptions_); size_being_compacted_.resize(options_.num_levels); @@ -204,7 +204,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -256,7 +256,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -312,7 +312,7 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -370,7 +370,7 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -394,7 +394,7 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, false, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); VersionEdit version_edit; version_edit.AddFile( @@ -563,7 +563,7 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -708,7 +708,7 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -753,7 +753,7 @@ 
TEST_F(VersionBuilderTest, ApplyBlobFileAddition) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -893,7 +893,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileInBase) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -967,7 +967,7 @@ TEST_F(VersionBuilderTest, ApplyBlobFileGarbageFileAdditionApplied) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1148,7 +1148,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1197,7 +1197,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesTo) { VersionStorageInfo newer_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &new_vstorage, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(second_builder.SaveTo(&newer_vstorage)); @@ -1284,7 +1284,7 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1388,7 +1388,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1428,7 +1428,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesInconsistentLinks) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1470,7 +1470,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbage) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, 
force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1520,7 +1520,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFilesAllGarbageLinkedSsts) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); const Status s = builder.SaveTo(&new_vstorage); ASSERT_TRUE(s.IsCorruption()); @@ -1684,7 +1684,7 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { VersionStorageInfo new_vstorage( &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, &vstorage_, force_consistency_checks, EpochNumberRequirement::kMightMissing, nullptr, - 0, OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + 0, OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(builder.SaveTo(&new_vstorage)); @@ -1737,7 +1737,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, true /* force_consistency_checks */, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder.Apply(&version_edit)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -1749,7 +1749,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForFileDeletedTwice) { &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr, true /* force_consistency_checks */, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_NOK(version_builder2.Apply(&version_edit)); UnrefFilesInVersion(&new_vstorage); @@ -1789,7 +1789,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr /* src_vstorage */, true /* force_consistency_checks */, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder_1.Apply(&version_edit_1)); s = version_builder_1.SaveTo(&new_vstorage_1); @@ -1828,7 +1828,7 @@ TEST_F(VersionBuilderTest, CheckConsistencyForL0FilesSortedByEpochNumber) { &icmp_, ucmp_, options_.num_levels, kCompactionStyleLevel, nullptr /* src_vstorage */, true /* force_consistency_checks */, EpochNumberRequirement::kMightMissing, nullptr, 0, - OffpeakTimeInfo(options_.daily_offpeak_time_utc)); + OffpeakTimeOption(options_.daily_offpeak_time_utc)); ASSERT_OK(version_builder_2.Apply(&version_edit_2)); s = version_builder_2.SaveTo(&new_vstorage_2); diff --git a/db/version_set.cc b/db/version_set.cc index b66c9b8cd..0055d3968 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2125,7 +2125,7 @@ VersionStorageInfo::VersionStorageInfo( bool _force_consistency_checks, EpochNumberRequirement epoch_number_requirement, SystemClock* clock, uint32_t bottommost_file_compaction_delay, - OffpeakTimeInfo offpeak_time_info) + OffpeakTimeOption offpeak_time_option) : internal_comparator_(internal_comparator), user_comparator_(user_comparator), // cfd is nullptr if Version is dummy @@ -2158,7 
+2158,7 @@ VersionStorageInfo::VersionStorageInfo( finalized_(false), force_consistency_checks_(_force_consistency_checks), epoch_number_requirement_(epoch_number_requirement), - offpeak_time_info_(offpeak_time_info) { + offpeak_time_option_(std::move(offpeak_time_option)) { if (ref_vstorage != nullptr) { accumulated_file_size_ = ref_vstorage->accumulated_file_size_; accumulated_raw_key_size_ = ref_vstorage->accumulated_raw_key_size_; @@ -2204,7 +2204,7 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset, cfd_ == nullptr ? nullptr : cfd_->ioptions()->clock, cfd_ == nullptr ? 0 : mutable_cf_options.bottommost_file_compaction_delay, - vset->offpeak_time_info()), + vset->offpeak_time_option()), vset_(vset), next_(this), prev_(this), @@ -3672,6 +3672,16 @@ void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( const uint64_t allowed_time_limit = current_time - periodic_compaction_seconds; + // Find the adjust_allowed_time_limit such that it includes files that are + // going to expire by the time next daily offpeak starts. + const OffpeakTimeInfo offpeak_time_info = + offpeak_time_option_.GetOffpeakTimeInfo(current_time); + const uint64_t adjusted_allowed_time_limit = + allowed_time_limit + + (offpeak_time_info.is_now_offpeak + ? offpeak_time_info.seconds_till_next_offpeak_start + : 0); + for (int level = 0; level <= last_level; level++) { for (auto f : files_[level]) { if (!f->being_compacted) { @@ -3698,7 +3708,7 @@ void VersionStorageInfo::ComputeFilesMarkedForPeriodicCompaction( } } if (file_modification_time > 0 && - file_modification_time < allowed_time_limit) { + file_modification_time < adjusted_allowed_time_limit) { files_marked_for_periodic_compaction_.emplace_back(level, f); } } @@ -5077,7 +5087,7 @@ VersionSet::VersionSet( block_cache_tracer_(block_cache_tracer), io_tracer_(io_tracer), db_session_id_(db_session_id), - offpeak_time_info_(OffpeakTimeInfo(daily_offpeak_time_utc)) {} + offpeak_time_option_(OffpeakTimeOption(daily_offpeak_time_utc)) {} VersionSet::~VersionSet() { // we need to delete column_family_set_ because its destructor depends on diff --git a/db/version_set.h b/db/version_set.h index a6bfc5aa6..9aba238a4 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -136,7 +136,7 @@ class VersionStorageInfo { EpochNumberRequirement epoch_number_requirement, SystemClock* clock, uint32_t bottommost_file_compaction_delay, - OffpeakTimeInfo offpeak_time_info); + OffpeakTimeOption offpeak_time_option); // No copying allowed VersionStorageInfo(const VersionStorageInfo&) = delete; void operator=(const VersionStorageInfo&) = delete; @@ -766,7 +766,7 @@ class VersionStorageInfo { EpochNumberRequirement epoch_number_requirement_; - OffpeakTimeInfo offpeak_time_info_; + OffpeakTimeOption offpeak_time_option_; friend class Version; friend class VersionSet; @@ -1508,9 +1508,11 @@ class VersionSet { } // TODO - Consider updating together when file options change in SetDBOptions - const OffpeakTimeInfo& offpeak_time_info() { return offpeak_time_info_; } - void ChangeOffpeakTimeInfo(const std::string& daily_offpeak_time_utc) { - offpeak_time_info_.daily_offpeak_time_utc = daily_offpeak_time_utc; + const OffpeakTimeOption& offpeak_time_option() { + return offpeak_time_option_; + } + void ChangeOffpeakTimeOption(const std::string& daily_offpeak_time_utc) { + offpeak_time_option_.SetFromOffpeakTimeString(daily_offpeak_time_utc); } const ImmutableDBOptions* db_options() const { return db_options_; } @@ -1663,8 +1665,8 @@ class VersionSet { std::string 
db_session_id_; - // Off-peak time information used for compaction scoring - OffpeakTimeInfo offpeak_time_info_; + // Off-peak time option used for compaction scoring + OffpeakTimeOption offpeak_time_option_; private: // REQUIRES db mutex at beginning. may release and re-acquire db mutex diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 43d4036a3..aaf331c57 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -134,7 +134,7 @@ class VersionStorageInfoTestBase : public testing::Test { /*_force_consistency_checks=*/false, EpochNumberRequirement::kMustPresent, ioptions_.clock, mutable_cf_options_.bottommost_file_compaction_delay, - OffpeakTimeInfo()) {} + OffpeakTimeOption()) {} ~VersionStorageInfoTestBase() override { for (int i = 0; i < vstorage_.num_levels(); ++i) { @@ -2200,63 +2200,89 @@ TEST_F(VersionSetTest, OffpeakTimeInfoTest) { // and see if IsNowOffpeak() returns correctly per time changes int now_hour = 13; int now_minute = 30; - versions_->ChangeOffpeakTimeInfo("23:30-04:30"); + versions_->ChangeOffpeakTimeOption("23:30-04:30"); auto mock_clock = std::make_shared(env_->GetSystemClock()); // Add some extra random days to current time int days = rnd.Uniform(100); mock_clock->SetCurrentTime(days * 86400 + now_hour * 3600 + now_minute * 60); + int64_t now; + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); // Starting at 1:30PM. It's not off-peak - ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_FALSE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Now it's at 4:30PM. Still not off-peak mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_FALSE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Now it's at 11:30PM. It's off-peak mock_clock->MockSleepForSeconds(7 * 3600); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Now it's at 2:30AM next day. It's still off-peak mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Now it's at 4:30AM. It's still off-peak mock_clock->MockSleepForSeconds(2 * 3600); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Sleep for one more minute. It's at 4:31AM It's no longer off-peak mock_clock->MockSleepForSeconds(60); - ASSERT_FALSE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_FALSE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Entire day offpeak - versions_->ChangeOffpeakTimeInfo("00:00-23:59"); + versions_->ChangeOffpeakTimeOption("00:00-23:59"); // It doesn't matter what time it is. It should be just offpeak. - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Mock Sleep for 3 hours. 
It's still off-peak mock_clock->MockSleepForSeconds(3 * 3600); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Mock Sleep for 20 hours. It's still off-peak mock_clock->MockSleepForSeconds(20 * 3600); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Mock Sleep for 59 minutes. It's still off-peak mock_clock->MockSleepForSeconds(59 * 60); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Mock Sleep for 59 seconds. It's still off-peak mock_clock->MockSleepForSeconds(59); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Mock Sleep for 1 second (exactly 24h passed). It's still off-peak mock_clock->MockSleepForSeconds(1); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); // Another second for sanity check mock_clock->MockSleepForSeconds(1); - ASSERT_TRUE(versions_->offpeak_time_info().IsNowOffpeak(mock_clock.get())); + ASSERT_OK(mock_clock.get()->GetCurrentTime(&now)); + ASSERT_TRUE( + versions_->offpeak_time_option().GetOffpeakTimeInfo(now).is_now_offpeak); } TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) { diff --git a/options/offpeak_time_info.cc b/options/offpeak_time_info.cc index 678d112f1..4eaeb6e27 100644 --- a/options/offpeak_time_info.cc +++ b/options/offpeak_time_info.cc @@ -9,40 +9,51 @@ #include "util/string_util.h" namespace ROCKSDB_NAMESPACE { -OffpeakTimeInfo::OffpeakTimeInfo() : daily_offpeak_time_utc("") {} -OffpeakTimeInfo::OffpeakTimeInfo(const std::string& offpeak_time) - : daily_offpeak_time_utc(offpeak_time) {} +OffpeakTimeOption::OffpeakTimeOption() : OffpeakTimeOption("") {} +OffpeakTimeOption::OffpeakTimeOption(const std::string& offpeak_time_string) { + SetFromOffpeakTimeString(offpeak_time_string); +} + +void OffpeakTimeOption::SetFromOffpeakTimeString( + const std::string& offpeak_time_string) { + const int old_start_time = daily_offpeak_start_time_utc; + const int old_end_time = daily_offpeak_end_time_utc; + if (TryParseTimeRangeString(offpeak_time_string, daily_offpeak_start_time_utc, + daily_offpeak_end_time_utc)) { + daily_offpeak_time_utc = offpeak_time_string; + } else { + daily_offpeak_start_time_utc = old_start_time; + daily_offpeak_end_time_utc = old_end_time; + } +} -bool OffpeakTimeInfo::IsNowOffpeak(SystemClock* clock) const { - if (daily_offpeak_time_utc.empty()) { - return false; +OffpeakTimeInfo OffpeakTimeOption::GetOffpeakTimeInfo( + const int64_t& current_time) const { + OffpeakTimeInfo offpeak_time_info; + if (daily_offpeak_start_time_utc == daily_offpeak_end_time_utc) { + return offpeak_time_info; } - int64_t now; - if (clock->GetCurrentTime(&now).ok()) { - constexpr int kSecondsPerDay = 86400; - constexpr int kSecondsPerMinute = 60; - int 
seconds_since_midnight_to_nearest_minute = - (static_cast(now % kSecondsPerDay) / kSecondsPerMinute) * - kSecondsPerMinute; - int start_time = 0, end_time = 0; - bool success = - TryParseTimeRangeString(daily_offpeak_time_utc, start_time, end_time); - assert(success); - assert(start_time != end_time); - if (!success) { - // If the validation was done properly, we should never reach here - return false; - } - // if the offpeak duration spans overnight (i.e. 23:30 - 4:30 next day) - if (start_time > end_time) { - return start_time <= seconds_since_midnight_to_nearest_minute || - seconds_since_midnight_to_nearest_minute <= end_time; - } else { - return start_time <= seconds_since_midnight_to_nearest_minute && - seconds_since_midnight_to_nearest_minute <= end_time; - } + int seconds_since_midnight = static_cast(current_time % kSecondsPerDay); + int seconds_since_midnight_to_nearest_minute = + (seconds_since_midnight / kSecondsPerMinute) * kSecondsPerMinute; + // if the offpeak duration spans overnight (i.e. 23:30 - 4:30 next day) + if (daily_offpeak_start_time_utc > daily_offpeak_end_time_utc) { + offpeak_time_info.is_now_offpeak = + daily_offpeak_start_time_utc <= + seconds_since_midnight_to_nearest_minute || + seconds_since_midnight_to_nearest_minute <= daily_offpeak_end_time_utc; + } else { + offpeak_time_info.is_now_offpeak = + daily_offpeak_start_time_utc <= + seconds_since_midnight_to_nearest_minute && + seconds_since_midnight_to_nearest_minute <= daily_offpeak_end_time_utc; } - return false; + offpeak_time_info.seconds_till_next_offpeak_start = + seconds_since_midnight < daily_offpeak_start_time_utc + ? daily_offpeak_start_time_utc - seconds_since_midnight + : ((daily_offpeak_start_time_utc + kSecondsPerDay) - + seconds_since_midnight); + return offpeak_time_info; } } // namespace ROCKSDB_NAMESPACE diff --git a/options/offpeak_time_info.h b/options/offpeak_time_info.h index 74b456d3c..75d61abb4 100644 --- a/options/offpeak_time_info.h +++ b/options/offpeak_time_info.h @@ -13,10 +13,24 @@ namespace ROCKSDB_NAMESPACE { class SystemClock; struct OffpeakTimeInfo { - OffpeakTimeInfo(); - explicit OffpeakTimeInfo(const std::string& offpeak_time); - std::string daily_offpeak_time_utc; - bool IsNowOffpeak(SystemClock* clock) const; + bool is_now_offpeak = false; + int seconds_till_next_offpeak_start = 0; +}; + +struct OffpeakTimeOption { + static constexpr int kSecondsPerDay = 86400; + static constexpr int kSecondsPerHour = 3600; + static constexpr int kSecondsPerMinute = 60; + + OffpeakTimeOption(); + explicit OffpeakTimeOption(const std::string& offpeak_time_string); + std::string daily_offpeak_time_utc = ""; + int daily_offpeak_start_time_utc = 0; + int daily_offpeak_end_time_utc = 0; + + void SetFromOffpeakTimeString(const std::string& offpeak_time_string); + + OffpeakTimeInfo GetOffpeakTimeInfo(const int64_t& current_time) const; }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md b/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md new file mode 100644 index 000000000..475adf7f7 --- /dev/null +++ b/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md @@ -0,0 +1 @@ +During off-peak hours defined by `daily_offpeak_time_utc`, the compaction picker will select a larger number of files for periodic compaction. 
This selection will include files that are projected to expire by the next off-peak start time, ensuring that these files are not chosen for periodic compaction outside of off-peak hours. From 0ecfc4fbb45c14cfc34054d2f7f6fdfd910b46f9 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 6 Nov 2023 15:04:41 -0800 Subject: [PATCH 258/386] AttributeGroups - GetEntity Implementation (#11943) Summary: Implementation of `GetEntity()` API that returns wide-column entities as AttributeGroups from multiple column families for a single key. Regarding the definition of Attribute groups, please see the detailed example description in PR https://github.com/facebook/rocksdb/issues/11925 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11943 Test Plan: - `DBWideBasicTest::GetEntityAsPinnableAttributeGroups` added will enable the new API in the `db_stress` after merging Reviewed By: ltamasi Differential Revision: D50195794 Pulled By: jaykorean fbshipit-source-id: 218d54841ac7e337de62e13b1233b0a99bd91af3 --- db/db_impl/db_impl.cc | 47 ++++++++++++ db/db_impl/db_impl.h | 2 + db/wide/db_wide_basic_test.cc | 135 +++++++++++++++++++++++++++++++--- include/rocksdb/db.h | 10 +++ 4 files changed, 184 insertions(+), 10 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 741636957..a36eda16f 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2057,6 +2057,53 @@ Status DBImpl::GetEntity(const ReadOptions& _read_options, return GetImpl(read_options, key, get_impl_options); } +Status DBImpl::GetEntity(const ReadOptions& _read_options, const Slice& key, + PinnableAttributeGroups* result) { + if (!result) { + return Status::InvalidArgument( + "Cannot call GetEntity without PinnableAttributeGroups object"); + } + const size_t num_column_families = result->size(); + if (_read_options.io_activity != Env::IOActivity::kUnknown && + _read_options.io_activity != Env::IOActivity::kGetEntity) { + Status s = Status::InvalidArgument( + "Cannot call GetEntity with `ReadOptions::io_activity` != " + "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGetEntity`"); + for (size_t i = 0; i < num_column_families; ++i) { + (*result)[i].SetStatus(s); + } + return s; + } + // return early if no CF was passed in + if (num_column_families == 0) { + return Status::OK(); + } + ReadOptions read_options(_read_options); + if (read_options.io_activity == Env::IOActivity::kUnknown) { + read_options.io_activity = Env::IOActivity::kGetEntity; + } + std::vector keys; + std::vector column_families; + for (size_t i = 0; i < num_column_families; ++i) { + // Adding the same key slice for different CFs + keys.emplace_back(key); + column_families.emplace_back((*result)[i].column_family()); + } + std::vector columns(num_column_families); + std::vector statuses(num_column_families); + MultiGetCommon( + read_options, num_column_families, column_families.data(), keys.data(), + /* values */ nullptr, columns.data(), + /* timestamps */ nullptr, statuses.data(), /* sorted_input */ false); + // Set results + for (size_t i = 0; i < num_column_families; ++i) { + (*result)[i].Reset(); + (*result)[i].SetStatus(statuses[i]); + (*result)[i].SetColumns(std::move(columns[i])); + } + return Status::OK(); +} + bool DBImpl::ShouldReferenceSuperVersion(const MergeContext& merge_context) { // If both thresholds are reached, a function returning merge operands as // `PinnableSlice`s should reference the `SuperVersion` to avoid large and/or diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 6769d7f50..4ac6a2d14 100644 
--- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -242,6 +242,8 @@ class DBImpl : public DB { Status GetEntity(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, PinnableWideColumns* columns) override; + Status GetEntity(const ReadOptions& options, const Slice& key, + PinnableAttributeGroups* result) override; using DB::GetMergeOperands; Status GetMergeOperands(const ReadOptions& options, diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 03edf26b6..821e9b54a 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -236,6 +236,121 @@ TEST_F(DBWideBasicTest, PutEntityColumnFamily) { ASSERT_OK(db_->Write(WriteOptions(), &batch)); } +TEST_F(DBWideBasicTest, GetEntityAsPinnableAttributeGroups) { + Options options = GetDefaultOptions(); + CreateAndReopenWithCF({"hot_cf", "cold_cf"}, options); + + constexpr int kDefaultCfHandleIndex = 0; + constexpr int kHotCfHandleIndex = 1; + constexpr int kColdCfHandleIndex = 2; + + constexpr char first_key[] = "first"; + WideColumns first_default_columns{ + {"default_cf_col_1_name", "first_key_default_cf_col_1_value"}, + {"default_cf_col_2_name", "first_key_default_cf_col_2_value"}}; + WideColumns first_hot_columns{ + {"hot_cf_col_1_name", "first_key_hot_cf_col_1_value"}, + {"hot_cf_col_2_name", "first_key_hot_cf_col_2_value"}}; + WideColumns first_cold_columns{ + {"cold_cf_col_1_name", "first_key_cold_cf_col_1_value"}}; + + constexpr char second_key[] = "second"; + WideColumns second_hot_columns{ + {"hot_cf_col_1_name", "second_key_hot_cf_col_1_value"}}; + WideColumns second_cold_columns{ + {"cold_cf_col_1_name", "second_key_cold_cf_col_1_value"}}; + + // TODO - update this to use the multi-attribute-group PutEntity when ready + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kDefaultCfHandleIndex], + first_key, first_default_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], + first_key, first_hot_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], + first_key, first_cold_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], + second_key, second_hot_columns)); + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], + second_key, second_cold_columns)); + + std::vector all_cfs = handles_; + std::vector default_and_hot_cfs{ + {handles_[kDefaultCfHandleIndex], handles_[kHotCfHandleIndex]}}; + std::vector hot_and_cold_cfs{ + {handles_[kHotCfHandleIndex], handles_[kColdCfHandleIndex]}}; + auto create_result = + [](const std::vector& column_families) + -> PinnableAttributeGroups { + PinnableAttributeGroups result; + for (size_t i = 0; i < column_families.size(); ++i) { + result.emplace_back(column_families[i]); + } + return result; + }; + + { + // Case 1. 
Get first key from default cf and hot_cf and second key from + // hot_cf and cold_cf + constexpr size_t num_column_families = 2; + PinnableAttributeGroups first_key_result = + create_result(default_and_hot_cfs); + PinnableAttributeGroups second_key_result = create_result(hot_and_cold_cfs); + + // GetEntity for first_key + ASSERT_OK(db_->GetEntity(ReadOptions(), first_key, &first_key_result)); + ASSERT_EQ(num_column_families, first_key_result.size()); + // We expect to get values for all keys and CFs + for (size_t i = 0; i < num_column_families; ++i) { + ASSERT_OK(first_key_result[i].status()); + } + // verify values for first key (default cf and hot cf) + ASSERT_EQ(first_default_columns, first_key_result[0].columns()); + ASSERT_EQ(first_hot_columns, first_key_result[1].columns()); + + // GetEntity for second_key + ASSERT_OK(db_->GetEntity(ReadOptions(), second_key, &second_key_result)); + ASSERT_EQ(num_column_families, second_key_result.size()); + // We expect to get values for all keys and CFs + for (size_t i = 0; i < num_column_families; ++i) { + ASSERT_OK(second_key_result[i].status()); + } + // verify values for second key (hot cf and cold cf) + ASSERT_EQ(second_hot_columns, second_key_result[0].columns()); + ASSERT_EQ(second_cold_columns, second_key_result[1].columns()); + } + { + // Case 2. Get first key and second key from all cfs. For the second key, we + // don't expect to get columns from default cf. + constexpr size_t num_column_families = 3; + PinnableAttributeGroups first_key_result = create_result(all_cfs); + PinnableAttributeGroups second_key_result = create_result(all_cfs); + + // GetEntity for first_key + ASSERT_OK(db_->GetEntity(ReadOptions(), first_key, &first_key_result)); + ASSERT_EQ(num_column_families, first_key_result.size()); + // We expect to get values for all keys and CFs + for (size_t i = 0; i < num_column_families; ++i) { + ASSERT_OK(first_key_result[i].status()); + } + // verify values for first key + ASSERT_EQ(first_default_columns, first_key_result[0].columns()); + ASSERT_EQ(first_hot_columns, first_key_result[1].columns()); + ASSERT_EQ(first_cold_columns, first_key_result[2].columns()); + + // GetEntity for second_key + ASSERT_OK(db_->GetEntity(ReadOptions(), second_key, &second_key_result)); + ASSERT_EQ(num_column_families, second_key_result.size()); + // key does not exist in default cf + ASSERT_NOK(second_key_result[0].status()); + ASSERT_TRUE(second_key_result[0].status().IsNotFound()); + + // verify values for second key (hot cf and cold cf) + ASSERT_OK(second_key_result[1].status()); + ASSERT_OK(second_key_result[2].status()); + ASSERT_EQ(second_hot_columns, second_key_result[1].columns()); + ASSERT_EQ(second_cold_columns, second_key_result[2].columns()); + } +} + TEST_F(DBWideBasicTest, MultiCFMultiGetEntity) { Options options = GetDefaultOptions(); CreateAndReopenWithCF({"corinthian"}, options); @@ -274,9 +389,9 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { Options options = GetDefaultOptions(); CreateAndReopenWithCF({"hot_cf", "cold_cf"}, options); - constexpr int DEFAULT_CF_HANDLE_INDEX = 0; - constexpr int HOT_CF_HANDLE_INDEX = 1; - constexpr int COLD_CF_HANDLE_INDEX = 2; + constexpr int kDefaultCfHandleIndex = 0; + constexpr int kHotCfHandleIndex = 1; + constexpr int kColdCfHandleIndex = 2; constexpr char first_key[] = "first"; WideColumns first_default_columns{ @@ -294,24 +409,24 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { {"cold_cf_col_1_name", "second_key_cold_cf_col_1_value"}}; 
// TODO - update this to use the multi-attribute-group PutEntity when ready - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[DEFAULT_CF_HANDLE_INDEX], + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kDefaultCfHandleIndex], first_key, first_default_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[HOT_CF_HANDLE_INDEX], + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], first_key, first_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[COLD_CF_HANDLE_INDEX], + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], first_key, first_cold_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[HOT_CF_HANDLE_INDEX], + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], second_key, second_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[COLD_CF_HANDLE_INDEX], + ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], second_key, second_cold_columns)); constexpr size_t num_keys = 2; std::array keys = {first_key, second_key}; std::vector all_cfs = handles_; std::vector default_and_hot_cfs{ - {handles_[DEFAULT_CF_HANDLE_INDEX], handles_[HOT_CF_HANDLE_INDEX]}}; + {handles_[kDefaultCfHandleIndex], handles_[kHotCfHandleIndex]}}; std::vector hot_and_cold_cfs{ - {handles_[HOT_CF_HANDLE_INDEX], handles_[COLD_CF_HANDLE_INDEX]}}; + {handles_[kHotCfHandleIndex], handles_[kColdCfHandleIndex]}}; auto create_result = [](const std::vector& column_families) -> PinnableAttributeGroups { diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index a536a76c1..fae1729b1 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -602,6 +602,16 @@ class DB { return Status::NotSupported("GetEntity not supported"); } + // Returns logically grouped wide-column entities per column family (a.k.a. + // attribute groups) for a single key. PinnableAttributeGroups is a vector of + // PinnableAttributeGroup. Each PinnableAttributeGroup will have + // ColumnFamilyHandle* as input, and Status and PinnableWideColumns as output. + virtual Status GetEntity(const ReadOptions& /* options */, + const Slice& /* key */, + PinnableAttributeGroups* /* result */) { + return Status::NotSupported("GetEntity not supported"); + } + // Populates the `merge_operands` array with all the merge operands in the DB // for `key`. The `merge_operands` array will be populated in the order of // insertion. The number of entries populated in `merge_operands` will be From 92dc5f3e67d9a84715f763218f962bc9282274da Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 6 Nov 2023 16:06:01 -0800 Subject: [PATCH 259/386] AutoHCC: fix a bug with "blind" Insert (#12046) Summary: I have finally tracked down and fixed a bug affecting AutoHCC that was causing CI crash test assertion failures in AutoHCC when using secondary cache, but I was only able to reproduce locally a couple of times, after very long runs/repetitions. It turns out that the essential feature used by secondary cache to trigger the bug is Insert without keeping a handle, which is otherwise rarely used in RocksDB and not incorporated into cache_bench (also used for targeted correctness stress testing) until this change (new option `-blind_insert_percent`). The problem was in copying some logic from FixedHCC that makes the entry "sharable" but unreferenced once populated, if no reference is to be saved. 
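For concreteness, a minimal sketch of the two insert flavors from a cache user's point of view (mirroring the calls added to cache_bench below; the `InsertBothWays` wrapper, its parameters, and the values passed in are illustrative placeholders, not code from this change):

```cpp
#include <cstddef>

#include "rocksdb/advanced_cache.h"

using namespace ROCKSDB_NAMESPACE;

// Illustration only: `helper`, `value1`, `value2` and `charge` stand in for
// whatever the caller has prepared for the entries being inserted.
Status InsertBothWays(Cache& cache, const Slice& key, Cache::ObjectPtr value1,
                      Cache::ObjectPtr value2,
                      const Cache::CacheItemHelper* helper, size_t charge) {
  // Common pattern: keep a handle, so the entry stays referenced by the
  // inserter until Release().
  Cache::Handle* handle = nullptr;
  Status s = cache.Insert(key, value1, helper, charge, &handle);
  if (s.ok()) {
    cache.Release(handle);
  }
  // "Blind" insert: no handle is kept, so the entry becomes shareable but
  // unreferenced as soon as it is populated -- the rarely exercised pattern
  // (typical of secondary cache promotion) that triggers the bug.
  return cache.Insert(key, value2, helper, charge);
}
```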
The problem in AutoHCC is that we can only add the entry to a chain after it is in the sharable state, and must be removed from the chain while in the "under (de)construction" state and before it is back in the "empty" state. Also, it is possible for Lookup to find entries that are not connected to any chain, by design for efficiency, and for Release to erase_if_last_ref. Therefore, we could have * Thread 1 starts to Insert a cache entry without keeping ref, and pauses before adding to the chain. * Thread 2 finds it with Lookup optimizations, and then does Release with `erase_if_last_ref=true` causing it to trigger erasure on the entry. It successfully locks the home chain for the entry and purges any entries pending erasure. It is OK that this entry is not found on the chain, as another thread is allowed to remove it from the chain before we are able to (but after it is marked for (de)construction). And after the purge of the chain, the entry is marked empty. * Thread 1 resumes in adding the slot (presumed entry) to the home chain for what was being inserted, but that now violates invariants and sets up a race or double-chain-reference as another thread could insert a new entry in the slot and try to insert into a different chain. This is easily fixed by holding on to a reference until inserted onto the chain. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12046 Test Plan: As I don't have a reliable local reproducer, I triggered 20 runs of internal CI on fbcode_blackbox_crash_test that were previously failing in AutoHCC with about 1/3 probability, and they all passed. Also re-enabling AutoHCC in the crash test with this change. (Revert https://github.com/facebook/rocksdb/issues/12000) Reviewed By: jowlyzhang Differential Revision: D51016979 Pulled By: pdillinger fbshipit-source-id: 3840fb829d65b97c779d8aed62a4a4a433aeff2b --- cache/cache_bench_tool.cc | 16 ++++++++++++++-- cache/clock_cache.cc | 16 ++++++++++++---- tools/db_crashtest.py | 7 ++++--- unreleased_history/bug_fixes/exp_autohcc_fix.md | 1 + 4 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 unreleased_history/bug_fixes/exp_autohcc_fix.md diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 1aff1b8bf..89945abf7 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -71,11 +71,14 @@ DEFINE_double( "ratio less than full size and full size. 
If vary_capacity_ratio + " "pinned_ratio is close to or exceeds 1.0, the cache might thrash."); -DEFINE_uint32(lookup_insert_percent, 87, +DEFINE_uint32(lookup_insert_percent, 82, "Ratio of lookup (+ insert on not found) to total workload " "(expressed as a percentage)"); DEFINE_uint32(insert_percent, 2, "Ratio of insert to total workload (expressed as a percentage)"); +DEFINE_uint32(blind_insert_percent, 5, + "Ratio of insert without keeping handle to total workload " + "(expressed as a percentage)"); DEFINE_uint32(lookup_percent, 10, "Ratio of lookup to total workload (expressed as a percentage)"); DEFINE_uint32(erase_percent, 1, @@ -360,7 +363,9 @@ class CacheBench { FLAGS_lookup_insert_percent), insert_threshold_(lookup_insert_threshold_ + kHundredthUint64 * FLAGS_insert_percent), - lookup_threshold_(insert_threshold_ + + blind_insert_threshold_(insert_threshold_ + + kHundredthUint64 * FLAGS_blind_insert_percent), + lookup_threshold_(blind_insert_threshold_ + kHundredthUint64 * FLAGS_lookup_percent), erase_threshold_(lookup_threshold_ + kHundredthUint64 * FLAGS_erase_percent) { @@ -560,6 +565,7 @@ class CacheBench { // Cumulative thresholds in the space of a random uint64_t const uint64_t lookup_insert_threshold_; const uint64_t insert_threshold_; + const uint64_t blind_insert_threshold_; const uint64_t lookup_threshold_; const uint64_t erase_threshold_; @@ -735,6 +741,12 @@ class CacheBench { key, createValue(thread->rnd, cache_->memory_allocator()), &helper3, FLAGS_value_bytes, &pinned.emplace_back()); assert(s.ok()); + } else if (random_op < blind_insert_threshold_) { + // insert without keeping a handle + Status s = cache_->Insert( + key, createValue(thread->rnd, cache_->memory_allocator()), &helper3, + FLAGS_value_bytes); + assert(s.ok()); } else if (random_op < lookup_threshold_) { // do lookup auto handle = cache_->Lookup(key, &helper2, /*context*/ nullptr, diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index a6d41985a..dfa3c5e1f 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -2903,13 +2903,18 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( // Approximate average cache lines read to find an existing entry: // = 1.65 cache lines + // Even if we aren't saving a ref to this entry (take_ref == false), we need + // to keep a reference while we are inserting the entry into a chain, so that + // it is not erased by another thread while trying to insert it on the chain. + constexpr bool initial_take_ref = true; + size_t used_length = LengthInfoToUsedLength(state.saved_length_info); assert(home < used_length); size_t idx = home; bool already_matches = false; bool already_matches_ignore = false; - if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + if (TryInsert(proto, arr[idx], initial_countdown, initial_take_ref, &already_matches)) { assert(idx == home); } else if (already_matches) { @@ -2921,7 +2926,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( // incorporate logic for here cleanly and efficiently. } else if (UNLIKELY(state.likely_empty_slot > 0) && TryInsert(proto, arr[state.likely_empty_slot], initial_countdown, - take_ref, &already_matches_ignore)) { + initial_take_ref, &already_matches_ignore)) { idx = state.likely_empty_slot; } else { // We need to search for an available slot outside of the home. 
@@ -2955,7 +2960,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( if (idx >= used_length) { idx -= used_length; } - if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + if (TryInsert(proto, arr[idx], initial_countdown, initial_take_ref, &already_matches)) { break; } @@ -3010,7 +3015,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( } } } - if (TryInsert(proto, arr[idx], initial_countdown, take_ref, + if (TryInsert(proto, arr[idx], initial_countdown, initial_take_ref, &already_matches)) { break; } @@ -3073,6 +3078,9 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( if (arr[home].head_next_with_shift.compare_exchange_weak( next_with_shift, head_next_with_shift, std::memory_order_acq_rel)) { // Success + if (!take_ref) { + Unref(arr[idx]); + } return arr + idx; } } diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index a10ad14fa..01c3ae329 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -125,9 +125,10 @@ "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1), "mock_direct_io": False, "cache_type": lambda: random.choice( - ["lru_cache", "fixed_hyper_clock_cache", - # NOTE: auto_hyper_clock_cache disabled for now - "tiered_lru_cache", "tiered_fixed_hyper_clock_cache"] + ["lru_cache", "fixed_hyper_clock_cache", "auto_hyper_clock_cache", + "auto_hyper_clock_cache", "tiered_lru_cache", + "tiered_fixed_hyper_clock_cache", "tiered_auto_hyper_clock_cache", + "tiered_auto_hyper_clock_cache"] ), "use_full_merge_v1": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1), diff --git a/unreleased_history/bug_fixes/exp_autohcc_fix.md b/unreleased_history/bug_fixes/exp_autohcc_fix.md new file mode 100644 index 000000000..95a0de7b5 --- /dev/null +++ b/unreleased_history/bug_fixes/exp_autohcc_fix.md @@ -0,0 +1 @@ +Fixed a crash or assertion failure bug in experimental new HyperClockCache variant, especially when running with a SecondaryCache. From 2adef5367a68ce099bde51e7d710889d86a1b3d0 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 6 Nov 2023 16:52:51 -0800 Subject: [PATCH 260/386] AttributeGroups - PutEntity Implementation (#11977) Summary: Write Path for AttributeGroup Support. The new `PutEntity()` API uses `WriteBatch` and atomically writes WideColumns entities in multiple Column Families. 
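From the caller's perspective, the new write path looks roughly like this (a sketch based on the API added in this change; the DB pointer, column family handles, key, and column contents are placeholders owned by the caller):

```cpp
#include "rocksdb/db.h"
#include "rocksdb/wide_columns.h"

using namespace ROCKSDB_NAMESPACE;

// Sketch: `db`, `hot_cf` and `cold_cf` are assumed to be an open DB and two
// column family handles supplied by the caller.
Status PutUserEntity(DB* db, ColumnFamilyHandle* hot_cf,
                     ColumnFamilyHandle* cold_cf) {
  WideColumns hot_columns{{"hot_col_name", "hot_col_value"}};
  WideColumns cold_columns{{"cold_col_name", "cold_col_value"}};
  // One attribute group per column family; all groups for the key are
  // written atomically through a single WriteBatch.
  AttributeGroups attribute_groups{AttributeGroup(hot_cf, hot_columns),
                                   AttributeGroup(cold_cf, cold_columns)};
  return db->PutEntity(WriteOptions(), "user_key", attribute_groups);
}
```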
Combined the release note from PR https://github.com/facebook/rocksdb/issues/11925 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11977 Test Plan: - `DBWideBasicTest::MultiCFMultiGetEntityAsPinnableAttributeGroups` updated - `WriteBatchTest::AttributeGroupTest` added - `WriteBatchTest::AttributeGroupSavePointTest` added Reviewed By: ltamasi Differential Revision: D50457122 Pulled By: jaykorean fbshipit-source-id: 4997b265e415588ce077933082dcd1ac3eeae2cd --- db/db_impl/db_impl.h | 2 + db/db_impl/db_impl_readonly.h | 4 + db/db_impl/db_impl_secondary.h | 4 + db/db_impl/db_impl_write.cc | 27 ++++++ db/wide/db_wide_basic_test.cc | 45 ++++----- db/write_batch.cc | 17 ++++ db/write_batch_test.cc | 93 +++++++++++++++++++ include/rocksdb/db.h | 4 + include/rocksdb/utilities/stackable_db.h | 4 + .../utilities/write_batch_with_index.h | 10 +- include/rocksdb/wide_columns.h | 25 ++++- include/rocksdb/write_batch.h | 5 + include/rocksdb/write_batch_base.h | 5 + .../new_features/attribute_group_support.md | 1 + 14 files changed, 220 insertions(+), 26 deletions(-) create mode 100644 unreleased_history/new_features/attribute_group_support.md diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 4ac6a2d14..86d81a9dc 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -197,6 +197,8 @@ class DBImpl : public DB { Status PutEntity(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const WideColumns& columns) override; + Status PutEntity(const WriteOptions& options, const Slice& key, + const AttributeGroups& attribute_groups) override; using DB::Merge; Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family, diff --git a/db/db_impl/db_impl_readonly.h b/db/db_impl/db_impl_readonly.h index cc925ba50..32bc85607 100644 --- a/db/db_impl/db_impl_readonly.h +++ b/db/db_impl/db_impl_readonly.h @@ -53,6 +53,10 @@ class DBImplReadOnly : public DBImpl { const WideColumns& /* columns */) override { return Status::NotSupported("Not supported operation in read only mode."); } + Status PutEntity(const WriteOptions& /* options */, const Slice& /* key */, + const AttributeGroups& /* attribute_groups */) override { + return Status::NotSupported("Not supported operation in read only mode."); + } using DBImpl::Merge; virtual Status Merge(const WriteOptions& /*options*/, diff --git a/db/db_impl/db_impl_secondary.h b/db/db_impl/db_impl_secondary.h index 8616b9ed4..12a8bbdd7 100644 --- a/db/db_impl/db_impl_secondary.h +++ b/db/db_impl/db_impl_secondary.h @@ -136,6 +136,10 @@ class DBImplSecondary : public DBImpl { const WideColumns& /* columns */) override { return Status::NotSupported("Not supported operation in secondary mode."); } + Status PutEntity(const WriteOptions& /* options */, const Slice& /* key */, + const AttributeGroups& /* attribute_groups */) override { + return Status::NotSupported("Not supported operation in secondary mode."); + } using DBImpl::Merge; Status Merge(const WriteOptions& /*options*/, diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 77c1d55e7..ee2c24046 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -48,6 +48,17 @@ Status DBImpl::PutEntity(const WriteOptions& options, return DB::PutEntity(options, column_family, key, columns); } +Status DBImpl::PutEntity(const WriteOptions& options, const Slice& key, + const AttributeGroups& attribute_groups) { + for (const AttributeGroup& ag : attribute_groups) { + const Status s = FailIfCfHasTs(ag.column_family()); + if 
(!s.ok()) { + return s; + } + } + return DB::PutEntity(options, key, attribute_groups); +} + Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, const Slice& key, const Slice& val) { const Status s = FailIfCfHasTs(column_family); @@ -2385,6 +2396,22 @@ Status DB::PutEntity(const WriteOptions& options, return Write(options, &batch); } +Status DB::PutEntity(const WriteOptions& options, const Slice& key, + const AttributeGroups& attribute_groups) { + ColumnFamilyHandle* default_cf = DefaultColumnFamily(); + assert(default_cf); + const Comparator* const default_cf_ucmp = default_cf->GetComparator(); + assert(default_cf_ucmp); + WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, + options.protection_bytes_per_key, + default_cf_ucmp->timestamp_size()); + const Status s = batch.PutEntity(key, attribute_groups); + if (!s.ok()) { + return s; + } + return Write(options, &batch); +} + Status DB::Delete(const WriteOptions& opt, ColumnFamilyHandle* column_family, const Slice& key) { WriteBatch batch(0 /* reserved_bytes */, 0 /* max_bytes */, diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 821e9b54a..413bfc19f 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -260,17 +260,17 @@ TEST_F(DBWideBasicTest, GetEntityAsPinnableAttributeGroups) { WideColumns second_cold_columns{ {"cold_cf_col_1_name", "second_key_cold_cf_col_1_value"}}; - // TODO - update this to use the multi-attribute-group PutEntity when ready - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kDefaultCfHandleIndex], - first_key, first_default_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], - first_key, first_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], - first_key, first_cold_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], - second_key, second_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], - second_key, second_cold_columns)); + AttributeGroups first_key_attribute_groups{ + AttributeGroup(handles_[kDefaultCfHandleIndex], first_default_columns), + AttributeGroup(handles_[kHotCfHandleIndex], first_hot_columns), + AttributeGroup(handles_[kColdCfHandleIndex], first_cold_columns)}; + AttributeGroups second_key_attribute_groups{ + AttributeGroup(handles_[kHotCfHandleIndex], second_hot_columns), + AttributeGroup(handles_[kColdCfHandleIndex], second_cold_columns)}; + ASSERT_OK( + db_->PutEntity(WriteOptions(), first_key, first_key_attribute_groups)); + ASSERT_OK( + db_->PutEntity(WriteOptions(), second_key, second_key_attribute_groups)); std::vector all_cfs = handles_; std::vector default_and_hot_cfs{ @@ -408,17 +408,18 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { WideColumns second_cold_columns{ {"cold_cf_col_1_name", "second_key_cold_cf_col_1_value"}}; - // TODO - update this to use the multi-attribute-group PutEntity when ready - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kDefaultCfHandleIndex], - first_key, first_default_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], - first_key, first_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], - first_key, first_cold_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kHotCfHandleIndex], - second_key, second_hot_columns)); - ASSERT_OK(db_->PutEntity(WriteOptions(), handles_[kColdCfHandleIndex], - second_key, second_cold_columns)); 
+ AttributeGroups first_key_attribute_groups{ + AttributeGroup(handles_[kDefaultCfHandleIndex], first_default_columns), + AttributeGroup(handles_[kHotCfHandleIndex], first_hot_columns), + AttributeGroup(handles_[kColdCfHandleIndex], first_cold_columns)}; + AttributeGroups second_key_attribute_groups{ + AttributeGroup(handles_[kHotCfHandleIndex], second_hot_columns), + AttributeGroup(handles_[kColdCfHandleIndex], second_cold_columns)}; + + ASSERT_OK( + db_->PutEntity(WriteOptions(), first_key, first_key_attribute_groups)); + ASSERT_OK( + db_->PutEntity(WriteOptions(), second_key, second_key_attribute_groups)); constexpr size_t num_keys = 2; std::array keys = {first_key, second_key}; diff --git a/db/write_batch.cc b/db/write_batch.cc index 4502a81ff..d5c6df3d6 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -39,6 +39,7 @@ #include "rocksdb/write_batch.h" #include +#include #include #include #include @@ -1016,6 +1017,22 @@ Status WriteBatch::PutEntity(ColumnFamilyHandle* column_family, return WriteBatchInternal::PutEntity(this, cf_id, key, columns); } +Status WriteBatch::PutEntity(const Slice& key, + const AttributeGroups& attribute_groups) { + if (attribute_groups.empty()) { + return Status::InvalidArgument( + "Cannot call this method with empty attribute groups"); + } + Status s; + for (const AttributeGroup& ag : attribute_groups) { + s = PutEntity(ag.column_family(), key, ag.columns()); + if (!s.ok()) { + return s; + } + } + return s; +} + Status WriteBatchInternal::InsertNoop(WriteBatch* b) { b->rep_.push_back(static_cast(kTypeNoop)); return Status::OK(); diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index 174052644..e8fc1aa35 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -12,7 +12,9 @@ #include "db/column_family.h" #include "db/db_test_util.h" #include "db/memtable.h" +#include "db/wide/wide_columns_helper.h" #include "db/write_batch_internal.h" +#include "dbformat.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" #include "rocksdb/env.h" @@ -276,6 +278,21 @@ struct TestHandler : public WriteBatch::Handler { } return Status::OK(); } + Status PutEntityCF(uint32_t column_family_id, const Slice& key, + const Slice& entity) override { + std::ostringstream oss; + Status s = WideColumnsHelper::DumpSliceAsWideColumns(entity, oss, false); + if (!s.ok()) { + return s; + } + if (column_family_id == 0) { + seen += "PutEntity(" + key.ToString() + ", " + oss.str() + ")"; + } else { + seen += "PutEntityCF(" + std::to_string(column_family_id) + ", " + + key.ToString() + ", " + oss.str() + ")"; + } + return Status::OK(); + } Status DeleteCF(uint32_t column_family_id, const Slice& key) override { if (column_family_id == 0) { seen += "Delete(" + key.ToString() + ")"; @@ -665,6 +682,82 @@ class ColumnFamilyHandleImplDummy : public ColumnFamilyHandleImpl { }; } // anonymous namespace +TEST_F(WriteBatchTest, AttributeGroupTest) { + WriteBatch batch; + ColumnFamilyHandleImplDummy zero(0), two(2); + AttributeGroups foo_ags; + WideColumn zero_col_1{"0_c_1_n", "0_c_1_v"}; + WideColumn zero_col_2{"0_c_2_n", "0_c_2_v"}; + WideColumns zero_col_1_col_2{zero_col_1, zero_col_2}; + + WideColumn two_col_1{"2_c_1_n", "2_c_1_v"}; + WideColumn two_col_2{"2_c_2_n", "2_c_2_v"}; + WideColumns two_col_1_col_2{two_col_1, two_col_2}; + + foo_ags.emplace_back(&zero, zero_col_1_col_2); + foo_ags.emplace_back(&two, two_col_1_col_2); + + ASSERT_OK(batch.PutEntity("foo", foo_ags)); + + TestHandler handler; + ASSERT_OK(batch.Iterate(&handler)); + ASSERT_EQ( + "PutEntity(foo, 
0_c_1_n:0_c_1_v " + "0_c_2_n:0_c_2_v)" + "PutEntityCF(2, foo, 2_c_1_n:2_c_1_v " + "2_c_2_n:2_c_2_v)", + handler.seen); +} + +TEST_F(WriteBatchTest, AttributeGroupSavePointTest) { + WriteBatch batch; + batch.SetSavePoint(); + + ColumnFamilyHandleImplDummy zero(0), two(2), three(3); + AttributeGroups foo_ags; + WideColumn zero_col_1{"0_c_1_n", "0_c_1_v"}; + WideColumn zero_col_2{"0_c_2_n", "0_c_2_v"}; + WideColumns zero_col_1_col_2{zero_col_1, zero_col_2}; + + WideColumn two_col_1{"2_c_1_n", "2_c_1_v"}; + WideColumn two_col_2{"2_c_2_n", "2_c_2_v"}; + WideColumns two_col_1_col_2{two_col_1, two_col_2}; + + foo_ags.emplace_back(&zero, zero_col_1_col_2); + foo_ags.emplace_back(&two, two_col_1_col_2); + + AttributeGroups bar_ags; + WideColumn three_col_1{"3_c_1_n", "3_c_1_v"}; + WideColumn three_col_2{"3_c_2_n", "3_c_2_v"}; + WideColumns three_col_1_col_2{three_col_1, three_col_2}; + + bar_ags.emplace_back(&zero, zero_col_1_col_2); + bar_ags.emplace_back(&three, three_col_1_col_2); + + ASSERT_OK(batch.PutEntity("foo", foo_ags)); + batch.SetSavePoint(); + + ASSERT_OK(batch.PutEntity("bar", bar_ags)); + + TestHandler handler; + ASSERT_OK(batch.Iterate(&handler)); + ASSERT_EQ( + "PutEntity(foo, 0_c_1_n:0_c_1_v 0_c_2_n:0_c_2_v)" + "PutEntityCF(2, foo, 2_c_1_n:2_c_1_v 2_c_2_n:2_c_2_v)" + "PutEntity(bar, 0_c_1_n:0_c_1_v 0_c_2_n:0_c_2_v)" + "PutEntityCF(3, bar, 3_c_1_n:3_c_1_v 3_c_2_n:3_c_2_v)", + handler.seen); + + ASSERT_OK(batch.RollbackToSavePoint()); + + handler.seen.clear(); + ASSERT_OK(batch.Iterate(&handler)); + ASSERT_EQ( + "PutEntity(foo, 0_c_1_n:0_c_1_v 0_c_2_n:0_c_2_v)" + "PutEntityCF(2, foo, 2_c_1_n:2_c_1_v 2_c_2_n:2_c_2_v)", + handler.seen); +} + TEST_F(WriteBatchTest, ColumnFamiliesBatchTest) { WriteBatch batch; ColumnFamilyHandleImplDummy zero(0), two(2), three(3), eight(8); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index fae1729b1..d3f4d4349 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -435,6 +435,10 @@ class DB { virtual Status PutEntity(const WriteOptions& options, ColumnFamilyHandle* column_family, const Slice& key, const WideColumns& columns); + // Split and store wide column entities in multiple column families (a.k.a. + // AttributeGroups) + virtual Status PutEntity(const WriteOptions& options, const Slice& key, + const AttributeGroups& attribute_groups); // Remove the database entry (if any) for "key". Returns OK on // success, and a non-OK status on error. 
It is not an error if "key" diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index 8674f10c9..e377d7ed2 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -92,6 +92,10 @@ class StackableDB : public DB { const WideColumns& columns) override { return db_->PutEntity(options, column_family, key, columns); } + Status PutEntity(const WriteOptions& options, const Slice& key, + const AttributeGroups& attribute_groups) override { + return db_->PutEntity(options, key, attribute_groups); + } using DB::Get; virtual Status Get(const ReadOptions& options, diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index ae1c08840..ecc8ef059 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -118,7 +118,16 @@ class WriteBatchWithIndex : public WriteBatchBase { return Status::InvalidArgument( "Cannot call this method without a column family handle"); } + return Status::NotSupported( + "PutEntity not supported by WriteBatchWithIndex"); + } + Status PutEntity(const Slice& /* key */, + const AttributeGroups& attribute_groups) override { + if (attribute_groups.empty()) { + return Status::InvalidArgument( + "Cannot call this method without attribute groups"); + } return Status::NotSupported( "PutEntity not supported by WriteBatchWithIndex"); } @@ -301,4 +310,3 @@ class WriteBatchWithIndex : public WriteBatchBase { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/include/rocksdb/wide_columns.h b/include/rocksdb/wide_columns.h index c4353134a..35b81268b 100644 --- a/include/rocksdb/wide_columns.h +++ b/include/rocksdb/wide_columns.h @@ -221,8 +221,27 @@ inline bool operator!=(const PinnableWideColumns& lhs, } // Class representing attribute group. Attribute group is a logical grouping of -// wide-column entities by leveraging Column Families. Wide-columns returned -// from the query are pinnable. +// wide-column entities by leveraging Column Families. +// Used in Write Path +class AttributeGroup { + public: + ColumnFamilyHandle* column_family() const { return column_family_; } + const WideColumns& columns() const { return columns_; } + WideColumns& columns() { return columns_; } + + explicit AttributeGroup(ColumnFamilyHandle* column_family, + const WideColumns& columns) + : column_family_(column_family), columns_(columns) {} + + private: + ColumnFamilyHandle* column_family_; + WideColumns columns_; +}; + +// A collection of Attribute Groups. +using AttributeGroups = std::vector; + +// Used in Read Path. Wide-columns returned from the query are pinnable. class PinnableAttributeGroup { public: ColumnFamilyHandle* column_family() const { return column_family_; } @@ -255,7 +274,7 @@ inline void PinnableAttributeGroup::Reset() { columns_.Reset(); } -// A collection of Attribute Groups. +// A collection of Pinnable Attribute Groups. using PinnableAttributeGroups = std::vector; } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 6752d9931..dfc2bfdf4 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -106,6 +106,11 @@ class WriteBatch : public WriteBatchBase { Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key, const WideColumns& columns) override; + // Split and store wide column entities in multiple column families (a.k.a. 
+ // AttributeGroups) + Status PutEntity(const Slice& key, + const AttributeGroups& attribute_groups) override; + using WriteBatchBase::Delete; // If the database contains a mapping for "key", erase it. Else do nothing. // The following Delete(..., const Slice& key) can be used when user-defined diff --git a/include/rocksdb/write_batch_base.h b/include/rocksdb/write_batch_base.h index f6f39ef0b..d82eefd49 100644 --- a/include/rocksdb/write_batch_base.h +++ b/include/rocksdb/write_batch_base.h @@ -47,6 +47,11 @@ class WriteBatchBase { virtual Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key, const WideColumns& columns) = 0; + // Split and store wide column entities in multiple column families (a.k.a. + // AttributeGroups) + virtual Status PutEntity(const Slice& key, + const AttributeGroups& attribute_groups) = 0; + // Merge "value" with the existing value of "key" in the database. // "key->merge(existing, value)" virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key, diff --git a/unreleased_history/new_features/attribute_group_support.md b/unreleased_history/new_features/attribute_group_support.md new file mode 100644 index 000000000..4645a1a63 --- /dev/null +++ b/unreleased_history/new_features/attribute_group_support.md @@ -0,0 +1 @@ +Add GetEntity() and PutEntity() API implementation for Attribute Group support. Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. From 16ae3548a208dce2a0adb318d0058d385b8208b0 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 7 Nov 2023 10:40:39 -0800 Subject: [PATCH 261/386] AutoHCC: Improve/fix allocation/detection of grow homes (#12047) Summary: This change simplifies some code and logic by introducing a new atomic field that tracks the next slot to grow into. It should offer slightly better performance during the growth phase (not measurable; see Test Plan below) and fix a suspected (but unconfirmed) bug like this: * Thread 1 is in non-trivial SplitForGrow() with grow_home=n. * Thread 2 reaches Grow() with grow_home=2n, and waits at the start of SplitForGrow() for the rewrite lock on n. By this point, the head at 2n is marked with the new shift amount but no chain is locked. * Thread 3 reaches Grow() with grow_home=4n, and waits before SplitForGrow() for the rewrite lock on n. By this point, the head at 4n is marked with the new shift amount but no chain is locked. * Thread 4 reaches Grow() with grow_home=8n and meets no resistance to proceeding through a SplitForGrow() on an empty chain, permanently missing out on any entries from chain n that should have ended up here. This is fixed by not updating the shift amount at the grow_home head until we have checked the preconditions that Grow()s feeding into this one have completed. 
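In outline, claiming a grow slot now reduces to a single atomic counter bump (a simplified sketch of the new logic in the diff below, with everything except the allocation itself elided; names here are placeholders):

```cpp
#include <atomic>
#include <cstddef>

// Simplified sketch: `grow_frontier` tracks the next slot index to grow
// into, and `array_count` is the fixed upper bound on usable slots.
bool TryAllocateGrowHome(std::atomic<size_t>& grow_frontier,
                         size_t array_count, size_t* grow_home_out) {
  // Each Grow() claims a distinct grow_home with one fetch_add, so threads
  // no longer race to be the first to mark an unclaimed head slot.
  size_t grow_home = grow_frontier.fetch_add(1, std::memory_order_relaxed);
  if (grow_home >= array_count) {
    // Table cannot grow any more; clamp so that repeatedly reaching this
    // path does not eventually overflow the counter.
    grow_frontier.store(array_count, std::memory_order_relaxed);
    return false;
  }
  *grow_home_out = grow_home;
  return true;
}
```

The visibility guarantees for the newly claimed slot still come from the shift check and the chain rewrite lock described above, not from this counter, which is why relaxed ordering suffices in the sketch.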
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12047 Test Plan: Some manual cache_bench stress runs, and about 20 triggered runs of fbcode_blackbox_crash_test No discernible performance difference on this benchmark, running before & after in parallel for a few minutes: ``` (while ./cache_bench -cache_type=auto_hyper_clock_cache -populate_cache=0 -cache_size=3000000000 -ops_per_thread=50000 -threads=12 -histograms=0 2>&1 | grep parallel; do :; done) | awk '{ s += $3; c++; print "Avg time: " (s/c);}' ``` Reviewed By: jowlyzhang Differential Revision: D51017007 Pulled By: pdillinger fbshipit-source-id: 5f6d6a6194fc966f94693f3205ed75c87cdad269 --- cache/clock_cache.cc | 102 +++++++++++++++++++------------------------ cache/clock_cache.h | 14 +++--- 2 files changed, 52 insertions(+), 64 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index dfa3c5e1f..f4aaa0623 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -1850,18 +1850,23 @@ size_t CalcOccupancyLimit(size_t used_length) { class AutoHyperClockTable::ChainRewriteLock { public: using HandleImpl = AutoHyperClockTable::HandleImpl; - explicit ChainRewriteLock(HandleImpl* h, std::atomic& yield_count, - bool already_locked_or_end = false) + + // Acquire lock if head of h is not an end + explicit ChainRewriteLock(HandleImpl* h, std::atomic& yield_count) : head_ptr_(&h->head_next_with_shift) { - if (already_locked_or_end) { - new_head_ = head_ptr_->load(std::memory_order_acquire); - // already locked or end - assert(new_head_ & HandleImpl::kHeadLocked); - return; - } Acquire(yield_count); } + // RAII wrap existing lock held (or end) + explicit ChainRewriteLock(HandleImpl* h, + std::atomic& /*yield_count*/, + uint64_t already_locked_or_end) + : head_ptr_(&h->head_next_with_shift) { + new_head_ = already_locked_or_end; + // already locked or end + assert(new_head_ & HandleImpl::kHeadLocked); + } + ~ChainRewriteLock() { if (!IsEnd()) { // Release lock @@ -1880,13 +1885,6 @@ class AutoHyperClockTable::ChainRewriteLock { // Expected current state, assuming no parallel updates. uint64_t GetNewHead() const { return new_head_; } - // Only safe if we know that the value hasn't changed from other threads - void SimpleUpdate(uint64_t next_with_shift) { - assert(head_ptr_->load(std::memory_order_acquire) == new_head_); - new_head_ = next_with_shift | HandleImpl::kHeadLocked; - head_ptr_->store(new_head_, std::memory_order_release); - } - bool CasUpdate(uint64_t next_with_shift, std::atomic& yield_count) { uint64_t new_head = next_with_shift | HandleImpl::kHeadLocked; uint64_t expected = GetNewHead(); @@ -1960,6 +1958,7 @@ AutoHyperClockTable::AutoHyperClockTable( length_info_(UsedLengthToLengthInfo(GetStartingLength(capacity))), occupancy_limit_( CalcOccupancyLimit(LengthInfoToUsedLength(length_info_.load()))), + grow_frontier_(GetTableSize()), clock_pointer_mask_( BottomNBits(UINT64_MAX, LengthInfoToMinShift(length_info_.load()))) { if (metadata_charge_policy == @@ -2128,7 +2127,7 @@ bool AutoHyperClockTable::GrowIfNeeded(size_t new_occupancy, // However, there's an awkward state where other threads own growing the // table to sufficient usable size, but the udpated size is not yet // published. If we wait, then that likely slows the ramp-up cache - // performance. If we unblock ourselves by ensure we grow by at least one + // performance. If we unblock ourselves by ensuring we grow by at least one // slot, we could technically overshoot required size by number of parallel // threads accessing block cache. 
On balance considering typical cases and // the modest consequences of table being slightly too large, the latter @@ -2152,35 +2151,14 @@ bool AutoHyperClockTable::GrowIfNeeded(size_t new_occupancy, } bool AutoHyperClockTable::Grow(InsertState& state) { - size_t used_length = LengthInfoToUsedLength(state.saved_length_info); - - // Try to take ownership of a grow slot as the first thread to set its - // head_next_with_shift to non-zero, specifically a valid empty chain - // in case that is to be the final value. - // (We don't need to be super efficient here.) - size_t grow_home = used_length; - int old_shift; - for (;; ++grow_home) { - if (grow_home >= array_.Count()) { - // Can't grow any more. - // (Tested by unit test ClockCacheTest/Limits) - return false; - } - - old_shift = FloorLog2(grow_home); - assert(old_shift >= 1); - - uint64_t empty_head = MakeNextWithShiftEnd(grow_home, old_shift + 1); - uint64_t expected_zero = HandleImpl::kUnusedMarker; - bool own = array_[grow_home].head_next_with_shift.compare_exchange_strong( - expected_zero, empty_head, std::memory_order_acq_rel); - if (own) { - assert(array_[grow_home].meta.load(std::memory_order_acquire) == 0); - break; - } else { - // Taken by another thread. Try next slot. - assert(expected_zero != 0); - } + // Allocate the next grow slot + size_t grow_home = grow_frontier_.fetch_add(1, std::memory_order_relaxed); + if (grow_home >= array_.Count()) { + // Can't grow any more. + // (Tested by unit test ClockCacheTest/Limits) + // Make sure we don't overflow grow_frontier_ by reaching here repeatedly + grow_frontier_.store(array_.Count(), std::memory_order_relaxed); + return false; } #ifdef COERCE_CONTEXT_SWITCH // This is useful in reproducing concurrency issues in Grow() @@ -2191,12 +2169,15 @@ bool AutoHyperClockTable::Grow(InsertState& state) { // Basically, to implement https://en.wikipedia.org/wiki/Linear_hashing // entries that belong in a new chain starting at grow_home will be // split off from the chain starting at old_home, which is computed here. + int old_shift = FloorLog2(grow_home); size_t old_home = BottomNBits(grow_home, old_shift); assert(old_home + (size_t{1} << old_shift) == grow_home); // Wait here to ensure any Grow operations that would directly feed into // this one are finished, though the full waiting actually completes in - // acquiring the rewrite lock for old_home in SplitForGrow. + // acquiring the rewrite lock for old_home in SplitForGrow. Here we ensure + // the expected shift amount has been reached, and there we ensure the + // chain rewrite lock has been released. size_t old_old_home = BottomNBits(grow_home, old_shift - 1); for (;;) { uint64_t old_old_head = array_[old_old_home].head_next_with_shift.load( @@ -2416,12 +2397,12 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, // Acquire rewrite lock on zero chain (if it's non-empty) ChainRewriteLock zero_head_lock(&arr[old_home], yield_count_); - // Create an RAII wrapper for one chain rewrite lock, for once it becomes - // non-empty. This head is unused by Lookup and DoInsert until the zero - // head is updated with new shift amount. 
- ChainRewriteLock one_head_lock(&arr[grow_home], yield_count_, - /*already_locked_or_end=*/true); - assert(one_head_lock.IsEnd()); + + // Used for locking the one chain below + uint64_t saved_one_head; + // One head has not been written to + assert(arr[grow_home].head_next_with_shift.load(std::memory_order_acquire) == + 0); // old_home will also the head of the new "zero chain" -- all entries in the // "from" chain whose next hash bit is 0. grow_home will be head of the new @@ -2504,11 +2485,13 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, assert((chain_frontier_first < 0) == (zero_chain_frontier == SIZE_MAX && one_chain_frontier == SIZE_MAX)); - // Always update one chain's head first (safe). - one_head_lock.SimpleUpdate( - one_chain_frontier != SIZE_MAX - ? MakeNextWithShift(one_chain_frontier, new_shift) - : MakeNextWithShiftEnd(grow_home, new_shift)); + // Always update one chain's head first (safe), and mark it as locked + saved_one_head = HandleImpl::kHeadLocked | + (one_chain_frontier != SIZE_MAX + ? MakeNextWithShift(one_chain_frontier, new_shift) + : MakeNextWithShiftEnd(grow_home, new_shift)); + arr[grow_home].head_next_with_shift.store(saved_one_head, + std::memory_order_release); // Make sure length_info_ hasn't been updated too early, as we're about // to make the change that makes it safe to update (e.g. in DoInsert()) @@ -2535,6 +2518,11 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, } } + // Create an RAII wrapper for the one chain rewrite lock we are already + // holding (if was not end) and is now "published" after successful CAS on + // zero chain head. + ChainRewriteLock one_head_lock(&arr[grow_home], yield_count_, saved_one_head); + // Except for trivial cases, we have something like // AHome -New-> [A0] -Old-> [B0] -Old-> [C0] \ | // BHome --------------------New------------> [A1] -Old-> ... diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 908e64f1a..63610d1f9 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -939,13 +939,9 @@ class AutoHyperClockTable : public BaseClockTable { // log time to find the correct chain, but normally this value enables // readers to find the correct chain on the first try. // - // NOTES: length_info_ is only updated at the end of a Grow operation, - // so that waiting in Grow operations isn't done while entries are pinned - // for internal operation purposes. Thus, Lookup and Insert have to - // detect and support cases where length_info hasn't caught up to updated - // chains. Winning grow thread is the one that transitions - // head_next_with_shift from zeros. Grow threads can spin/yield wait for - // preconditions and postconditions to be met. + // To maximize parallelization of Grow() operations, this field is only + // updated opportunistically after Grow() operations and in DoInsert() where + // it is found to be out-of-date. See CatchUpLengthInfoNoWait(). std::atomic length_info_; // An already-computed version of the usable length times the max load @@ -953,6 +949,10 @@ class AutoHyperClockTable : public BaseClockTable { // that internally. std::atomic occupancy_limit_; + // The next index to use from array_ upon the next Grow(). Might be ahead of + // length_info_. 
+ std::atomic grow_frontier_; + // See explanation in AutoHyperClockTable::Evict std::atomic clock_pointer_mask_; }; // class AutoHyperClockTable From c06309c832880b693f43864ccc4ca36622643f69 Mon Sep 17 00:00:00 2001 From: Guozhang Wu <30565051+zcxsythenew@users.noreply.github.com> Date: Tue, 7 Nov 2023 11:44:20 -0800 Subject: [PATCH 262/386] Not to print unnecessary commands in Makefile (#11978) Summary: When I run `make check`, there is a command that should not be printed to screen, which is shown below. ```text ... ... Generating parallel test scripts for util_merge_operators_test Generating parallel test scripts for write_batch_with_index_test make[2]: Leaving directory '/home/z/rocksdb' make[1]: Leaving directory '/home/z/rocksdb' GEN check make[1]: Entering directory '/home/z/rocksdb' $DEBUG_LEVEL is 1, $LIB_MODE is shared Makefile:185: Warning: Compiling in debug mode. Don't use the resulting binary in production printf '%s\n' '' \ 'To monitor subtest ,' \ ' run "make watch-log" in a separate window' ''; \ { \ printf './%s\n' db_bloom_filter_test deletefile_test env_test c_test; \ find t -name 'run-*' -print; \ } \ | perl -pe 's,(^.*MySQLStyleTransactionTest.*$|^.*SnapshotConcurrentAccessTest.*$|^.*SeqAdvanceConcurrentTest.*$|^t/run-table_test-HarnessTest.Randomized$|^t/run-db_test-.*(?:FileCreationRandomFailure|EncodeDecompressedBlockSizeTest)$|^.*RecoverFromCorruptedWALWithoutFlush$),100 $1,' | sort -k1,1gr | sed 's/^[.0-9]* //' \ | grep -E '.' \ | grep -E -v '"^$"' \ | build_tools/gnu_parallel -j100% --plain --joblog=LOG --eta --gnu \ --tmpdir=/dev/shm/rocksdb.6lop '{} >& t/log-{/} || bash -c "cat t/log-{/}; exit $?"' ; \ parallel_retcode=$? ; \ awk '{ if ($7 != 0 || $8 != 0) { if ($7 == "Exitval") { h = $0; } else { if (!f) print h; print; f = 1 } } } END { if(f) exit 1; }' < LOG ; \ awk_retcode=$?; \ if [ $parallel_retcode -ne 0 ] || [ $awk_retcode -ne 0 ] ; then exit 1 ; fi To monitor subtest , run "make watch-log" in a separate window Computers / CPU cores / Max jobs to run 1:local / 16 / 16 ``` The `printf` command will make the output confusing. It would be better not to print it. **Before Change** ![image](https://github.com/facebook/rocksdb/assets/30565051/92cf681a-40b7-462e-ae5b-23eeacbb8f82) **After Change** ![image](https://github.com/facebook/rocksdb/assets/30565051/4a70b04b-e4ef-4bed-9ce0-d942ed9d132e) **Test Plan** Not applicable. This is a trivial change, only to add a `@` before a Makefile command, and it will not impact any workflows. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11978 Reviewed By: jaykorean Differential Revision: D51076606 Pulled By: cbi42 fbshipit-source-id: dc079ab8f60a5a5b9d04a83888884657b2e442ff --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3f308c6dd..50dddc976 100644 --- a/Makefile +++ b/Makefile @@ -994,7 +994,7 @@ endif .PHONY: check_0 check_0: - printf '%s\n' '' \ + @printf '%s\n' '' \ 'To monitor subtest ,' \ ' run "make watch-log" in a separate window' ''; \ { \ @@ -1016,7 +1016,7 @@ valgrind-exclude-regexp = InlineSkipTest.ConcurrentInsert|TransactionStressTest. 
.PHONY: valgrind_check_0 valgrind_check_0: test_log_prefix := valgrind_ valgrind_check_0: - printf '%s\n' '' \ + @printf '%s\n' '' \ 'To monitor subtest ,' \ ' run "make watch-log" in a separate window' ''; \ { \ From c181667c4fd60f4b6d72fa5c11a935bb55736865 Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Tue, 7 Nov 2023 11:58:58 -0800 Subject: [PATCH 263/386] FIX new blog post (JNI performance) Locate images correctly (#12050) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: We set up the images / references to the images wrongly in https://github.com/facebook/rocksdb/pull/11818 Images should be in the docs/static/images/… directory with an absolute reference to /static/images/… Make it so. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12050 Reviewed By: pdillinger Differential Revision: D51079811 Pulled By: jaykorean fbshipit-source-id: 4c1ab80d313b70d0e60eec94086451d7b2814922 --- .../2023-11-06-java-jni-benchmarks.markdown | 22 +++++++----------- .../fig_1024_1_copyout_nopoolbig.png | Bin .../fig_1024_1_none_nopoolbig.png | Bin .../fig_1024_1_none_nopoolsmall.png | Bin .../jni-get-benchmarks/optimization-graph.png | Bin 5 files changed, 9 insertions(+), 13 deletions(-) rename docs/{_posts => static/images}/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png (100%) rename docs/{_posts => static/images}/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png (100%) rename docs/{_posts => static/images}/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png (100%) rename docs/{_posts => static/images}/jni-get-benchmarks/optimization-graph.png (100%) diff --git a/docs/_posts/2023-11-06-java-jni-benchmarks.markdown b/docs/_posts/2023-11-06-java-jni-benchmarks.markdown index 5eb47ef3a..2cf5c8362 100644 --- a/docs/_posts/2023-11-06-java-jni-benchmarks.markdown +++ b/docs/_posts/2023-11-06-java-jni-benchmarks.markdown @@ -106,7 +106,7 @@ Benchmarks ran for a duration of order 6 hours on an otherwise unloaded VM, the error bars are small and we can have strong confidence in the values derived and plotted. -![Raw JNI Get](./jni-get-benchmarks/fig_1024_1_none_nopoolbig.png). +![Raw JNI Get small](/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png) Comparing all the benchmarks as the data size tends large, the conclusions we can draw are: @@ -130,7 +130,7 @@ can draw are: At small(er) data sizes, we can see whether other factors are important. -![Raw JNI Get](./jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png) +![Raw JNI Get large](/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png) - Indirect byte buffers are the most significant overhead here. Again, we can conclude that this is due to pure overhead compared to `byte[]` operations. @@ -156,7 +156,7 @@ of result. the access is presumably word by word, using normal Java mechanisms. -![Copy out JNI Get](./jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png). +![Copy out JNI Get](/static/images/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png) ### PutJNIBenchmark @@ -191,7 +191,7 @@ Of course there is some noise within the results. but we can agree: * Don't make copies you don't need to make * Don't allocate/deallocate when you can avoid it - + Translating this into designing an efficient API, we want to: * Support API methods that return results in buffers supplied by the client. 
@@ -202,7 +202,7 @@ Translating this into designing an efficient API, we want to: * Simplicity of implementation, as we can wrap `byte[]`-oriented methods * Continue to support methods which allocate return buffers per-call, as these are the easiest to use on initial encounter with the RocksDB API. -High performance Java interaction with RocksDB ultimately requires architectural decisions by the client +High performance Java interaction with RocksDB ultimately requires architectural decisions by the client * Use more complex (client supplied buffer) API methods where performance matters * Don't allocate/deallocate where you don't need to * recycle your own buffers where this makes sense @@ -227,7 +227,7 @@ java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,500 ``` The y-axis shows `ops/sec` in throughput, so higher is better. -![image](./jni-get-benchmarks/optimization-graph.png) +![](/static/images/jni-get-benchmarks/optimization-graph.png) ### Analysis @@ -238,9 +238,9 @@ Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) ``` - + After PinnableSlice the correct way for new code to implement a `get()` is like this - + ```cpp Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, @@ -262,7 +262,7 @@ jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BIIJ( 1. Create an empty `std::string value` 2. Call `DB::Get()` using the `std::string` variant 3. Copy the resultant `std::string` into Java, using the JNI `SetByteArrayRegion()` method - + So stage (3) costs us a copy into Java. It's mostly unavoidable that there will be at least the one copy from a C++ buffer into a Java buffer. But what does stage 2 do ? @@ -285,7 +285,3 @@ Luckily this is easy to fix. In the Java API (JNI) implementation: In the case where the `PinnableSlice` has succesfully pinned the data, this saves us the intermediate copy to the `std::string`. In the case where it hasn't, we still have the extra copy so the observed performance improvement depends on when the data can be pinned. Luckily, our benchmarking suggests that the pin is happening in a significant number of cases. On discussion with the RocksDB core team we understand that the core `PinnableSlice` optimization is most likely to succeed when pages are loaded from the block cache, rather than when they are in `memtable`. And it might be possible to successfully pin in the `memtable` as well, with some extra coding effort. This would likely improve the results for these benchmarks. 
- - - - diff --git a/docs/_posts/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png b/docs/static/images/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png similarity index 100% rename from docs/_posts/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png rename to docs/static/images/jni-get-benchmarks/fig_1024_1_copyout_nopoolbig.png diff --git a/docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png b/docs/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png similarity index 100% rename from docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png rename to docs/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolbig.png diff --git a/docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png b/docs/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png similarity index 100% rename from docs/_posts/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png rename to docs/static/images/jni-get-benchmarks/fig_1024_1_none_nopoolsmall.png diff --git a/docs/_posts/jni-get-benchmarks/optimization-graph.png b/docs/static/images/jni-get-benchmarks/optimization-graph.png similarity index 100% rename from docs/_posts/jni-get-benchmarks/optimization-graph.png rename to docs/static/images/jni-get-benchmarks/optimization-graph.png From 58f2a29fb48c8c7ff89d8cffbe350ebda545051d Mon Sep 17 00:00:00 2001 From: Zaidoon Abd Al Hadi <43054535+zaidoon1@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:46:50 -0800 Subject: [PATCH 264/386] Expose Options::periodic_compaction_seconds through C API (#12019) Summary: fixes [11090](https://github.com/facebook/rocksdb/issues/11090) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12019 Reviewed By: jaykorean Differential Revision: D51076427 Pulled By: cbi42 fbshipit-source-id: de353ff66c7f73aba70ab3379e20d8c40f50d873 --- db/c.cc | 10 ++++++++++ db/c_test.c | 10 ++++++++++ include/rocksdb/c.h | 4 ++++ 3 files changed, 24 insertions(+) diff --git a/db/c.cc b/db/c.cc index 05935d539..847f5af34 100644 --- a/db/c.cc +++ b/db/c.cc @@ -3029,6 +3029,16 @@ void rocksdb_options_set_max_bytes_for_level_multiplier_additional( } } +void rocksdb_options_set_periodic_compaction_seconds(rocksdb_options_t* opt, + uint64_t seconds) { + opt->rep.periodic_compaction_seconds = seconds; +} + +uint64_t rocksdb_options_get_periodic_compaction_seconds( + rocksdb_options_t* opt) { + return opt->rep.periodic_compaction_seconds; +} + void rocksdb_options_enable_statistics(rocksdb_options_t* opt) { opt->rep.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); } diff --git a/db/c_test.c b/db/c_test.c index 1305ee3cd..33dc5240d 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -1873,6 +1873,10 @@ int main(int argc, char** argv) { CheckCondition(2.0 == rocksdb_options_get_max_bytes_for_level_multiplier(o)); + rocksdb_options_set_periodic_compaction_seconds(o, 100000); + CheckCondition(100000 == + rocksdb_options_get_periodic_compaction_seconds(o)); + rocksdb_options_set_skip_stats_update_on_db_open(o, 1); CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); @@ -2303,6 +2307,12 @@ int main(int argc, char** argv) { CheckCondition(2.0 == rocksdb_options_get_max_bytes_for_level_multiplier(o)); + rocksdb_options_set_periodic_compaction_seconds(copy, 8000); + CheckCondition(8000 == + rocksdb_options_get_periodic_compaction_seconds(copy)); + CheckCondition(100000 == + rocksdb_options_get_periodic_compaction_seconds(o)); + rocksdb_options_set_skip_stats_update_on_db_open(copy, 0); CheckCondition(0 == 
rocksdb_options_get_skip_stats_update_on_db_open(copy)); CheckCondition(1 == rocksdb_options_get_skip_stats_update_on_db_open(o)); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index eecbe1c64..d57b6cfdc 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1256,6 +1256,10 @@ rocksdb_options_set_max_bytes_for_level_multiplier_additional( rocksdb_options_t*, int* level_values, size_t num_levels); extern ROCKSDB_LIBRARY_API void rocksdb_options_enable_statistics( rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_periodic_compaction_seconds( + rocksdb_options_t*, uint64_t); +extern ROCKSDB_LIBRARY_API uint64_t +rocksdb_options_get_periodic_compaction_seconds(rocksdb_options_t*); enum { rocksdb_statistics_level_disable_all = 0, From 9af25a392b9565f2f66566b4df3b33c8b7bfaf29 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 7 Nov 2023 16:35:19 -0800 Subject: [PATCH 265/386] Clean up AutoHyperClockTable::PurgeImpl (#12052) Summary: There was some unncessary logic (e.g. a dead assignment to home_shift) left over from earlier revision of the code. Also, rename confusing ChainRewriteLock::new_head_ / GetNewHead() to saved_head_ / GetSavedHead(). Pull Request resolved: https://github.com/facebook/rocksdb/pull/12052 Test Plan: existing tests Reviewed By: jowlyzhang Differential Revision: D51091499 Pulled By: pdillinger fbshipit-source-id: 4b191b60a2b16085681e59d49c4d97e802869db8 --- cache/clock_cache.cc | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index f4aaa0623..1d0e48937 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -1862,9 +1862,9 @@ class AutoHyperClockTable::ChainRewriteLock { std::atomic& /*yield_count*/, uint64_t already_locked_or_end) : head_ptr_(&h->head_next_with_shift) { - new_head_ = already_locked_or_end; + saved_head_ = already_locked_or_end; // already locked or end - assert(new_head_ & HandleImpl::kHeadLocked); + assert(saved_head_ & HandleImpl::kHeadLocked); } ~ChainRewriteLock() { @@ -1883,23 +1883,23 @@ class AutoHyperClockTable::ChainRewriteLock { } // Expected current state, assuming no parallel updates. 
- uint64_t GetNewHead() const { return new_head_; } + uint64_t GetSavedHead() const { return saved_head_; } bool CasUpdate(uint64_t next_with_shift, std::atomic& yield_count) { uint64_t new_head = next_with_shift | HandleImpl::kHeadLocked; - uint64_t expected = GetNewHead(); + uint64_t expected = GetSavedHead(); bool success = head_ptr_->compare_exchange_strong( expected, new_head, std::memory_order_acq_rel); if (success) { // Ensure IsEnd() is kept up-to-date, including for dtor - new_head_ = new_head; + saved_head_ = new_head; } else { // Parallel update to head, such as Insert() if (IsEnd()) { // Didn't previously hold a lock if (HandleImpl::IsEnd(expected)) { // Still don't need to - new_head_ = expected; + saved_head_ = expected; } else { // Need to acquire lock before proceeding Acquire(yield_count); @@ -1908,13 +1908,13 @@ class AutoHyperClockTable::ChainRewriteLock { // Parallel update must preserve our lock assert((expected & HandleImpl::kNextEndFlags) == HandleImpl::kHeadLocked); - new_head_ = expected; + saved_head_ = expected; } } return success; } - bool IsEnd() const { return HandleImpl::IsEnd(new_head_); } + bool IsEnd() const { return HandleImpl::IsEnd(saved_head_); } private: void Acquire(std::atomic& yield_count) { @@ -1928,7 +1928,7 @@ class AutoHyperClockTable::ChainRewriteLock { (old_head & HandleImpl::kNextEndFlags) == HandleImpl::kNextEndFlags); - new_head_ = old_head | HandleImpl::kHeadLocked; + saved_head_ = old_head | HandleImpl::kHeadLocked; break; } // NOTE: one of the few yield-wait loops, which is rare enough in practice @@ -1940,7 +1940,7 @@ class AutoHyperClockTable::ChainRewriteLock { } std::atomic* head_ptr_; - uint64_t new_head_; + uint64_t saved_head_; }; AutoHyperClockTable::AutoHyperClockTable( @@ -2424,7 +2424,7 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, assert(cur == SIZE_MAX); assert(chain_frontier_first == -1); - uint64_t next_with_shift = zero_head_lock.GetNewHead(); + uint64_t next_with_shift = zero_head_lock.GetSavedHead(); // Find a single representative for each target chain, or scan the whole // chain if some target chain has no representative. @@ -2643,7 +2643,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, HandleImpl* const arr = array_.Get(); - uint64_t next_with_shift = rewrite_lock.GetNewHead(); + uint64_t next_with_shift = rewrite_lock.GetSavedHead(); assert(!HandleImpl::IsEnd(next_with_shift)); int home_shift = GetShiftFromNextWithShift(next_with_shift); (void)home; @@ -2710,8 +2710,8 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, // no risk of duplicate clock updates to entries. Any entries already // updated must have been evicted (purgeable) and it's OK to clock // update any new entries just inserted in parallel. - // Can simply restart (GetNewHead() already updated from CAS failure). - next_with_shift = rewrite_lock.GetNewHead(); + // Can simply restart (GetSavedHead() already updated from CAS failure). 
+ next_with_shift = rewrite_lock.GetSavedHead(); assert(!HandleImpl::IsEnd(next_with_shift)); next = GetNextFromNextWithShift(next_with_shift); assert(next < array_.Count()); @@ -2803,6 +2803,7 @@ void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { (*op_data)[1], &home, &home_shift); assert(home_shift > 0); } else { + assert(kIsClockUpdateChain); // Evict callers must specify home assert(home < SIZE_MAX); } @@ -2812,13 +2813,14 @@ void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { // Acquire the RAII rewrite lock (if not an empty chain) ChainRewriteLock rewrite_lock(&arr[home], yield_count_); - int shift; - for (;;) { - shift = GetShiftFromNextWithShift(rewrite_lock.GetNewHead()); + if constexpr (kIsPurge) { + // Ensure we are at the correct home for the shift in effect for the + // chain head. + for (;;) { + int shift = GetShiftFromNextWithShift(rewrite_lock.GetSavedHead()); - if constexpr (kIsPurge) { if (shift > home_shift) { - // At head. Thus, we know the newer shift applies to us. + // Found a newer shift at candidate head, which must apply to us. // Newer shift might not yet be reflected in length_info_ (an atomicity // gap in Grow), so operate as if it is. Note that other insertions // could happen using this shift before length_info_ is updated, and @@ -2835,11 +2837,8 @@ void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { } else { assert(shift == home_shift); } - } else { - assert(home_shift == 0); - home_shift = shift; + break; } - break; } // If the chain is empty, nothing to do From e406c26c4e3edb2010e3c02d03f54cd1e1af373e Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Wed, 8 Nov 2023 12:28:00 -0800 Subject: [PATCH 266/386] Update the API comments of NewRandomRWFile() (#11820) Summary: Env::NewRandomRWFile() will not create the file if it doesn't exist, as the test at https://github.com/facebook/rocksdb/blob/main/env/env_test.cc#L2208 shows. This patch corrects the comments of Env::NewRandomRWFile(), which may otherwise mislead developers who use the rocksdb Env as a utility. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11820 Reviewed By: ajkr Differential Revision: D50176707 Pulled By: jowlyzhang fbshipit-source-id: a6ee469f549360de8d551a4fe8517b4450df7b15 --- include/rocksdb/env.h | 2 +- include/rocksdb/file_system.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 63a161923..7b0220635 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -280,7 +280,7 @@ class Env : public Customizable { const EnvOptions& options); // Open `fname` for random read and write, if file doesn't exist the file - // will be created. On success, stores a pointer to the new file in + // will not be created. On success, stores a pointer to the new file in // *result and returns OK. On failure returns non-OK. // // The returned file will only be accessed by one thread at a time. diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 7e9d5d4fe..647aad6c9 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -391,7 +391,7 @@ class FileSystem : public Customizable { IODebugContext* dbg); // Open `fname` for random read and write, if file doesn't exist the file - // will be created. On success, stores a pointer to the new file in + // will not be created. On success, stores a pointer to the new file in // *result and returns OK. On failure returns non-OK.
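To make the corrected contract concrete, a small usage sketch (hypothetical helper, error handling trimmed): since NewRandomRWFile() does not create a missing file, a caller that wants create-if-missing behavior has to do that step itself, for example via NewWritableFile().

```
#include <memory>
#include <string>

#include "rocksdb/env.h"

// Hypothetical helper: open `fname` for random read/write, creating an empty
// file first if it does not exist (NewRandomRWFile() will not create it).
rocksdb::Status OpenForRandomReadWrite(
    rocksdb::Env* env, const std::string& fname,
    std::unique_ptr<rocksdb::RandomRWFile>* result) {
  rocksdb::EnvOptions opts;
  if (!env->FileExists(fname).ok()) {
    std::unique_ptr<rocksdb::WritableFile> wf;
    rocksdb::Status s = env->NewWritableFile(fname, &wf, opts);
    if (!s.ok()) {
      return s;
    }
    // `wf` is closed when it goes out of scope, leaving an empty file behind.
  }
  return env->NewRandomRWFile(fname, result, opts);
}
```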
// // The returned file will only be accessed by one thread at a time. From 65cde19f40bd42ea9cd75469b70522e9f3001440 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 8 Nov 2023 13:28:43 -0800 Subject: [PATCH 267/386] Safer wrapper for std::atomic, use in HCC (#12051) Summary: See new atomic.h file comments for motivation. I have updated HyperClockCache to use the new atomic wrapper, fixing a few cases where an implicit conversion was accidentally used and therefore mixing std::memory_order_seq_cst where release/acquire ordering (or relaxed) was intended. There probably wasn't a real bug because I think all the cases happened to be in single-threaded contexts like constructors/destructors or statistical ops like `GetCapacity()` that don't need any particular ordering constraints. Recommended follow-up: * Replace other uses of std::atomic to help keep them safe from bugs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12051 Test Plan: Did some local correctness stress testing with cache_bench. Also triggered 15 runs of fbcode_blackbox_crash_test and saw no related failures (just 3 failures in ~CacheWithSecondaryAdapter(), already known) No performance difference seen before & after running simultaneously: ``` (while ./cache_bench -cache_type=fixed_hyper_clock_cache -populate_cache=0 -cache_size=3000000000 -ops_per_thread=500000 -threads=12 -histograms=0 2>&1 | grep parallel; do :; done) | awk '{ s += $3; c++; print "Avg time: " (s/c);}' ``` ... for both fixed_hcc and auto_hcc. Reviewed By: jowlyzhang Differential Revision: D51090518 Pulled By: pdillinger fbshipit-source-id: eeb324facb3185584603f9ea0c4de6f32919a2d7 --- cache/clock_cache.cc | 491 +++++++++++++++++++------------------------ cache/clock_cache.h | 59 +++--- util/atomic.h | 111 ++++++++++ 3 files changed, 359 insertions(+), 302 deletions(-) create mode 100644 util/atomic.h diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index 1d0e48937..fd330d90d 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -64,11 +64,11 @@ inline uint64_t GetInitialCountdown(Cache::Priority priority) { inline void MarkEmpty(ClockHandle& h) { #ifndef NDEBUG // Mark slot as empty, with assertion - uint64_t meta = h.meta.exchange(0, std::memory_order_release); + uint64_t meta = h.meta.Exchange(0); assert(meta >> ClockHandle::kStateShift == ClockHandle::kStateConstruction); #else // Mark slot as empty - h.meta.store(0, std::memory_order_release); + h.meta.Store(0); #endif } @@ -88,8 +88,7 @@ inline void Unref(const ClockHandle& h, uint64_t count = 1) { // Pretend we never took the reference // WART: there's a tiny chance we release last ref to invisible // entry here. If that happens, we let eviction take care of it. - uint64_t old_meta = h.meta.fetch_sub(ClockHandle::kAcquireIncrement * count, - std::memory_order_release); + uint64_t old_meta = h.meta.FetchSub(ClockHandle::kAcquireIncrement * count); assert(GetRefcount(old_meta) != 0); (void)old_meta; } @@ -101,12 +100,12 @@ inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { // In AutoHCC, our eviction process follows the chain structure, so we // should ensure that we see the latest state of each entry, at least for // assertion checking. - meta = h.meta.load(std::memory_order_acquire); + meta = h.meta.Load(); } else { // In FixedHCC, our eviction process is a simple iteration without regard // to probing order, displacements, etc., so it doesn't matter if we see // somewhat stale data. 
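The contents of the new util/atomic.h are not shown at this point, so as a rough sketch of the kind of wrapper the summary describes (method names chosen to mirror the calls used below, e.g. Load(), LoadRelaxed(), Store(), FetchAdd(); the actual header may differ in details):

```
#include <atomic>

// Sketch of an acquire/release-by-default atomic wrapper. Every operation
// spells out its ordering, so a bare read, write, or ++ (which would be
// implicitly seq_cst on a raw std::atomic) simply does not compile.
template <typename T>
class AcqRelAtomicSketch {
 public:
  explicit AcqRelAtomicSketch(T v = {}) : v_(v) {}
  T Load() const { return v_.load(std::memory_order_acquire); }
  T LoadRelaxed() const { return v_.load(std::memory_order_relaxed); }
  void Store(T v) { v_.store(v, std::memory_order_release); }
  T Exchange(T v) { return v_.exchange(v, std::memory_order_acq_rel); }
  T FetchAdd(T d) { return v_.fetch_add(d, std::memory_order_acq_rel); }
  T FetchAddRelaxed(T d) { return v_.fetch_add(d, std::memory_order_relaxed); }
  T FetchSub(T d) { return v_.fetch_sub(d, std::memory_order_acq_rel); }
  bool CasStrong(T& expected, T desired) {
    return v_.compare_exchange_strong(expected, desired,
                                      std::memory_order_acq_rel);
  }

 private:
  std::atomic<T> v_;  // never exposed, so no implicit conversions
};
// A RelaxedAtomic variant would default every operation to relaxed ordering,
// suitable for simple statistics counters.
```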
- meta = h.meta.load(std::memory_order_relaxed); + meta = h.meta.LoadRelaxed(); } if (((meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit) == @@ -140,16 +139,14 @@ inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { (meta & ClockHandle::kHitBitMask) | (new_count << ClockHandle::kReleaseCounterShift) | (new_count << ClockHandle::kAcquireCounterShift); - h.meta.compare_exchange_strong(meta, new_meta, std::memory_order_relaxed); + h.meta.CasStrongRelaxed(meta, new_meta); return false; } // Otherwise, remove entry (either unreferenced invisible or // unreferenced and expired visible). - if (h.meta.compare_exchange_strong(meta, - (uint64_t{ClockHandle::kStateConstruction} - << ClockHandle::kStateShift) | - (meta & ClockHandle::kHitBitMask), - std::memory_order_acquire)) { + if (h.meta.CasStrong(meta, (uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift) | + (meta & ClockHandle::kHitBitMask))) { // Took ownership. return true; } else { @@ -214,7 +211,7 @@ inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { // motivates only checking for release counter in high state, not both in high // state.) inline void CorrectNearOverflow(uint64_t old_meta, - std::atomic& meta) { + AcqRelAtomic& meta) { // We clear both top-most counter bits at the same time. constexpr uint64_t kCounterTopBit = uint64_t{1} << (ClockHandle::kCounterNumBits - 1); @@ -228,7 +225,7 @@ inline void CorrectNearOverflow(uint64_t old_meta, << ClockHandle::kReleaseCounterShift; if (UNLIKELY(old_meta & kCheckBits)) { - meta.fetch_and(~kClearBits, std::memory_order_relaxed); + meta.FetchAndRelaxed(~kClearBits); } } @@ -237,9 +234,8 @@ inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, assert(*already_matches == false); // Optimistically transition the slot from "empty" to // "under construction" (no effect on other states) - uint64_t old_meta = h.meta.fetch_or( - uint64_t{ClockHandle::kStateOccupiedBit} << ClockHandle::kStateShift, - std::memory_order_acq_rel); + uint64_t old_meta = h.meta.FetchOr(uint64_t{ClockHandle::kStateOccupiedBit} + << ClockHandle::kStateShift); uint64_t old_state = old_meta >> ClockHandle::kStateShift; if (old_state == ClockHandle::kStateEmpty) { @@ -255,16 +251,14 @@ inline bool BeginSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, // refs for initial countdown, so that we boost the clock state if // this is a match. old_meta = - h.meta.fetch_add(ClockHandle::kAcquireIncrement * initial_countdown, - std::memory_order_acq_rel); + h.meta.FetchAdd(ClockHandle::kAcquireIncrement * initial_countdown); // Like Lookup if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) { // Acquired a read reference if (h.hashed_key == proto.hashed_key) { // Match. 
Release in a way that boosts the clock state old_meta = - h.meta.fetch_add(ClockHandle::kReleaseIncrement * initial_countdown, - std::memory_order_acq_rel); + h.meta.FetchAdd(ClockHandle::kReleaseIncrement * initial_countdown); // Correct for possible (but rare) overflow CorrectNearOverflow(old_meta, h.meta); // Insert detached instead (only if return handle needed) @@ -303,12 +297,12 @@ inline void FinishSlotInsert(const ClockHandleBasicData& proto, ClockHandle& h, #ifndef NDEBUG // Save the state transition, with assertion - uint64_t old_meta = h.meta.exchange(new_meta, std::memory_order_release); + uint64_t old_meta = h.meta.Exchange(new_meta); assert(old_meta >> ClockHandle::kStateShift == ClockHandle::kStateConstruction); #else // Save the state transition - h.meta.store(new_meta, std::memory_order_release); + h.meta.Store(new_meta); #endif } @@ -334,14 +328,13 @@ void ConstApplyToEntriesRange(const Func& func, const HandleImpl* begin, for (const HandleImpl* h = begin; h < end; ++h) { // Note: to avoid using compare_exchange, we have to be extra careful. - uint64_t old_meta = h->meta.load(std::memory_order_relaxed); + uint64_t old_meta = h->meta.LoadRelaxed(); // Check if it's an entry visible to lookups if ((old_meta >> ClockHandle::kStateShift) & check_state_mask) { // Increment acquire counter. Note: it's possible that the entry has // completely changed since we loaded old_meta, but incrementing acquire // count is always safe. (Similar to optimistic Lookup here.) - old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + old_meta = h->meta.FetchAdd(ClockHandle::kAcquireIncrement); // Check whether we actually acquired a reference. if ((old_meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit) { @@ -383,10 +376,9 @@ HandleImpl* BaseClockTable::StandaloneInsert( uint64_t meta = uint64_t{ClockHandle::kStateInvisible} << ClockHandle::kStateShift; meta |= uint64_t{1} << ClockHandle::kAcquireCounterShift; - h->meta.store(meta, std::memory_order_release); + h->meta.Store(meta); // Keep track of how much of usage is standalone - standalone_usage_.fetch_add(proto.GetTotalCharge(), - std::memory_order_relaxed); + standalone_usage_.FetchAddRelaxed(proto.GetTotalCharge()); return h; } @@ -417,7 +409,7 @@ typename Table::HandleImpl* BaseClockTable::CreateStandalone( /*need_evict_for_occupancy=*/false, state); if (!success) { // Force the issue - usage_.fetch_add(total_charge, std::memory_order_relaxed); + usage_.FetchAddRelaxed(total_charge); } } @@ -434,7 +426,7 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( std::to_string(total_charge) + " > " + std::to_string(capacity)); } // Grab any available capacity, and free up any more required. - size_t old_usage = usage_.load(std::memory_order_relaxed); + size_t old_usage = usage_.LoadRelaxed(); size_t new_usage; do { new_usage = std::min(capacity, old_usage + total_charge); @@ -442,8 +434,7 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( // No change needed break; } - } while (!usage_.compare_exchange_weak(old_usage, new_usage, - std::memory_order_relaxed)); + } while (!usage_.CasWeakRelaxed(old_usage, new_usage)); // How much do we need to evict then? 
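The "grab any available capacity" update just above (and the leftover amount to evict computed right below) is a bounded fetch-add built from a CAS loop. A standalone sketch of that idiom on a plain std::atomic, with hypothetical names:

```
#include <algorithm>
#include <atomic>
#include <cstddef>

// Atomically add `charge` to `usage`, but never past `capacity`. Returns the
// portion of `charge` that could not fit under the cap (the caller must evict
// at least that much).
size_t GrabCapacity(std::atomic<size_t>& usage, size_t capacity,
                    size_t charge) {
  size_t old_usage = usage.load(std::memory_order_relaxed);
  size_t new_usage;
  do {
    new_usage = std::min(capacity, old_usage + charge);
    if (new_usage == old_usage) {
      break;  // already at (or beyond) capacity; nothing to grab
    }
    // On CAS failure, `old_usage` is refreshed and the loop recomputes.
  } while (!usage.compare_exchange_weak(old_usage, new_usage,
                                        std::memory_order_relaxed));
  return old_usage + charge - new_usage;
}
```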
size_t need_evict_charge = old_usage + total_charge - new_usage; size_t request_evict_charge = need_evict_charge; @@ -454,17 +445,15 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( if (request_evict_charge > 0) { EvictionData data; static_cast(this)->Evict(request_evict_charge, state, &data); - occupancy_.fetch_sub(data.freed_count, std::memory_order_release); + occupancy_.FetchSub(data.freed_count); if (LIKELY(data.freed_charge > need_evict_charge)) { assert(data.freed_count > 0); // Evicted more than enough - usage_.fetch_sub(data.freed_charge - need_evict_charge, - std::memory_order_relaxed); + usage_.FetchSubRelaxed(data.freed_charge - need_evict_charge); } else if (data.freed_charge < need_evict_charge || (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0)) { // Roll back to old usage minus evicted - usage_.fetch_sub(data.freed_charge + (new_usage - old_usage), - std::memory_order_relaxed); + usage_.FetchSubRelaxed(data.freed_charge + (new_usage - old_usage)); if (data.freed_charge < need_evict_charge) { return Status::MemoryLimit( "Insert failed because unable to evict entries to stay within " @@ -495,7 +484,7 @@ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( // charge. Thus, we should evict some extra if it's not a signifcant // portion of the shard capacity. This can have the side benefit of // involving fewer threads in eviction. - size_t old_usage = usage_.load(std::memory_order_relaxed); + size_t old_usage = usage_.LoadRelaxed(); size_t need_evict_charge; // NOTE: if total_charge > old_usage, there isn't yet enough to evict // `total_charge` amount. Even if we only try to evict `old_usage` amount, @@ -529,13 +518,13 @@ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( return false; } else { // Update occupancy for evictions - occupancy_.fetch_sub(data.freed_count, std::memory_order_release); + occupancy_.FetchSub(data.freed_count); } } // Track new usage even if we weren't able to evict enough - usage_.fetch_add(total_charge - data.freed_charge, std::memory_order_relaxed); + usage_.FetchAddRelaxed(total_charge - data.freed_charge); // No underflow - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); + assert(usage_.LoadRelaxed() < SIZE_MAX / 2); // Success return true; } @@ -549,11 +538,11 @@ void BaseClockTable::TrackAndReleaseEvictedEntry( if (eviction_callback_) { // For key reconstructed from hash UniqueId64x2 unhashed; - took_value_ownership = eviction_callback_( - ClockCacheShard::ReverseHash( - h->GetHash(), &unhashed, hash_seed_), - reinterpret_cast(h), - h->meta.load(std::memory_order_relaxed) & ClockHandle::kHitBitMask); + took_value_ownership = + eviction_callback_(ClockCacheShard::ReverseHash( + h->GetHash(), &unhashed, hash_seed_), + reinterpret_cast(h), + h->meta.LoadRelaxed() & ClockHandle::kHitBitMask); } if (!took_value_ownership) { h->FreeData(allocator_); @@ -574,7 +563,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, // Do we have the available occupancy? Optimistically assume we do // and deal with it if we don't. 
- size_t old_occupancy = occupancy_.fetch_add(1, std::memory_order_acquire); + size_t old_occupancy = occupancy_.FetchAdd(1); // Whether we over-committed and need an eviction to make up for it bool need_evict_for_occupancy = !derived.GrowIfNeeded(old_occupancy + 1, state); @@ -588,7 +577,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, total_charge, capacity, need_evict_for_occupancy, state); if (!s.ok()) { // Revert occupancy - occupancy_.fetch_sub(1, std::memory_order_relaxed); + occupancy_.FetchSubRelaxed(1); return s; } } else { @@ -597,7 +586,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, total_charge, capacity, need_evict_for_occupancy, state); if (!success) { // Revert occupancy - occupancy_.fetch_sub(1, std::memory_order_relaxed); + occupancy_.FetchSubRelaxed(1); if (handle == nullptr) { // Don't insert the entry but still return ok, as if the entry // inserted into cache and evicted immediately. @@ -605,7 +594,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, return Status::OK(); } else { // Need to track usage of fallback standalone insert - usage_.fetch_add(total_charge, std::memory_order_relaxed); + usage_.FetchAddRelaxed(total_charge); use_standalone_insert = true; } } @@ -635,13 +624,13 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, } // Not inserted // Revert occupancy - occupancy_.fetch_sub(1, std::memory_order_relaxed); + occupancy_.FetchSubRelaxed(1); // Maybe fall back on standalone insert if (handle == nullptr) { // Revert usage - usage_.fetch_sub(total_charge, std::memory_order_relaxed); + usage_.FetchSubRelaxed(total_charge); // No underflow - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); + assert(usage_.LoadRelaxed() < SIZE_MAX / 2); // As if unrefed entry immdiately evicted proto.FreeData(allocator_); return Status::OK(); @@ -665,8 +654,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, void BaseClockTable::Ref(ClockHandle& h) { // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + uint64_t old_meta = h.meta.FetchAdd(ClockHandle::kAcquireIncrement); assert((old_meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit); @@ -678,8 +666,7 @@ void BaseClockTable::Ref(ClockHandle& h) { #ifndef NDEBUG void BaseClockTable::TEST_RefN(ClockHandle& h, size_t n) { // Increment acquire counter - uint64_t old_meta = h.meta.fetch_add(n * ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + uint64_t old_meta = h.meta.FetchAdd(n * ClockHandle::kAcquireIncrement); assert((old_meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit); @@ -691,8 +678,8 @@ void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) { // Like n-1 Releases, but assumes one more will happen in the caller to take // care of anything like erasing an unreferenced, invisible entry. 
- uint64_t old_meta = h->meta.fetch_add( - (n - 1) * ClockHandle::kReleaseIncrement, std::memory_order_acquire); + uint64_t old_meta = + h->meta.FetchAdd((n - 1) * ClockHandle::kReleaseIncrement); assert((old_meta >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit); (void)old_meta; @@ -715,7 +702,7 @@ FixedHyperClockTable::FixedHyperClockTable( array_(new HandleImpl[size_t{1} << length_bits_]) { if (metadata_charge_policy == CacheMetadataChargePolicy::kFullChargeCacheMetadata) { - usage_ += size_t{GetTableSize()} * sizeof(HandleImpl); + usage_.FetchAddRelaxed(size_t{GetTableSize()} * sizeof(HandleImpl)); } static_assert(sizeof(HandleImpl) == 64U, @@ -727,13 +714,13 @@ FixedHyperClockTable::~FixedHyperClockTable() { // in the table. for (size_t i = 0; i < GetTableSize(); i++) { HandleImpl& h = array_[i]; - switch (h.meta >> ClockHandle::kStateShift) { + switch (h.meta.LoadRelaxed() >> ClockHandle::kStateShift) { case ClockHandle::kStateEmpty: // noop break; case ClockHandle::kStateInvisible: // rare but possible case ClockHandle::kStateVisible: - assert(GetRefcount(h.meta) == 0); + assert(GetRefcount(h.meta.LoadRelaxed()) == 0); h.FreeData(allocator_); #ifndef NDEBUG Rollback(h.hashed_key, &h); @@ -749,13 +736,13 @@ FixedHyperClockTable::~FixedHyperClockTable() { #ifndef NDEBUG for (size_t i = 0; i < GetTableSize(); i++) { - assert(array_[i].displacements.load() == 0); + assert(array_[i].displacements.LoadRelaxed() == 0); } #endif - assert(usage_.load() == 0 || - usage_.load() == size_t{GetTableSize()} * sizeof(HandleImpl)); - assert(occupancy_ == 0); + assert(usage_.LoadRelaxed() == 0 || + usage_.LoadRelaxed() == size_t{GetTableSize()} * sizeof(HandleImpl)); + assert(occupancy_.LoadRelaxed() == 0); } void FixedHyperClockTable::StartInsert(InsertState&) {} @@ -789,7 +776,7 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::DoInsert( // Search is ending. 
Roll back displacements Rollback(proto.hashed_key, h); } else { - h->displacements.fetch_add(1, std::memory_order_relaxed); + h->displacements.FetchAddRelaxed(1); } }); if (already_matches) { @@ -818,13 +805,13 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( [&](HandleImpl* h) { // Mostly branch-free version (similar performance) /* - uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, + uint64_t old_meta = h->meta.FetchAdd(ClockHandle::kAcquireIncrement, std::memory_order_acquire); bool Shareable = (old_meta >> (ClockHandle::kStateShift + 1)) & 1U; bool visible = (old_meta >> ClockHandle::kStateShift) & 1U; bool match = (h->key == key) & visible; - h->meta.fetch_sub(static_cast(Shareable & !match) << - ClockHandle::kAcquireCounterShift, std::memory_order_release); return + h->meta.FetchSub(static_cast(Shareable & !match) << + ClockHandle::kAcquireCounterShift); return match; */ // Optimistic lookup should pay off when the table is relatively @@ -832,15 +819,14 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( constexpr bool kOptimisticLookup = true; uint64_t old_meta; if (!kOptimisticLookup) { - old_meta = h->meta.load(std::memory_order_acquire); + old_meta = h->meta.Load(); if ((old_meta >> ClockHandle::kStateShift) != ClockHandle::kStateVisible) { return false; } } // (Optimistically) increment acquire counter - old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + old_meta = h->meta.FetchAdd(ClockHandle::kAcquireIncrement); // Check if it's an entry visible to lookups if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) { @@ -849,8 +835,7 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( // Match // Update the hit bit if (eviction_callback_) { - h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift, - std::memory_order_relaxed); + h->meta.FetchOrRelaxed(uint64_t{1} << ClockHandle::kHitBitShift); } return true; } else { @@ -869,9 +854,7 @@ FixedHyperClockTable::HandleImpl* FixedHyperClockTable::Lookup( } return false; }, - [&](HandleImpl* h) { - return h->displacements.load(std::memory_order_relaxed) == 0; - }, + [&](HandleImpl* h) { return h->displacements.LoadRelaxed() == 0; }, [&](HandleImpl* /*h*/, bool /*is_last*/) {}); return e; @@ -887,12 +870,10 @@ bool FixedHyperClockTable::Release(HandleImpl* h, bool useful, uint64_t old_meta; if (useful) { // Increment release counter to indicate was used - old_meta = h->meta.fetch_add(ClockHandle::kReleaseIncrement, - std::memory_order_release); + old_meta = h->meta.FetchAdd(ClockHandle::kReleaseIncrement); } else { // Decrement acquire counter to pretend it never happened - old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + old_meta = h->meta.FetchSub(ClockHandle::kAcquireIncrement); } assert((old_meta >> ClockHandle::kStateShift) & @@ -908,7 +889,7 @@ bool FixedHyperClockTable::Release(HandleImpl* h, bool useful, // FIXME: There's a chance here that another thread could replace this // entry and we end up erasing the wrong one. - // Update for last fetch_add op + // Update for last FetchAdd op if (useful) { old_meta += ClockHandle::kReleaseIncrement; } else { @@ -930,18 +911,17 @@ bool FixedHyperClockTable::Release(HandleImpl* h, bool useful, // Note that there's a small chance that we release, another thread // replaces this entry with another, reaches zero refs, and then we end // up erasing that other entry. That's an acceptable risk / imprecision. 
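The Lookup() and Release() code above leans on one recurring idiom: optimistically FetchAdd an acquire reference, inspect the returned old metadata, and "pretend we never took the reference" if the slot turns out not to be usable. A simplified sketch with a placeholder bit layout (not the real ClockHandle encoding):

```
#include <atomic>
#include <cstdint>

// Placeholder layout for illustration only.
constexpr uint64_t kAcquireIncrement = 1;
constexpr uint64_t kShareableBit = uint64_t{1} << 62;

// Returns true if a read reference was acquired.
bool TryRef(std::atomic<uint64_t>& meta) {
  // Blind increment is always safe: if the slot is not a shareable entry,
  // the counter bits carry no meaning for it.
  uint64_t old_meta =
      meta.fetch_add(kAcquireIncrement, std::memory_order_acquire);
  if (old_meta & kShareableBit) {
    return true;  // acquired a reference
  }
  // Undo: pretend we never took the reference.
  meta.fetch_sub(kAcquireIncrement, std::memory_order_release);
  return false;
}
```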
- } while (!h->meta.compare_exchange_weak( - old_meta, - uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, - std::memory_order_acquire)); + } while ( + !h->meta.CasWeak(old_meta, uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift)); // Took ownership size_t total_charge = h->GetTotalCharge(); if (UNLIKELY(h->IsStandalone())) { h->FreeData(allocator_); // Delete standalone handle delete h; - standalone_usage_.fetch_sub(total_charge, std::memory_order_relaxed); - usage_.fetch_sub(total_charge, std::memory_order_relaxed); + standalone_usage_.FetchSubRelaxed(total_charge); + usage_.FetchSubRelaxed(total_charge); } else { Rollback(h->hashed_key, h); FreeDataMarkEmpty(*h, allocator_); @@ -973,8 +953,7 @@ void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) { [&](HandleImpl* h) { // Could be multiple entries in rare cases. Erase them all. // Optimistically increment acquire counter - uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + uint64_t old_meta = h->meta.FetchAdd(ClockHandle::kAcquireIncrement); // Check if it's an entry visible to lookups if ((old_meta >> ClockHandle::kStateShift) == ClockHandle::kStateVisible) { @@ -982,9 +961,8 @@ void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) { if (h->hashed_key == hashed_key) { // Match. Set invisible. old_meta = - h->meta.fetch_and(~(uint64_t{ClockHandle::kStateVisibleBit} - << ClockHandle::kStateShift), - std::memory_order_acq_rel); + h->meta.FetchAnd(~(uint64_t{ClockHandle::kStateVisibleBit} + << ClockHandle::kStateShift)); // Apply update to local copy old_meta &= ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift); @@ -996,11 +974,9 @@ void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) { // Pretend we never took the reference Unref(*h); break; - } else if (h->meta.compare_exchange_weak( - old_meta, - uint64_t{ClockHandle::kStateConstruction} - << ClockHandle::kStateShift, - std::memory_order_acq_rel)) { + } else if (h->meta.CasWeak( + old_meta, uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift)) { // Took ownership assert(hashed_key == h->hashed_key); size_t total_charge = h->GetTotalCharge(); @@ -1026,9 +1002,7 @@ void FixedHyperClockTable::Erase(const UniqueId64x2& hashed_key) { } return false; }, - [&](HandleImpl* h) { - return h->displacements.load(std::memory_order_relaxed) == 0; - }, + [&](HandleImpl* h) { return h->displacements.LoadRelaxed() == 0; }, [&](HandleImpl* /*h*/, bool /*is_last*/) {}); } @@ -1036,14 +1010,12 @@ void FixedHyperClockTable::EraseUnRefEntries() { for (size_t i = 0; i <= this->length_bits_mask_; i++) { HandleImpl& h = array_[i]; - uint64_t old_meta = h.meta.load(std::memory_order_relaxed); + uint64_t old_meta = h.meta.LoadRelaxed(); if (old_meta & (uint64_t{ClockHandle::kStateShareableBit} << ClockHandle::kStateShift) && GetRefcount(old_meta) == 0 && - h.meta.compare_exchange_strong(old_meta, - uint64_t{ClockHandle::kStateConstruction} - << ClockHandle::kStateShift, - std::memory_order_acquire)) { + h.meta.CasStrong(old_meta, uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift)) { // Took ownership size_t total_charge = h.GetTotalCharge(); Rollback(h.hashed_key, &h); @@ -1094,17 +1066,17 @@ inline void FixedHyperClockTable::Rollback(const UniqueId64x2& hashed_key, size_t current = ModTableSize(hashed_key[1]); size_t increment = static_cast(hashed_key[0]) | 1U; while (&array_[current] != h) { - 
array_[current].displacements.fetch_sub(1, std::memory_order_relaxed); + array_[current].displacements.FetchSubRelaxed(1); current = ModTableSize(current + increment); } } inline void FixedHyperClockTable::ReclaimEntryUsage(size_t total_charge) { - auto old_occupancy = occupancy_.fetch_sub(1U, std::memory_order_release); + auto old_occupancy = occupancy_.FetchSub(1U); (void)old_occupancy; // No underflow assert(old_occupancy > 0); - auto old_usage = usage_.fetch_sub(total_charge, std::memory_order_relaxed); + auto old_usage = usage_.FetchSubRelaxed(total_charge); (void)old_usage; // No underflow assert(old_usage >= total_charge); @@ -1119,8 +1091,7 @@ inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, constexpr size_t step_size = 4; // First (concurrent) increment clock pointer - uint64_t old_clock_pointer = - clock_pointer_.fetch_add(step_size, std::memory_order_relaxed); + uint64_t old_clock_pointer = clock_pointer_.FetchAddRelaxed(step_size); // Cap the eviction effort at this thread (along with those operating in // parallel) circling through the whole structure kMaxCountdown times. @@ -1149,8 +1120,7 @@ inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, } // Advance clock pointer (concurrently) - old_clock_pointer = - clock_pointer_.fetch_add(step_size, std::memory_order_relaxed); + old_clock_pointer = clock_pointer_.FetchAddRelaxed(step_size); } } @@ -1167,7 +1137,8 @@ ClockCacheShard

<Table>::ClockCacheShard( capacity_(capacity), strict_capacity_limit_(strict_capacity_limit) { // Initial charge metadata should not exceed capacity - assert(table_.GetUsage() <= capacity_ || capacity_ < sizeof(HandleImpl)); + assert(table_.GetUsage() <= capacity_.LoadRelaxed() || + capacity_.LoadRelaxed() < sizeof(HandleImpl)); } template <class Table> @@ -1233,15 +1204,14 @@ int FixedHyperClockTable::CalcHashBits( template <class Table> void ClockCacheShard<Table>::SetCapacity(size_t capacity) { - capacity_.store(capacity, std::memory_order_relaxed); + capacity_.StoreRelaxed(capacity); // next Insert will take care of any necessary evictions }
template <class Table> void ClockCacheShard<Table>::SetStrictCapacityLimit( bool strict_capacity_limit) { - strict_capacity_limit_.store(strict_capacity_limit, - std::memory_order_relaxed); + strict_capacity_limit_.StoreRelaxed(strict_capacity_limit); // next Insert will take care of any necessary evictions } @@ -1261,9 +1231,9 @@ Status ClockCacheShard<Table>::Insert(const Slice& key, proto.value = value; proto.helper = helper; proto.total_charge = charge; - return table_.template Insert<Table>( - proto, handle, priority, capacity_.load(std::memory_order_relaxed), - strict_capacity_limit_.load(std::memory_order_relaxed)); + return table_.template Insert<Table>(proto, handle, priority, capacity_.LoadRelaxed(), strict_capacity_limit_.LoadRelaxed()); }
template <class Table> @@ -1279,8 +1249,8 @@ typename Table::HandleImpl* ClockCacheShard<Table>::CreateStandalone( proto.helper = helper; proto.total_charge = charge; return table_.template CreateStandalone<Table>( - proto, capacity_.load(std::memory_order_relaxed), - strict_capacity_limit_.load(std::memory_order_relaxed), allow_uncharged); + proto, capacity_.LoadRelaxed(), strict_capacity_limit_.LoadRelaxed(), + allow_uncharged); }
template <class Table> @@ -1349,7 +1319,7 @@ size_t ClockCacheShard<Table>::GetStandaloneUsage() const { template <class Table> size_t ClockCacheShard<Table>::GetCapacity() const { - return capacity_; + return capacity_.LoadRelaxed(); } template <class Table> @@ -1365,7 +1335,7 @@ size_t ClockCacheShard<Table>
    ::GetPinnedUsage() const { metadata_charge_policy_ == kFullChargeCacheMetadata; ConstApplyToEntriesRange( [&table_pinned_usage, charge_metadata](const HandleImpl& h) { - uint64_t meta = h.meta.load(std::memory_order_relaxed); + uint64_t meta = h.meta.LoadRelaxed(); uint64_t refcount = GetRefcount(meta); // Holding one ref for ConstApplyToEntriesRange assert(refcount > 0); @@ -1470,8 +1440,7 @@ void AddShardEvaluation(const FixedHyperClockCache::Shard& shard, } bool IsSlotOccupied(const ClockHandle& h) { - return (h.meta.load(std::memory_order_relaxed) >> ClockHandle::kStateShift) != - 0; + return (h.meta.LoadRelaxed() >> ClockHandle::kStateShift) != 0; } } // namespace @@ -1749,8 +1718,7 @@ inline bool MatchAndRef(const UniqueId64x2* hashed_key, const ClockHandle& h, uint64_t old_meta; // (Optimistically) increment acquire counter. - old_meta = h.meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + old_meta = h.meta.FetchAdd(ClockHandle::kAcquireIncrement); // Check if it's a referencable (sharable) entry if ((old_meta & (uint64_t{ClockHandle::kStateShareableBit} << ClockHandle::kStateShift)) == 0) { @@ -1801,8 +1769,7 @@ void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, (void)new_shift; using HandleImpl = AutoHyperClockTable::HandleImpl; for (;;) { - uint64_t next_with_shift = - arr[frontier].chain_next_with_shift.load(std::memory_order_acquire); + uint64_t next_with_shift = arr[frontier].chain_next_with_shift.Load(); assert(GetShiftFromNextWithShift(next_with_shift) == old_shift); if (next_with_shift == stop_before_or_new_tail) { // Stopping at entry with pointer matching "stop before" @@ -1812,15 +1779,13 @@ void UpgradeShiftsOnRange(AutoHyperClockTable::HandleImpl* arr, if (HandleImpl::IsEnd(next_with_shift)) { // Also update tail to new tail assert(HandleImpl::IsEnd(stop_before_or_new_tail)); - arr[frontier].chain_next_with_shift.store(stop_before_or_new_tail, - std::memory_order_release); + arr[frontier].chain_next_with_shift.Store(stop_before_or_new_tail); // Mark nothing left to upgrade frontier = SIZE_MAX; return; } // Next is another entry to process, so upgrade and advance frontier - arr[frontier].chain_next_with_shift.fetch_add(1U, - std::memory_order_acq_rel); + arr[frontier].chain_next_with_shift.FetchAdd(1U); assert(GetShiftFromNextWithShift(next_with_shift + 1) == new_shift); frontier = GetNextFromNextWithShift(next_with_shift); } @@ -1841,7 +1806,7 @@ size_t CalcOccupancyLimit(size_t used_length) { // while holding the lock--or wanting to should it become non-empty. // // The flag bits on the head are such that the head cannot be locked if it -// is an empty chain, so that a "blind" fetch_or will try to lock a non-empty +// is an empty chain, so that a "blind" FetchOr will try to lock a non-empty // chain but have no effect on an empty chain. When a potential rewrite // operation see an empty head pointer, there is no need to lock as the // operation is a no-op. 
However, there are some cases such as CAS-update @@ -1852,14 +1817,14 @@ class AutoHyperClockTable::ChainRewriteLock { using HandleImpl = AutoHyperClockTable::HandleImpl; // Acquire lock if head of h is not an end - explicit ChainRewriteLock(HandleImpl* h, std::atomic& yield_count) + explicit ChainRewriteLock(HandleImpl* h, RelaxedAtomic& yield_count) : head_ptr_(&h->head_next_with_shift) { Acquire(yield_count); } // RAII wrap existing lock held (or end) explicit ChainRewriteLock(HandleImpl* h, - std::atomic& /*yield_count*/, + RelaxedAtomic& /*yield_count*/, uint64_t already_locked_or_end) : head_ptr_(&h->head_next_with_shift) { saved_head_ = already_locked_or_end; @@ -1870,14 +1835,13 @@ class AutoHyperClockTable::ChainRewriteLock { ~ChainRewriteLock() { if (!IsEnd()) { // Release lock - uint64_t old = head_ptr_->fetch_and(~HandleImpl::kHeadLocked, - std::memory_order_release); + uint64_t old = head_ptr_->FetchAnd(~HandleImpl::kHeadLocked); (void)old; assert((old & HandleImpl::kNextEndFlags) == HandleImpl::kHeadLocked); } } - void Reset(HandleImpl* h, std::atomic& yield_count) { + void Reset(HandleImpl* h, RelaxedAtomic& yield_count) { this->~ChainRewriteLock(); new (this) ChainRewriteLock(h, yield_count); } @@ -1885,11 +1849,11 @@ class AutoHyperClockTable::ChainRewriteLock { // Expected current state, assuming no parallel updates. uint64_t GetSavedHead() const { return saved_head_; } - bool CasUpdate(uint64_t next_with_shift, std::atomic& yield_count) { + bool CasUpdate(uint64_t next_with_shift, + RelaxedAtomic& yield_count) { uint64_t new_head = next_with_shift | HandleImpl::kHeadLocked; uint64_t expected = GetSavedHead(); - bool success = head_ptr_->compare_exchange_strong( - expected, new_head, std::memory_order_acq_rel); + bool success = head_ptr_->CasStrong(expected, new_head); if (success) { // Ensure IsEnd() is kept up-to-date, including for dtor saved_head_ = new_head; @@ -1917,11 +1881,10 @@ class AutoHyperClockTable::ChainRewriteLock { bool IsEnd() const { return HandleImpl::IsEnd(saved_head_); } private: - void Acquire(std::atomic& yield_count) { + void Acquire(RelaxedAtomic& yield_count) { for (;;) { // Acquire removal lock on the chain - uint64_t old_head = head_ptr_->fetch_or(HandleImpl::kHeadLocked, - std::memory_order_acq_rel); + uint64_t old_head = head_ptr_->FetchOr(HandleImpl::kHeadLocked); if ((old_head & HandleImpl::kNextEndFlags) != HandleImpl::kHeadLocked) { // Either acquired the lock or lock not needed (end) assert((old_head & HandleImpl::kNextEndFlags) == 0 || @@ -1934,12 +1897,12 @@ class AutoHyperClockTable::ChainRewriteLock { // NOTE: one of the few yield-wait loops, which is rare enough in practice // for its performance to be insignificant. (E.g. using C++20 atomic // wait/notify would likely be worse because of wasted notify costs.) 
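ChainRewriteLock::Acquire() described above locks a chain head by blindly setting the lock bit with a fetch_or, yielding and retrying when another thread already holds it. A simplified sketch (placeholder flag bit, and without the special handling of empty/"end" heads that the real code has):

```
#include <atomic>
#include <cstdint>
#include <thread>

constexpr uint64_t kHeadLockedBit = uint64_t{1} << 63;  // placeholder

void AcquireHeadLock(std::atomic<uint64_t>& head,
                     std::atomic<uint64_t>& yield_count) {
  for (;;) {
    uint64_t old_head =
        head.fetch_or(kHeadLockedBit, std::memory_order_acq_rel);
    if ((old_head & kHeadLockedBit) == 0) {
      return;  // the bit was clear and we set it: lock acquired
    }
    // Already held by another thread; rare in practice, so yield and retry.
    yield_count.fetch_add(1, std::memory_order_relaxed);
    std::this_thread::yield();
  }
}
```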
- yield_count.fetch_add(1, std::memory_order_relaxed); + yield_count.FetchAddRelaxed(1); std::this_thread::yield(); } } - std::atomic* head_ptr_; + AcqRelAtomic* head_ptr_; uint64_t saved_head_; }; @@ -1957,21 +1920,21 @@ AutoHyperClockTable::AutoHyperClockTable( metadata_charge_policy))), length_info_(UsedLengthToLengthInfo(GetStartingLength(capacity))), occupancy_limit_( - CalcOccupancyLimit(LengthInfoToUsedLength(length_info_.load()))), + CalcOccupancyLimit(LengthInfoToUsedLength(length_info_.Load()))), grow_frontier_(GetTableSize()), clock_pointer_mask_( - BottomNBits(UINT64_MAX, LengthInfoToMinShift(length_info_.load()))) { + BottomNBits(UINT64_MAX, LengthInfoToMinShift(length_info_.Load()))) { if (metadata_charge_policy == CacheMetadataChargePolicy::kFullChargeCacheMetadata) { // NOTE: ignoring page boundaries for simplicity - usage_ += size_t{GetTableSize()} * sizeof(HandleImpl); + usage_.FetchAddRelaxed(size_t{GetTableSize()} * sizeof(HandleImpl)); } static_assert(sizeof(HandleImpl) == 64U, "Expecting size / alignment with common cache line size"); // Populate head pointers - uint64_t length_info = length_info_.load(); + uint64_t length_info = length_info_.Load(); int min_shift = LengthInfoToMinShift(length_info); int max_shift = min_shift + 1; size_t major = uint64_t{1} << min_shift; @@ -1988,9 +1951,10 @@ AutoHyperClockTable::AutoHyperClockTable( size_t home; #endif if (major + i < used_length) { - array_[i].head_next_with_shift = MakeNextWithShiftEnd(i, max_shift); - array_[major + i].head_next_with_shift = - MakeNextWithShiftEnd(major + i, max_shift); + array_[i].head_next_with_shift.StoreRelaxed( + MakeNextWithShiftEnd(i, max_shift)); + array_[major + i].head_next_with_shift.StoreRelaxed( + MakeNextWithShiftEnd(major + i, max_shift)); #ifndef NDEBUG // Extra invariant checking GetHomeIndexAndShift(length_info, i, &home, &shift); assert(home == i); @@ -2000,7 +1964,8 @@ AutoHyperClockTable::AutoHyperClockTable( assert(shift == max_shift); #endif } else { - array_[i].head_next_with_shift = MakeNextWithShiftEnd(i, min_shift); + array_[i].head_next_with_shift.StoreRelaxed( + MakeNextWithShiftEnd(i, min_shift)); #ifndef NDEBUG // Extra invariant checking GetHomeIndexAndShift(length_info, i, &home, &shift); assert(home == i); @@ -2022,36 +1987,38 @@ AutoHyperClockTable::~AutoHyperClockTable() { // date. Probe for first unused slot to ensure we see the whole structure. 
size_t used_end = GetTableSize(); while (used_end < array_.Count() && - array_[used_end].head_next_with_shift.load() != + array_[used_end].head_next_with_shift.LoadRelaxed() != HandleImpl::kUnusedMarker) { used_end++; } #ifndef NDEBUG for (size_t i = used_end; i < array_.Count(); i++) { - assert(array_[i].head_next_with_shift.load() == 0); - assert(array_[i].chain_next_with_shift.load() == 0); - assert(array_[i].meta.load() == 0); + assert(array_[i].head_next_with_shift.LoadRelaxed() == 0); + assert(array_[i].chain_next_with_shift.LoadRelaxed() == 0); + assert(array_[i].meta.LoadRelaxed() == 0); } std::vector was_populated(used_end); std::vector was_pointed_to(used_end); #endif for (size_t i = 0; i < used_end; i++) { HandleImpl& h = array_[i]; - switch (h.meta >> ClockHandle::kStateShift) { + switch (h.meta.LoadRelaxed() >> ClockHandle::kStateShift) { case ClockHandle::kStateEmpty: // noop break; case ClockHandle::kStateInvisible: // rare but possible case ClockHandle::kStateVisible: - assert(GetRefcount(h.meta) == 0); + assert(GetRefcount(h.meta.LoadRelaxed()) == 0); h.FreeData(allocator_); #ifndef NDEBUG // Extra invariant checking - usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); - occupancy_.fetch_sub(1U, std::memory_order_relaxed); + usage_.FetchSubRelaxed(h.total_charge); + occupancy_.FetchSubRelaxed(1U); was_populated[i] = true; - if (!HandleImpl::IsEnd(h.chain_next_with_shift)) { - assert((h.chain_next_with_shift & HandleImpl::kHeadLocked) == 0); - size_t next = GetNextFromNextWithShift(h.chain_next_with_shift); + if (!HandleImpl::IsEnd(h.chain_next_with_shift.LoadRelaxed())) { + assert((h.chain_next_with_shift.LoadRelaxed() & + HandleImpl::kHeadLocked) == 0); + size_t next = + GetNextFromNextWithShift(h.chain_next_with_shift.LoadRelaxed()); assert(!was_pointed_to[next]); was_pointed_to[next] = true; } @@ -2063,8 +2030,9 @@ AutoHyperClockTable::~AutoHyperClockTable() { break; } #ifndef NDEBUG // Extra invariant checking - if (!HandleImpl::IsEnd(h.head_next_with_shift)) { - size_t next = GetNextFromNextWithShift(h.head_next_with_shift); + if (!HandleImpl::IsEnd(h.head_next_with_shift.LoadRelaxed())) { + size_t next = + GetNextFromNextWithShift(h.head_next_with_shift.LoadRelaxed()); assert(!was_pointed_to[next]); was_pointed_to[next] = true; } @@ -2084,21 +2052,21 @@ AutoHyperClockTable::~AutoHyperClockTable() { #endif // Metadata charging only follows the published table size - assert(usage_.load() == 0 || - usage_.load() == GetTableSize() * sizeof(HandleImpl)); - assert(occupancy_ == 0); + assert(usage_.LoadRelaxed() == 0 || + usage_.LoadRelaxed() == GetTableSize() * sizeof(HandleImpl)); + assert(occupancy_.LoadRelaxed() == 0); } size_t AutoHyperClockTable::GetTableSize() const { - return LengthInfoToUsedLength(length_info_.load(std::memory_order_acquire)); + return LengthInfoToUsedLength(length_info_.Load()); } size_t AutoHyperClockTable::GetOccupancyLimit() const { - return occupancy_limit_.load(std::memory_order_acquire); + return occupancy_limit_.LoadRelaxed(); } void AutoHyperClockTable::StartInsert(InsertState& state) { - state.saved_length_info = length_info_.load(std::memory_order_acquire); + state.saved_length_info = length_info_.Load(); } // Because we have linked lists, bugs or even hardware errors can make it @@ -2138,8 +2106,7 @@ bool AutoHyperClockTable::GrowIfNeeded(size_t new_occupancy, // we might need to grow more than once to actually increase the occupancy // limit (due to max load factor < 1.0) - while (UNLIKELY(new_occupancy > - 
occupancy_limit_.load(std::memory_order_relaxed))) { + while (UNLIKELY(new_occupancy > occupancy_limit_.LoadRelaxed())) { // At this point we commit the thread to growing unless we've reached the // limit (returns false). if (!Grow(state)) { @@ -2152,12 +2119,12 @@ bool AutoHyperClockTable::GrowIfNeeded(size_t new_occupancy, bool AutoHyperClockTable::Grow(InsertState& state) { // Allocate the next grow slot - size_t grow_home = grow_frontier_.fetch_add(1, std::memory_order_relaxed); + size_t grow_home = grow_frontier_.FetchAddRelaxed(1); if (grow_home >= array_.Count()) { // Can't grow any more. // (Tested by unit test ClockCacheTest/Limits) // Make sure we don't overflow grow_frontier_ by reaching here repeatedly - grow_frontier_.store(array_.Count(), std::memory_order_relaxed); + grow_frontier_.StoreRelaxed(array_.Count()); return false; } #ifdef COERCE_CONTEXT_SWITCH @@ -2180,8 +2147,7 @@ bool AutoHyperClockTable::Grow(InsertState& state) { // chain rewrite lock has been released. size_t old_old_home = BottomNBits(grow_home, old_shift - 1); for (;;) { - uint64_t old_old_head = array_[old_old_home].head_next_with_shift.load( - std::memory_order_acquire); + uint64_t old_old_head = array_[old_old_home].head_next_with_shift.Load(); if (GetShiftFromNextWithShift(old_old_head) >= old_shift) { if ((old_old_head & HandleImpl::kNextEndFlags) != HandleImpl::kHeadLocked) { @@ -2190,7 +2156,7 @@ bool AutoHyperClockTable::Grow(InsertState& state) { } // NOTE: one of the few yield-wait loops, which is rare enough in practice // for its performance to be insignificant. - yield_count_.fetch_add(1, std::memory_order_relaxed); + yield_count_.FetchAddRelaxed(1); std::this_thread::yield(); } @@ -2231,7 +2197,7 @@ bool AutoHyperClockTable::Grow(InsertState& state) { // See call in Grow() void AutoHyperClockTable::CatchUpLengthInfoNoWait( size_t known_usable_grow_home) { - uint64_t current_length_info = length_info_.load(std::memory_order_acquire); + uint64_t current_length_info = length_info_.Load(); size_t published_usable_size = LengthInfoToUsedLength(current_length_info); while (published_usable_size <= known_usable_grow_home) { // For when published_usable_size was grow_home @@ -2244,9 +2210,8 @@ void AutoHyperClockTable::CatchUpLengthInfoNoWait( if (published_usable_size < known_usable_grow_home) { int old_shift = FloorLog2(next_usable_size - 1); size_t old_home = BottomNBits(published_usable_size, old_shift); - int shift = - GetShiftFromNextWithShift(array_[old_home].head_next_with_shift.load( - std::memory_order_acquire)); + int shift = GetShiftFromNextWithShift( + array_[old_home].head_next_with_shift.Load()); if (shift <= old_shift) { // Not ready break; @@ -2255,14 +2220,13 @@ void AutoHyperClockTable::CatchUpLengthInfoNoWait( // CAS update length_info_. This only moves in one direction, so if CAS // fails, someone else made progress like we are trying, and we can just // pick up the new value and keep going as appropriate. 
- if (length_info_.compare_exchange_strong( - current_length_info, next_length_info, std::memory_order_acq_rel)) { + if (length_info_.CasStrong(current_length_info, next_length_info)) { current_length_info = next_length_info; // Update usage_ if metadata charge policy calls for it if (metadata_charge_policy_ == CacheMetadataChargePolicy::kFullChargeCacheMetadata) { // NOTE: ignoring page boundaries for simplicity - usage_.fetch_add(sizeof(HandleImpl), std::memory_order_relaxed); + usage_.FetchAddRelaxed(sizeof(HandleImpl)); } } published_usable_size = LengthInfoToUsedLength(current_length_info); @@ -2270,13 +2234,12 @@ void AutoHyperClockTable::CatchUpLengthInfoNoWait( // After updating lengh_info_ we can update occupancy_limit_, // allowing for later operations to update it before us. - // Note: there is no std::atomic max operation, so we have to use a CAS loop - size_t old_occupancy_limit = occupancy_limit_.load(std::memory_order_acquire); + // Note: there is no AcqRelAtomic max operation, so we have to use a CAS loop + size_t old_occupancy_limit = occupancy_limit_.LoadRelaxed(); size_t new_occupancy_limit = CalcOccupancyLimit(published_usable_size); while (old_occupancy_limit < new_occupancy_limit) { - if (occupancy_limit_.compare_exchange_weak(old_occupancy_limit, - new_occupancy_limit, - std::memory_order_acq_rel)) { + if (occupancy_limit_.CasWeakRelaxed(old_occupancy_limit, + new_occupancy_limit)) { break; } } @@ -2401,8 +2364,7 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, // Used for locking the one chain below uint64_t saved_one_head; // One head has not been written to - assert(arr[grow_home].head_next_with_shift.load(std::memory_order_acquire) == - 0); + assert(arr[grow_home].head_next_with_shift.Load() == 0); // old_home will also the head of the new "zero chain" -- all entries in the // "from" chain whose next hash bit is 0. grow_home will be head of the new @@ -2473,8 +2435,7 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, } } - next_with_shift = - arr[cur].chain_next_with_shift.load(std::memory_order_acquire); + next_with_shift = arr[cur].chain_next_with_shift.Load(); } // Try to update heads for initial migration info @@ -2490,13 +2451,11 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, (one_chain_frontier != SIZE_MAX ? MakeNextWithShift(one_chain_frontier, new_shift) : MakeNextWithShiftEnd(grow_home, new_shift)); - arr[grow_home].head_next_with_shift.store(saved_one_head, - std::memory_order_release); + arr[grow_home].head_next_with_shift.Store(saved_one_head); // Make sure length_info_ hasn't been updated too early, as we're about // to make the change that makes it safe to update (e.g. in DoInsert()) - assert(LengthInfoToUsedLength( - length_info_.load(std::memory_order_acquire)) <= grow_home); + assert(LengthInfoToUsedLength(length_info_.Load()) <= grow_home); // Try to set zero's head. 
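The occupancy_limit_ update above notes that there is no atomic max operation, hence the CAS loop. That idiom in isolation, on a plain std::atomic:

```
#include <atomic>
#include <cstddef>

// Raise `limit` to at least `candidate`, tolerating concurrent raises by
// other threads.
void RaiseToAtLeast(std::atomic<size_t>& limit, size_t candidate) {
  size_t cur = limit.load(std::memory_order_relaxed);
  while (cur < candidate &&
         !limit.compare_exchange_weak(cur, candidate,
                                      std::memory_order_relaxed)) {
    // On failure `cur` holds the latest value; the loop re-checks whether
    // another thread already raised the limit past `candidate`.
  }
}
```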
if (zero_head_lock.CasUpdate( @@ -2566,19 +2525,17 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, : /*&*/ one_chain_frontier; assert(cur != first_frontier); assert(GetNextFromNextWithShift( - arr[first_frontier].chain_next_with_shift.load( - std::memory_order_acquire)) == other_frontier); + arr[first_frontier].chain_next_with_shift.Load()) == + other_frontier); - uint64_t next_with_shift = - arr[cur].chain_next_with_shift.load(std::memory_order_acquire); + uint64_t next_with_shift = arr[cur].chain_next_with_shift.Load(); // Check for end of original chain if (HandleImpl::IsEnd(next_with_shift)) { // Can set upgraded tail on first chain uint64_t first_new_tail = MakeNextWithShiftEnd( chain_frontier_first == 0 ? old_home : grow_home, new_shift); - arr[first_frontier].chain_next_with_shift.store( - first_new_tail, std::memory_order_release); + arr[first_frontier].chain_next_with_shift.Store(first_new_tail); // And upgrade remainder of other chain uint64_t other_new_tail = MakeNextWithShiftEnd( chain_frontier_first != 0 ? old_home : grow_home, new_shift); @@ -2603,8 +2560,7 @@ void AutoHyperClockTable::SplitForGrow(size_t grow_home, size_t old_home, if (target_chain == chain_frontier_first) { // Found next entry to skip to on the first chain uint64_t skip_to = MakeNextWithShift(cur, new_shift); - arr[first_frontier].chain_next_with_shift.store( - skip_to, std::memory_order_release); + arr[first_frontier].chain_next_with_shift.Store(skip_to); first_frontier = cur; // Upgrade other chain up to entry before that one UpgradeShiftsOnRange(arr, other_frontier, next_with_shift, old_shift, @@ -2680,28 +2636,26 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, op_data->push_back(h); // Entries for eviction become purgeable purgeable = true; - assert((h->meta.load(std::memory_order_acquire) >> - ClockHandle::kStateShift) == ClockHandle::kStateConstruction); + assert((h->meta.Load() >> ClockHandle::kStateShift) == + ClockHandle::kStateConstruction); } } else { (void)op_data; - purgeable = ((h->meta.load(std::memory_order_acquire) >> - ClockHandle::kStateShift) & + purgeable = ((h->meta.Load() >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit) == 0; } } if (purgeable) { - assert((h->meta.load(std::memory_order_acquire) >> - ClockHandle::kStateShift) == ClockHandle::kStateConstruction); + assert((h->meta.Load() >> ClockHandle::kStateShift) == + ClockHandle::kStateConstruction); pending_purge = true; } else if (pending_purge) { if (prev_to_keep) { // Update chain next to skip purgeable entries - assert(prev_to_keep->chain_next_with_shift.load( - std::memory_order_acquire) == prev_to_keep_next_with_shift); - prev_to_keep->chain_next_with_shift.store(next_with_shift, - std::memory_order_release); + assert(prev_to_keep->chain_next_with_shift.Load() == + prev_to_keep_next_with_shift); + prev_to_keep->chain_next_with_shift.Store(next_with_shift); } else if (rewrite_lock.CasUpdate(next_with_shift, yield_count_)) { // Managed to update head without any parallel insertions } else { @@ -2733,7 +2687,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, } // Read chain pointer - next_with_shift = h->chain_next_with_shift.load(std::memory_order_acquire); + next_with_shift = h->chain_next_with_shift.Load(); #ifndef NDEBUG if (prev_to_keep == h) { prev_to_keep_next_with_shift = next_with_shift; @@ -2799,8 +2753,8 @@ void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { if constexpr (kIsPurge) { // Purge callers leave home unspecified, to be 
determined from key assert(home == SIZE_MAX); - GetHomeIndexAndShift(length_info_.load(std::memory_order_acquire), - (*op_data)[1], &home, &home_shift); + GetHomeIndexAndShift(length_info_.Load(), (*op_data)[1], &home, + &home_shift); assert(home_shift > 0); } else { assert(kIsClockUpdateChain); @@ -2987,8 +2941,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( start -= used_length; } if (i >= used_length) { - used_length = LengthInfoToUsedLength( - length_info_.load(std::memory_order_acquire)); + used_length = LengthInfoToUsedLength(length_info_.Load()); if (i >= used_length * 2) { // Cycling back should not happen unless there is enough random // churn in parallel that we happen to hit each slot at a time @@ -3020,8 +2973,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( // Might need to retry for (int i = 0;; ++i) { CHECK_TOO_MANY_ITERATIONS(i); - next_with_shift = - arr[home].head_next_with_shift.load(std::memory_order_acquire); + next_with_shift = arr[home].head_next_with_shift.Load(); int shift = GetShiftFromNextWithShift(next_with_shift); if (UNLIKELY(shift != home_shift)) { @@ -3035,8 +2987,8 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( assert((home & hash_bit_mask) == 0); // BEGIN leftover updates to length_info_ for Grow() size_t grow_home = home + hash_bit_mask; - assert(arr[grow_home].head_next_with_shift.load( - std::memory_order_acquire) != HandleImpl::kUnusedMarker); + assert(arr[grow_home].head_next_with_shift.Load() != + HandleImpl::kUnusedMarker); CatchUpLengthInfoNoWait(grow_home); // END leftover updates to length_info_ for Grow() home += proto.hashed_key[1] & hash_bit_mask; @@ -3060,10 +3012,9 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::DoInsert( chain_next_with_shift &= ~HandleImpl::kHeadLocked; } - arr[idx].chain_next_with_shift.store(chain_next_with_shift, - std::memory_order_release); - if (arr[home].head_next_with_shift.compare_exchange_weak( - next_with_shift, head_next_with_shift, std::memory_order_acq_rel)) { + arr[idx].chain_next_with_shift.Store(chain_next_with_shift); + if (arr[home].head_next_with_shift.CasWeak(next_with_shift, + head_next_with_shift)) { // Success if (!take_ref) { Unref(arr[idx]); @@ -3104,8 +3055,8 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // home and head. size_t home; int home_shift; - GetHomeIndexAndShift(length_info_.load(std::memory_order_relaxed), - hashed_key[1], &home, &home_shift); + GetHomeIndexAndShift(length_info_.LoadRelaxed(), hashed_key[1], &home, + &home_shift); assert(home_shift > 0); // The full Lookup algorithm however is not great for hot path efficiency, @@ -3127,7 +3078,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( // of a loop as possible. HandleImpl* const arr = array_.Get(); - uint64_t next_with_shift = arr[home].head_next_with_shift; + uint64_t next_with_shift = arr[home].head_next_with_shift.LoadRelaxed(); for (size_t i = 0; !HandleImpl::IsEnd(next_with_shift) && i < 10; ++i) { HandleImpl* h = &arr[GetNextFromNextWithShift(next_with_shift)]; // Attempt cheap key match without acquiring a read ref. 
This could give a @@ -3142,8 +3093,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( #endif if (probably_equal) { // Increment acquire counter for definitive check - uint64_t old_meta = h->meta.fetch_add(ClockHandle::kAcquireIncrement, - std::memory_order_acquire); + uint64_t old_meta = h->meta.FetchAdd(ClockHandle::kAcquireIncrement); // Check if it's a referencable (sharable) entry if (LIKELY(old_meta & (uint64_t{ClockHandle::kStateShareableBit} << ClockHandle::kStateShift))) { @@ -3163,7 +3113,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( } } - next_with_shift = h->chain_next_with_shift.load(std::memory_order_relaxed); + next_with_shift = h->chain_next_with_shift.LoadRelaxed(); } // If we get here, falling back on full Lookup algorithm. @@ -3173,8 +3123,8 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( for (size_t i = 0;; ++i) { CHECK_TOO_MANY_ITERATIONS(i); // Read head or chain pointer - next_with_shift = - h ? h->chain_next_with_shift : arr[home].head_next_with_shift; + next_with_shift = h ? h->chain_next_with_shift.Load() + : arr[home].head_next_with_shift.Load(); int shift = GetShiftFromNextWithShift(next_with_shift); // Make sure it's usable @@ -3271,8 +3221,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( } // Update the hit bit if (eviction_callback_) { - h->meta.fetch_or(uint64_t{1} << ClockHandle::kHitBitShift, - std::memory_order_relaxed); + h->meta.FetchOrRelaxed(uint64_t{1} << ClockHandle::kHitBitShift); } // All done. return h; @@ -3312,7 +3261,7 @@ AutoHyperClockTable::HandleImpl* AutoHyperClockTable::Lookup( } void AutoHyperClockTable::Remove(HandleImpl* h) { - assert((h->meta.load() >> ClockHandle::kStateShift) == + assert((h->meta.Load() >> ClockHandle::kStateShift) == ClockHandle::kStateConstruction); const HandleImpl& c_h = *h; @@ -3324,14 +3273,13 @@ bool AutoHyperClockTable::TryEraseHandle(HandleImpl* h, bool holding_ref, uint64_t meta; if (mark_invisible) { // Set invisible - meta = h->meta.fetch_and( - ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift), - std::memory_order_acq_rel); + meta = h->meta.FetchAnd( + ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift)); // To local variable also meta &= ~(uint64_t{ClockHandle::kStateVisibleBit} << ClockHandle::kStateShift); } else { - meta = h->meta.load(std::memory_order_acquire); + meta = h->meta.Load(); } // Take ownership if no other refs @@ -3349,10 +3297,8 @@ bool AutoHyperClockTable::TryEraseHandle(HandleImpl* h, bool holding_ref, // another thread replaces this entry with another, reaches zero refs, and // then we end up erasing that other entry. That's an acceptable risk / // imprecision. - } while (!h->meta.compare_exchange_weak( - meta, - uint64_t{ClockHandle::kStateConstruction} << ClockHandle::kStateShift, - std::memory_order_acquire)); + } while (!h->meta.CasWeak(meta, uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift)); // Took ownership // TODO? Delay freeing? 
h->FreeData(allocator_); @@ -3360,14 +3306,14 @@ bool AutoHyperClockTable::TryEraseHandle(HandleImpl* h, bool holding_ref, if (UNLIKELY(h->IsStandalone())) { // Delete detached handle delete h; - standalone_usage_.fetch_sub(total_charge, std::memory_order_relaxed); + standalone_usage_.FetchSubRelaxed(total_charge); } else { Remove(h); MarkEmpty(*h); - occupancy_.fetch_sub(1U, std::memory_order_release); + occupancy_.FetchSub(1U); } - usage_.fetch_sub(total_charge, std::memory_order_relaxed); - assert(usage_.load(std::memory_order_relaxed) < SIZE_MAX / 2); + usage_.FetchSubRelaxed(total_charge); + assert(usage_.LoadRelaxed() < SIZE_MAX / 2); return true; } @@ -3382,14 +3328,12 @@ bool AutoHyperClockTable::Release(HandleImpl* h, bool useful, uint64_t old_meta; if (useful) { // Increment release counter to indicate was used - old_meta = h->meta.fetch_add(ClockHandle::kReleaseIncrement, - std::memory_order_release); + old_meta = h->meta.FetchAdd(ClockHandle::kReleaseIncrement); // Correct for possible (but rare) overflow CorrectNearOverflow(old_meta, h->meta); } else { // Decrement acquire counter to pretend it never happened - old_meta = h->meta.fetch_sub(ClockHandle::kAcquireIncrement, - std::memory_order_release); + old_meta = h->meta.FetchSub(ClockHandle::kAcquireIncrement); } assert((old_meta >> ClockHandle::kStateShift) & @@ -3441,22 +3385,20 @@ void AutoHyperClockTable::EraseUnRefEntries() { for (size_t i = 0; i < usable_size; i++) { HandleImpl& h = array_[i]; - uint64_t old_meta = h.meta.load(std::memory_order_relaxed); + uint64_t old_meta = h.meta.LoadRelaxed(); if (old_meta & (uint64_t{ClockHandle::kStateShareableBit} << ClockHandle::kStateShift) && GetRefcount(old_meta) == 0 && - h.meta.compare_exchange_strong(old_meta, - uint64_t{ClockHandle::kStateConstruction} - << ClockHandle::kStateShift, - std::memory_order_acquire)) { + h.meta.CasStrong(old_meta, uint64_t{ClockHandle::kStateConstruction} + << ClockHandle::kStateShift)) { // Took ownership h.FreeData(allocator_); - usage_.fetch_sub(h.total_charge, std::memory_order_relaxed); + usage_.FetchSubRelaxed(h.total_charge); // NOTE: could be more efficient with a dedicated variant of // PurgeImpl, but this is not a common operation Remove(&h); MarkEmpty(h); - occupancy_.fetch_sub(1U, std::memory_order_release); + occupancy_.FetchSub(1U); } } } @@ -3486,8 +3428,7 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, // the life of the cache), there will be a brief period where concurrent // eviction threads could use the old mask value, possibly causing redundant // or missed clock updates for a *small* portion of the table. - size_t clock_pointer_mask = - clock_pointer_mask_.load(std::memory_order_relaxed); + size_t clock_pointer_mask = clock_pointer_mask_.LoadRelaxed(); uint64_t max_clock_pointer = 0; // unset @@ -3499,8 +3440,7 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, // Loop until enough freed, or limit reached (see bottom of loop) for (;;) { // First (concurrent) increment clock pointer - uint64_t old_clock_pointer = - clock_pointer_.fetch_add(step_size, std::memory_order_relaxed); + uint64_t old_clock_pointer = clock_pointer_.FetchAddRelaxed(step_size); if (UNLIKELY((old_clock_pointer & clock_pointer_mask) == 0)) { // Back at the beginning. See if clock_pointer_mask should be updated. 
@@ -3508,8 +3448,7 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, UINT64_MAX, LengthInfoToMinShift(state.saved_length_info)); if (clock_pointer_mask != mask) { clock_pointer_mask = static_cast(mask); - clock_pointer_mask_.store(clock_pointer_mask, - std::memory_order_relaxed); + clock_pointer_mask_.StoreRelaxed(clock_pointer_mask); } } @@ -3579,7 +3518,7 @@ size_t AutoHyperClockTable::CalcMaxUsableLength( namespace { bool IsHeadNonempty(const AutoHyperClockTable::HandleImpl& h) { return !AutoHyperClockTable::HandleImpl::IsEnd( - h.head_next_with_shift.load(std::memory_order_relaxed)); + h.head_next_with_shift.LoadRelaxed()); } bool IsEntryAtHome(const AutoHyperClockTable::HandleImpl& h, int shift, size_t home) { diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 63610d1f9..3086e7e97 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -24,6 +24,7 @@ #include "port/port.h" #include "rocksdb/cache.h" #include "rocksdb/secondary_cache.h" +#include "util/atomic.h" #include "util/autovector.h" #include "util/math.h" @@ -368,7 +369,7 @@ struct ClockHandle : public ClockHandleBasicData { // TODO: make these coundown values tuning parameters for eviction? // See above. Mutable for read reference counting. - mutable std::atomic meta{}; + mutable AcqRelAtomic meta{}; }; // struct ClockHandle class BaseClockTable { @@ -395,19 +396,15 @@ class BaseClockTable { void Ref(ClockHandle& handle); - size_t GetOccupancy() const { - return occupancy_.load(std::memory_order_relaxed); - } + size_t GetOccupancy() const { return occupancy_.LoadRelaxed(); } - size_t GetUsage() const { return usage_.load(std::memory_order_relaxed); } + size_t GetUsage() const { return usage_.LoadRelaxed(); } - size_t GetStandaloneUsage() const { - return standalone_usage_.load(std::memory_order_relaxed); - } + size_t GetStandaloneUsage() const { return standalone_usage_.LoadRelaxed(); } uint32_t GetHashSeed() const { return hash_seed_; } - uint64_t GetYieldCount() const { return yield_count_.load(); } + uint64_t GetYieldCount() const { return yield_count_.LoadRelaxed(); } struct EvictionData { size_t freed_charge = 0; @@ -460,21 +457,23 @@ class BaseClockTable { // operations in ClockCacheShard. // Clock algorithm sweep pointer. - std::atomic clock_pointer_{}; + // (Relaxed: only needs to be consistent with itself.) + RelaxedAtomic clock_pointer_{}; // Counter for number of times we yield to wait on another thread. - std::atomic yield_count_{}; + // (Relaxed: a simple stat counter.) + RelaxedAtomic yield_count_{}; // TODO: is this separation needed if we don't do background evictions? ALIGN_AS(CACHE_LINE_SIZE) // Number of elements in the table. - std::atomic occupancy_{}; + AcqRelAtomic occupancy_{}; // Memory usage by entries tracked by the cache (including standalone) - std::atomic usage_{}; + AcqRelAtomic usage_{}; // Part of usage by standalone entries (not in table) - std::atomic standalone_usage_{}; + AcqRelAtomic standalone_usage_{}; ALIGN_AS(CACHE_LINE_SIZE) const CacheMetadataChargePolicy metadata_charge_policy_; @@ -500,7 +499,11 @@ class FixedHyperClockTable : public BaseClockTable { struct ALIGN_AS(64U) HandleImpl : public ClockHandle { // The number of elements that hash to this slot or a lower one, but wind // up in this slot or a higher one. - std::atomic displacements{}; + // (Relaxed: within a Cache op, does not need consistency with entries + // inserted/removed during that op. 
For example, a Lookup() that + // happens-after an Insert() will see an appropriate displacements value + // for the entry to be in a published state.) + RelaxedAtomic displacements{}; // Whether this is a "deteched" handle that is independently allocated // with `new` (so must be deleted with `delete`). @@ -787,17 +790,16 @@ class AutoHyperClockTable : public BaseClockTable { // See above. The head pointer is logically independent of the rest of // the entry, including the chain next pointer. - std::atomic head_next_with_shift{kUnusedMarker}; - std::atomic chain_next_with_shift{kUnusedMarker}; + AcqRelAtomic head_next_with_shift{kUnusedMarker}; + AcqRelAtomic chain_next_with_shift{kUnusedMarker}; // For supporting CreateStandalone and some fallback cases. inline bool IsStandalone() const { - return head_next_with_shift.load(std::memory_order_acquire) == - kStandaloneMarker; + return head_next_with_shift.Load() == kStandaloneMarker; } inline void SetStandalone() { - head_next_with_shift.store(kStandaloneMarker, std::memory_order_release); + head_next_with_shift.Store(kStandaloneMarker); } }; // struct HandleImpl @@ -942,19 +944,22 @@ class AutoHyperClockTable : public BaseClockTable { // To maximize parallelization of Grow() operations, this field is only // updated opportunistically after Grow() operations and in DoInsert() where // it is found to be out-of-date. See CatchUpLengthInfoNoWait(). - std::atomic length_info_; + AcqRelAtomic length_info_; // An already-computed version of the usable length times the max load // factor. Could be slightly out of date but GrowIfNeeded()/Grow() handle // that internally. - std::atomic occupancy_limit_; + // (Relaxed: allowed to lag behind length_info_ by a little) + RelaxedAtomic occupancy_limit_; // The next index to use from array_ upon the next Grow(). Might be ahead of // length_info_. - std::atomic grow_frontier_; + // (Relaxed: self-contained source of truth for next grow home) + RelaxedAtomic grow_frontier_; // See explanation in AutoHyperClockTable::Evict - std::atomic clock_pointer_mask_; + // (Relaxed: allowed to lag behind clock_pointer_ and length_info_ state) + RelaxedAtomic clock_pointer_mask_; }; // class AutoHyperClockTable // A single shard of sharded cache. @@ -1070,10 +1075,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { Table table_; // Maximum total charge of all elements stored in the table. - std::atomic capacity_; + // (Relaxed: eventual consistency/update is OK) + RelaxedAtomic capacity_; // Whether to reject insertion if cache reaches its full capacity. - std::atomic strict_capacity_limit_; + // (Relaxed: eventual consistency/update is OK) + RelaxedAtomic strict_capacity_limit_; }; // class ClockCacheShard template diff --git a/util/atomic.h b/util/atomic.h new file mode 100644 index 000000000..afb3dc540 --- /dev/null +++ b/util/atomic.h @@ -0,0 +1,111 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include + +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +// Background: +// std::atomic is somewhat easy to misuse: +// * Implicit conversion to T using std::memory_order_seq_cst, along with +// memory order parameter defaults, make it easy to accidentally mix sequential +// consistency ordering with acquire/release memory ordering. 
See +// "The single total order might not be consistent with happens-before" at +// https://en.cppreference.com/w/cpp/atomic/memory_order +// * It's easy to use nonsensical (UB) combinations like store with +// std::memory_order_acquire. +// For such reasons, we provide wrappers below to make safe usage easier. + +// Wrapper around std::atomic to avoid certain bugs (see Background above). +// +// This relaxed-only wrapper is intended for atomics that do not need +// ordering constraints with other data reads/writes aside from those +// necessary for computing data values or given by other happens-before +// relationships. For example, a cross-thread counter that never returns +// the same result can be a RelaxedAtomic. +template +class RelaxedAtomic { + public: + explicit RelaxedAtomic(T initial = {}) : v_(initial) {} + void StoreRelaxed(T desired) { v_.store(desired, std::memory_order_relaxed); } + T LoadRelaxed() const { return v_.load(std::memory_order_relaxed); } + bool CasWeakRelaxed(T& expected, T desired) { + return v_.compare_exchange_weak(expected, desired, + std::memory_order_relaxed); + } + bool CasStrongRelaxed(T& expected, T desired) { + return v_.compare_exchange_strong(expected, desired, + std::memory_order_relaxed); + } + T ExchangeRelaxed(T desired) { + return v_.exchange(desired, std::memory_order_relaxed); + } + T FetchAddRelaxed(T operand) { + return v_.fetch_add(operand, std::memory_order_relaxed); + } + T FetchSubRelaxed(T operand) { + return v_.fetch_sub(operand, std::memory_order_relaxed); + } + T FetchAndRelaxed(T operand) { + return v_.fetch_and(operand, std::memory_order_relaxed); + } + T FetchOrRelaxed(T operand) { + return v_.fetch_or(operand, std::memory_order_relaxed); + } + T FetchXorRelaxed(T operand) { + return v_.fetch_xor(operand, std::memory_order_relaxed); + } + + protected: + std::atomic v_; +}; + +// Wrapper around std::atomic to avoid certain bugs (see Background above). +// +// Except for some unusual cases requiring sequential consistency, this is +// a general-purpose atomic. Relaxed operations can be mixed in as appropriate. 
+template +class AcqRelAtomic : public RelaxedAtomic { + public: + explicit AcqRelAtomic(T initial = {}) : RelaxedAtomic(initial) {} + void Store(T desired) { + RelaxedAtomic::v_.store(desired, std::memory_order_release); + } + T Load() const { + return RelaxedAtomic::v_.load(std::memory_order_acquire); + } + bool CasWeak(T& expected, T desired) { + return RelaxedAtomic::v_.compare_exchange_weak( + expected, desired, std::memory_order_acq_rel); + } + bool CasStrong(T& expected, T desired) { + return RelaxedAtomic::v_.compare_exchange_strong( + expected, desired, std::memory_order_acq_rel); + } + T Exchange(T desired) { + return RelaxedAtomic::v_.exchange(desired, std::memory_order_acq_rel); + } + T FetchAdd(T operand) { + return RelaxedAtomic::v_.fetch_add(operand, std::memory_order_acq_rel); + } + T FetchSub(T operand) { + return RelaxedAtomic::v_.fetch_sub(operand, std::memory_order_acq_rel); + } + T FetchAnd(T operand) { + return RelaxedAtomic::v_.fetch_and(operand, std::memory_order_acq_rel); + } + T FetchOr(T operand) { + return RelaxedAtomic::v_.fetch_or(operand, std::memory_order_acq_rel); + } + T FetchXor(T operand) { + return RelaxedAtomic::v_.fetch_xor(operand, std::memory_order_acq_rel); + } +}; + +} // namespace ROCKSDB_NAMESPACE From f337533b6f523c006bb60d647e9f93ada96d8a3f Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 8 Nov 2023 14:00:36 -0800 Subject: [PATCH 268/386] Ensure and clarify how RocksDB calls TablePropertiesCollector's functions (#12053) Summary: **Context/Summary:** It's intuitive for users to assume `TablePropertiesCollector::Finish()` is called only once by RocksDB internal by the word "finish". However, this is currently not true as RocksDB also calls this function in `BlockBased/PlainTableBuilder::GetTableProperties()` to populate user collected properties on demand. This PR avoids that by moving that populating to where we first call `Finish()` (i.e, `NotifyCollectTableCollectorsOnFinish`) Bonus: clarified in the API that `GetReadableProperties()` will be called after `Finish()` and added UT to ensure that. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12053 Test Plan: - Modified test `DBPropertiesTest.GetUserDefinedTableProperties` to ensure `Finish()` only called once. - Existing test particularly `db_properties_test, table_properties_collector_test` verify the functionality `NotifyCollectTableCollectorsOnFinish` and `GetReadableProperties()` are not broken by this change. 
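For illustration, a minimal collector written against the clarified contract might look like the sketch below. It is not part of this change; the class name and property key are invented, and it only shows that Finish() can safely be treated as a one-shot event with GetReadableProperties() following it.

#include <cassert>
#include <cstdint>
#include <string>

#include "rocksdb/table_properties.h"

using ROCKSDB_NAMESPACE::EntryType;
using ROCKSDB_NAMESPACE::SequenceNumber;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::TablePropertiesCollector;
using ROCKSDB_NAMESPACE::UserCollectedProperties;

// Hypothetical collector: counts entries and publishes the count once,
// relying on Finish() being called exactly once and GetReadableProperties()
// being called only after Finish().
class EntryCountCollector : public TablePropertiesCollector {
 public:
  const char* Name() const override { return "EntryCountCollector"; }

  Status AddUserKey(const Slice& /*key*/, const Slice& /*value*/,
                    EntryType /*type*/, SequenceNumber /*seq*/,
                    uint64_t /*file_size*/) override {
    ++count_;
    return Status::OK();
  }

  Status Finish(UserCollectedProperties* properties) override {
    assert(!finish_called_);  // holds under the clarified contract
    finish_called_ = true;
    (*properties)["example.entry_count"] = std::to_string(count_);
    return Status::OK();
  }

  UserCollectedProperties GetReadableProperties() const override {
    assert(finish_called_);  // only invoked after Finish()
    return {{"example.entry_count", std::to_string(count_)}};
  }

 private:
  uint64_t count_ = 0;
  bool finish_called_ = false;
};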
Reviewed By: ajkr Differential Revision: D51095434 Pulled By: hx235 fbshipit-source-id: 1c6275258f9b99dedad313ee8427119126817973 --- db/db_properties_test.cc | 4 ++++ include/rocksdb/table_properties.h | 3 +++ .../block_based/block_based_table_builder.cc | 16 +++++---------- table/meta_blocks.cc | 20 +++++++++++-------- table/meta_blocks.h | 6 +++++- table/plain/plain_table_builder.cc | 12 +++++++---- table/plain/plain_table_builder.h | 12 +---------- .../table_prop_collector_api_call.md | 1 + 8 files changed, 39 insertions(+), 35 deletions(-) create mode 100644 unreleased_history/behavior_changes/table_prop_collector_api_call.md diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index a7faa6414..f3281ec74 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -1084,12 +1084,14 @@ class CountingUserTblPropCollector : public TablePropertiesCollector { const char* Name() const override { return "CountingUserTblPropCollector"; } Status Finish(UserCollectedProperties* properties) override { + assert(!finish_called_); std::string encoded; PutVarint32(&encoded, count_); *properties = UserCollectedProperties{ {"CountingUserTblPropCollector", message_}, {"Count", encoded}, }; + finish_called_ = true; return Status::OK(); } @@ -1101,12 +1103,14 @@ class CountingUserTblPropCollector : public TablePropertiesCollector { } UserCollectedProperties GetReadableProperties() const override { + assert(finish_called_); return UserCollectedProperties{}; } private: std::string message_ = "Rocksdb"; uint32_t count_ = 0; + bool finish_called_ = false; }; class CountingUserTblPropCollectorFactory diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 0256fbddd..052df3503 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -122,12 +122,15 @@ class TablePropertiesCollector { // Finish() will be called when a table has already been built and is ready // for writing the properties block. + // It will be called only once by RocksDB internal. + // // @params properties User will add their collected statistics to // `properties`. virtual Status Finish(UserCollectedProperties* properties) = 0; // Return the human-readable properties, where the key is property name and // the value is the human-readable form of value. + // It will only be called after Finish() has been called by RocksDB internal. virtual UserCollectedProperties GetReadableProperties() const = 0; // The name of the properties collector can be used for debugging purpose. 
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index cc4f17413..e66c4939a 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1710,9 +1710,10 @@ void BlockBasedTableBuilder::WritePropertiesBlock( property_block_builder.AddTableProperty(rep_->props); // Add use collected properties - NotifyCollectTableCollectorsOnFinish(rep_->table_properties_collectors, - rep_->ioptions.logger, - &property_block_builder); + NotifyCollectTableCollectorsOnFinish( + rep_->table_properties_collectors, rep_->ioptions.logger, + &property_block_builder, rep_->props.user_collected_properties, + rep_->props.readable_properties); Slice block_data = property_block_builder.Finish(); TEST_SYNC_POINT_CALLBACK( @@ -2061,14 +2062,7 @@ bool BlockBasedTableBuilder::NeedCompact() const { } TableProperties BlockBasedTableBuilder::GetTableProperties() const { - TableProperties ret = rep_->props; - for (const auto& collector : rep_->table_properties_collectors) { - for (const auto& prop : collector->GetReadableProperties()) { - ret.readable_properties.insert(prop); - } - collector->Finish(&ret.user_collected_properties).PermitUncheckedError(); - } - return ret; + return rep_->props; } std::string BlockBasedTableBuilder::GetFileChecksum() const { diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index b82b5962f..2cbaacec0 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -213,21 +213,25 @@ void NotifyCollectTableCollectorsOnBlockAdd( bool NotifyCollectTableCollectorsOnFinish( const std::vector>& collectors, - Logger* info_log, PropertyBlockBuilder* builder) { + Logger* info_log, PropertyBlockBuilder* builder, + UserCollectedProperties& user_collected_properties, + UserCollectedProperties& readable_properties) { bool all_succeeded = true; for (auto& collector : collectors) { - UserCollectedProperties user_collected_properties; Status s = collector->Finish(&user_collected_properties); - - all_succeeded = all_succeeded && s.ok(); - if (!s.ok()) { + if (s.ok()) { + for (const auto& prop : collector->GetReadableProperties()) { + readable_properties.insert(prop); + } + builder->Add(user_collected_properties); + } else { LogPropertiesCollectionError(info_log, "Finish" /* method */, collector->Name()); - } else { - builder->Add(user_collected_properties); + if (all_succeeded) { + all_succeeded = false; + } } } - return all_succeeded; } diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 1ed9cf27f..0a404dc9c 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -98,9 +98,13 @@ void NotifyCollectTableCollectorsOnBlockAdd( // NotifyCollectTableCollectorsOnFinish() triggers the `Finish` event for all // property collectors. The collected properties will be added to `builder`. +// It will also populate `user_collected_properties` and `readable_properties` +// with the collected properties. bool NotifyCollectTableCollectorsOnFinish( const std::vector>& collectors, - Logger* info_log, PropertyBlockBuilder* builder); + Logger* info_log, PropertyBlockBuilder* builder, + UserCollectedProperties& user_collected_properties, + UserCollectedProperties& readable_properties); // Read table properties from a file using known BlockHandle. // @returns a status to indicate if the operation succeeded. 
On success, diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index ffa811c3c..24dd0f97a 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -265,12 +265,16 @@ Status PlainTableBuilder::Finish() { PropertyBlockBuilder property_block_builder; // -- Add basic properties property_block_builder.AddTableProperty(properties_); - + // -- Add eixsting user collected properties property_block_builder.Add(properties_.user_collected_properties); - - // -- Add user collected properties + // -- Add more user collected properties + UserCollectedProperties more_user_collected_properties; NotifyCollectTableCollectorsOnFinish( - table_properties_collectors_, ioptions_.logger, &property_block_builder); + table_properties_collectors_, ioptions_.logger, &property_block_builder, + more_user_collected_properties, properties_.readable_properties); + properties_.user_collected_properties.insert( + more_user_collected_properties.begin(), + more_user_collected_properties.end()); // -- Write property block BlockHandle property_block_handle; diff --git a/table/plain/plain_table_builder.h b/table/plain/plain_table_builder.h index 27c07898f..fb7ea63be 100644 --- a/table/plain/plain_table_builder.h +++ b/table/plain/plain_table_builder.h @@ -85,16 +85,7 @@ class PlainTableBuilder : public TableBuilder { // Finish() call, returns the size of the final generated file. uint64_t FileSize() const override; - TableProperties GetTableProperties() const override { - TableProperties ret = properties_; - for (const auto& collector : table_properties_collectors_) { - for (const auto& prop : collector->GetReadableProperties()) { - ret.readable_properties.insert(prop); - } - collector->Finish(&ret.user_collected_properties).PermitUncheckedError(); - } - return ret; - } + TableProperties GetTableProperties() const override { return properties_; } bool SaveIndexInFile() const { return store_index_in_file_; } @@ -158,4 +149,3 @@ class PlainTableBuilder : public TableBuilder { }; } // namespace ROCKSDB_NAMESPACE - diff --git a/unreleased_history/behavior_changes/table_prop_collector_api_call.md b/unreleased_history/behavior_changes/table_prop_collector_api_call.md new file mode 100644 index 000000000..ce69b3b2b --- /dev/null +++ b/unreleased_history/behavior_changes/table_prop_collector_api_call.md @@ -0,0 +1 @@ +Make RocksDB only call `TablePropertiesCollector::Finish()` once. From f9b7877cf343226023af08aeefa641e51a0531fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kasper=20Isager=20Dalsgar=C3=B0?= Date: Thu, 9 Nov 2023 10:41:38 -0800 Subject: [PATCH 269/386] Ensure `target_include_directories()` is called with correct target name (#12055) Summary: `${PROJECT_NAME}` isn't guaranteed to match a target name when an artefact suffix is specified. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12055 Reviewed By: anand1976 Differential Revision: D51125532 Pulled By: ajkr fbshipit-source-id: cd1f4a5b11eb517c379e3ee3f78592f7e606a034 --- CMakeLists.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ecf7917..23a4014bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1106,11 +1106,15 @@ set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES} ${BUILD_VERSION_CC}) +target_include_directories(${ROCKSDB_STATIC_LIB} PUBLIC + $) target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) if(ROCKSDB_BUILD_SHARED) add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES} ${BUILD_VERSION_CC}) + target_include_directories(${ROCKSDB_SHARED_LIB} PUBLIC + $) target_link_libraries(${ROCKSDB_SHARED_LIB} PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) @@ -1597,6 +1601,3 @@ option(WITH_BENCHMARK "build benchmark tests" OFF) if(WITH_BENCHMARK) add_subdirectory(${PROJECT_SOURCE_DIR}/microbench/) endif() - -target_include_directories(${PROJECT_NAME} PUBLIC - $) From e90e9825b4addbc0eb3131bc9b08cba849546816 Mon Sep 17 00:00:00 2001 From: brodyhuang Date: Thu, 9 Nov 2023 10:43:16 -0800 Subject: [PATCH 270/386] Drop wal record when sequence is illegal (#11985) Summary: - Our database is corrupted, causing some sequences of wal record to be invalid (but the `record_checksum` looks fine). - When we RecoverLogFiles in WALRecoveryMode::kPointInTimeRecovery, `assert(seq <= kMaxSequenceNumber)` will be failed. - When it is found that sequence is illegal, can we drop the file to recover as much data as possible ? Thx ! Pull Request resolved: https://github.com/facebook/rocksdb/pull/11985 Reviewed By: anand1976 Differential Revision: D50698039 Pulled By: ajkr fbshipit-source-id: 1e42113b58823088d7c0c3a92af5b3efbb5f5296 --- db/db_impl/db_impl_open.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index a67de6ff1..074fa8621 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1251,6 +1251,13 @@ Status DBImpl::RecoverLogFiles(const std::vector& wal_numbers, } SequenceNumber sequence = WriteBatchInternal::Sequence(batch_to_use); + if (sequence > kMaxSequenceNumber) { + reporter.Corruption( + record.size(), + Status::Corruption("sequence " + std::to_string(sequence) + + " is too large")); + continue; + } if (immutable_db_options_.wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) { From c4c62c230438b06bae67189baaa5a7661e590160 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Thu, 9 Nov 2023 10:45:13 -0800 Subject: [PATCH 271/386] Support to use environment variable to test customer encryption plugins (#12025) Summary: The CreateEnvTest.CreateEncryptedFileSystem unit test is to verify the creation functionality of EncryptedFileSystem, but now it just support the builtin CTREncryptionProvider class. This patch make it flexible to use environment variable `TEST_FS_URI`, it is useful to test customer encryption plugins. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12025 Reviewed By: anand1976 Differential Revision: D50799656 Pulled By: ajkr fbshipit-source-id: dbcacfefbf07de9c7803f7707b34c5193bec17bf --- env/env_test.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/env/env_test.cc b/env/env_test.cc index fb23bae13..1bd176fb0 100644 --- a/env/env_test.cc +++ b/env/env_test.cc @@ -2906,6 +2906,13 @@ TEST_F(CreateEnvTest, CreateEncryptedFileSystem) { std::string base_opts = std::string("provider=1://test; id=") + EncryptedFileSystem::kClassName(); + // Rewrite the default FileSystem URI if the "TEST_FS_URI" environment + // variable is set. This is useful to test customer encryption plugins. + const char* uri = getenv("TEST_FS_URI"); + if (uri != nullptr) { + base_opts = uri; + } + // The EncryptedFileSystem requires a "provider" option. ASSERT_NOK(FileSystem::CreateFromString( config_options_, EncryptedFileSystem::kClassName(), &fs)); From dfaf4dc111ff090800f2765fdcd88d48a6abbab8 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 9 Nov 2023 15:58:07 -0800 Subject: [PATCH 272/386] Stubs for piping write time (#12043) Summary: As titled. This PR contains the API and stubbed implementation for piping write time. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12043 Reviewed By: pdillinger Differential Revision: D51076575 Pulled By: jowlyzhang fbshipit-source-id: 3b341263498351b9ccaff27cf35d5aeb5bdf0cf1 --- db/db_iter.cc | 3 +++ db/write_batch_test.cc | 5 +++++ include/rocksdb/iterator.h | 10 ++++++++++ .../rocksdb/utilities/write_batch_with_index.h | 8 ++++++++ include/rocksdb/write_batch.h | 11 +++++++++++ include/rocksdb/write_batch_base.h | 16 ++++++++++++++++ 6 files changed, 53 insertions(+) diff --git a/db/db_iter.cc b/db/db_iter.cc index 247542811..418c538d4 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -114,6 +114,9 @@ Status DBIter::GetProperty(std::string prop_name, std::string* prop) { } else if (prop_name == "rocksdb.iterator.internal-key") { *prop = saved_key_.GetUserKey().ToString(); return Status::OK(); + } else if (prop_name == "rocksdb.iterator.write-time") { + // TODO(yuzhangyu): implement return the actual write time. + return Status::NotSupported("write time property is under construction"); } return Status::InvalidArgument("Unidentified property."); } diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index e8fc1aa35..00faea4ce 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -770,6 +770,9 @@ TEST_F(WriteBatchTest, ColumnFamiliesBatchTest) { ASSERT_OK(batch.Merge(&three, Slice("threethree"), Slice("3three"))); ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); ASSERT_OK(batch.Merge(Slice("omom"), Slice("nom"))); + // TODO(yuzhangyu): implement this. 
+ ASSERT_TRUE( + batch.TimedPut(&zero, Slice("foo"), Slice("bar"), 0u).IsNotSupported()); TestHandler handler; ASSERT_OK(batch.Iterate(&handler)); @@ -797,6 +800,8 @@ TEST_F(WriteBatchTest, ColumnFamiliesBatchWithIndexTest) { ASSERT_OK(batch.Merge(&three, Slice("threethree"), Slice("3three"))); ASSERT_OK(batch.Put(&zero, Slice("foo"), Slice("bar"))); ASSERT_OK(batch.Merge(Slice("omom"), Slice("nom"))); + ASSERT_TRUE( + batch.TimedPut(&zero, Slice("foo"), Slice("bar"), 0u).IsNotSupported()); std::unique_ptr iter; diff --git a/include/rocksdb/iterator.h b/include/rocksdb/iterator.h index c50c825f3..8568dd258 100644 --- a/include/rocksdb/iterator.h +++ b/include/rocksdb/iterator.h @@ -133,6 +133,16 @@ class Iterator : public Cleanable { // Property "rocksdb.iterator.internal-key": // Get the user-key portion of the internal key at which the iteration // stopped. + // Property "rocksdb.iterator.write-time": + // DO NOT USE, UNDER CONSTRUCTION + // Get the unix time of the best estimate of the write time of the entry. + // Returned as 64-bit raw value (8 bytes). It can be converted to uint64_t + // with util method `DecodeU64Ts`. The accuracy of the write time depends on + // settings like preserve_internal_time_seconds. If this feature is + // disabled, this property will always be empty. The actual write time of + // the entry should be the same or newer than the returned write time. So + // this property can be interpreted as the possible oldest write time for + // the entry. virtual Status GetProperty(std::string prop_name, std::string* prop); virtual Slice timestamp() const { diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index ecc8ef059..e0536712c 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -112,6 +112,14 @@ class WriteBatchWithIndex : public WriteBatchBase { Status Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& ts, const Slice& value) override; + using WriteBatchBase::TimedPut; + Status TimedPut(ColumnFamilyHandle* /* column_family */, + const Slice& /* key */, const Slice& /* value */, + uint64_t /* write_unix_time */) override { + return Status::NotSupported( + "TimedPut not supported by WriteBatchWithIndex"); + } + Status PutEntity(ColumnFamilyHandle* column_family, const Slice& /* key */, const WideColumns& /* columns */) override { if (!column_family) { diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index dfc2bfdf4..48119f108 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -100,6 +100,17 @@ class WriteBatch : public WriteBatchBase { return Put(nullptr, key, value); } + using WriteBatchBase::TimedPut; + // DO NOT USE, UNDER CONSTRUCTION + // Stores the mapping "key->value" in the database with the specified write + // time in the column family. + Status TimedPut(ColumnFamilyHandle* /* column_family */, + const Slice& /* key */, const Slice& /* value */, + uint64_t /* write_unix_time */) override { + // TODO(yuzhangyu): implement take in the write time. + return Status::NotSupported("TimedPut is under construction"); + } + // Store the mapping "key->{column1:value1, column2:value2, ...}" in the // column family specified by "column_family". 
using WriteBatchBase::PutEntity; diff --git a/include/rocksdb/write_batch_base.h b/include/rocksdb/write_batch_base.h index d82eefd49..5b26ee543 100644 --- a/include/rocksdb/write_batch_base.h +++ b/include/rocksdb/write_batch_base.h @@ -42,6 +42,22 @@ class WriteBatchBase { const SliceParts& value); virtual Status Put(const SliceParts& key, const SliceParts& value); + // Store the mapping "key->value" in the database with the specified write + // time in the column family. Using some write time that is in the past to + // fast track data to their correct placement and preservation is the intended + // usage of this API. The DB makes a reasonable best effort to treat the data + // as having the given write time for this purpose but doesn't currently make + // any guarantees. + // + // When a regular Put("foo", "v1") is followed by a + // TimedPut("foo", "v2", some_time_before_first_put), the behavior of read + // queries are undefined and can change over time, for example due to + // compactions. + // Note: this feature is currently not compatible with user-defined timestamps + // and wide columns. + virtual Status TimedPut(ColumnFamilyHandle* column_family, const Slice& key, + const Slice& value, uint64_t write_unix_time) = 0; + // Store the mapping "key->{column1:value1, column2:value2, ...}" in the // column family specified by "column_family". virtual Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key, From 73d223c4e2e8390a029d7ce38580ef194554ef2b Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 10 Nov 2023 09:53:09 -0800 Subject: [PATCH 273/386] Add auto_tuned option to RateLimiter C API (#12058) Summary: #### Problem While the RocksDB C API does have the RateLimiter API, it does not expose the auto_tuned option. #### Summary of Change This PR exposes auto_tuned RateLimiter option in RocksDB C API. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12058 Test Plan: Augment the C API existing test to cover the new API. 
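For reference, a minimal usage sketch of the new function from the C API; the rate, refill period, and fairness values are arbitrary and mirror the ones used in c_test, and error handling is omitted:

#include "rocksdb/c.h"

int main() {
  rocksdb_options_t* options = rocksdb_options_create();

  // Auto-tuned variant: same parameters as rocksdb_ratelimiter_create(),
  // with rate_bytes_per_sec acting as the upper bound while tuning.
  rocksdb_ratelimiter_t* limiter = rocksdb_ratelimiter_create_auto_tuned(
      1000 * 1024 * 1024 /* rate_bytes_per_sec */,
      100 * 1000 /* refill_period_us */, 10 /* fairness */);
  rocksdb_options_set_ratelimiter(options, limiter);
  // The options object keeps its own reference, so the wrapper can be freed.
  rocksdb_ratelimiter_destroy(limiter);

  rocksdb_options_destroy(options);
  return 0;
}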
Reviewed By: cbi42 Differential Revision: D51201933 Pulled By: ajkr fbshipit-source-id: 5bc595a9cf9f88f50fee797b729ba96f09ed8266 --- db/c.cc | 10 ++++++++++ db/c_test.c | 5 +++++ include/rocksdb/c.h | 4 ++++ .../add_auto_tuned_rate_limiter_to_c_api.md | 1 + 4 files changed, 20 insertions(+) create mode 100644 unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md diff --git a/db/c.cc b/db/c.cc index 847f5af34..a2dce4644 100644 --- a/db/c.cc +++ b/db/c.cc @@ -3967,6 +3967,16 @@ rocksdb_ratelimiter_t* rocksdb_ratelimiter_create(int64_t rate_bytes_per_sec, return rate_limiter; } +rocksdb_ratelimiter_t* rocksdb_ratelimiter_create_auto_tuned( + int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness) { + rocksdb_ratelimiter_t* rate_limiter = new rocksdb_ratelimiter_t; + rate_limiter->rep.reset(NewGenericRateLimiter(rate_bytes_per_sec, + refill_period_us, fairness, + RateLimiter::Mode::kWritesOnly, + true)); // auto_tuned + return rate_limiter; +} + void rocksdb_ratelimiter_destroy(rocksdb_ratelimiter_t* limiter) { delete limiter; } diff --git a/db/c_test.c b/db/c_test.c index 33dc5240d..10da375db 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -713,6 +713,11 @@ int main(int argc, char** argv) { rocksdb_options_set_ratelimiter(options, rate_limiter); rocksdb_ratelimiter_destroy(rate_limiter); + rate_limiter = + rocksdb_ratelimiter_create_auto_tuned(1000 * 1024 * 1024, 100 * 1000, 10); + rocksdb_options_set_ratelimiter(options, rate_limiter); + rocksdb_ratelimiter_destroy(rate_limiter); + roptions = rocksdb_readoptions_create(); rocksdb_readoptions_set_verify_checksums(roptions, 1); rocksdb_readoptions_set_fill_cache(roptions, 1); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index d57b6cfdc..572c41132 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1672,6 +1672,10 @@ extern ROCKSDB_LIBRARY_API int rocksdb_options_get_wal_compression( /* RateLimiter */ extern ROCKSDB_LIBRARY_API rocksdb_ratelimiter_t* rocksdb_ratelimiter_create( int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness); +extern ROCKSDB_LIBRARY_API rocksdb_ratelimiter_t* +rocksdb_ratelimiter_create_auto_tuned(int64_t rate_bytes_per_sec, + int64_t refill_period_us, + int32_t fairness); extern ROCKSDB_LIBRARY_API void rocksdb_ratelimiter_destroy( rocksdb_ratelimiter_t*); diff --git a/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md b/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md new file mode 100644 index 000000000..980ca7868 --- /dev/null +++ b/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md @@ -0,0 +1 @@ +Added rocksdb_ratelimiter_create_auto_tuned API to create an auto-tuned GenericRateLimiter. From 5ef92b8ea4aed557280284fcb8dba6ddb8558985 Mon Sep 17 00:00:00 2001 From: Yueh-Hsuan Chiang Date: Fri, 10 Nov 2023 11:36:11 -0800 Subject: [PATCH 274/386] Add rocksdb_options_set_cf_paths (#11151) Summary: This PR adds a missing set function for rocksdb_options in the C-API: rocksdb_options_set_cf_paths(). Without this function, users cannot specify different paths for different column families as it will fall back to db_paths. As a bonus, this PR also includes rocksdb_sst_file_metadata_get_directory() to the C api -- a missing public function that will also make the test easier to write. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11151 Test Plan: Augment existing c_test to verify the specified cf_path. 
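For reference, a usage sketch for giving one column family a dedicated path; the path name and target size are illustrative, and opening the DB with these options is elided:

#include "rocksdb/c.h"

int main() {
  rocksdb_options_t* cf_options = rocksdb_options_create();

  // Store this column family's files under the given path, with a 1 MB
  // target size for the path.
  rocksdb_dbpath_t* cf_path =
      rocksdb_dbpath_create("/tmp/rocksdb_cf1_path", 1024 * 1024);
  const rocksdb_dbpath_t* cf_paths[1] = {cf_path};
  rocksdb_options_set_cf_paths(cf_options, cf_paths, 1);

  // ... create/open the column family with cf_options ...

  rocksdb_options_destroy(cf_options);
  rocksdb_dbpath_destroy(cf_path);
  return 0;
}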
Reviewed By: hx235 Differential Revision: D51201888 Pulled By: ajkr fbshipit-source-id: 62a96451f26fab60ada2005ede3eea8e9b431f30 --- db/c.cc | 15 ++++++++++++++ db/c_test.c | 49 ++++++++++++++++++++++++++++++++++++++++++--- include/rocksdb/c.h | 5 +++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/db/c.cc b/db/c.cc index a2dce4644..5c7ffed48 100644 --- a/db/c.cc +++ b/db/c.cc @@ -2904,6 +2904,16 @@ void rocksdb_options_set_db_paths(rocksdb_options_t* opt, opt->rep.db_paths = db_paths; } +void rocksdb_options_set_cf_paths(rocksdb_options_t* opt, + const rocksdb_dbpath_t** dbpath_values, + size_t num_paths) { + std::vector cf_paths(num_paths); + for (size_t i = 0; i < num_paths; ++i) { + cf_paths[i] = dbpath_values[i]->rep; + } + opt->rep.cf_paths = cf_paths; +} + void rocksdb_options_set_env(rocksdb_options_t* opt, rocksdb_env_t* env) { opt->rep.env = (env ? env->rep : nullptr); } @@ -5484,6 +5494,11 @@ char* rocksdb_sst_file_metadata_get_relative_filename( return strdup(file_meta->rep->relative_filename.c_str()); } +char* rocksdb_sst_file_metadata_get_directory( + rocksdb_sst_file_metadata_t* file_meta) { + return strdup(file_meta->rep->directory.c_str()); +} + uint64_t rocksdb_sst_file_metadata_get_size( rocksdb_sst_file_metadata_t* file_meta) { return file_meta->rep->size; diff --git a/db/c_test.c b/db/c_test.c index 10da375db..802dc093d 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -1479,10 +1479,20 @@ int main(int argc, char** argv) { CheckCondition(cflen == 2); rocksdb_list_column_families_destroy(column_fams, cflen); - rocksdb_options_t* cf_options = rocksdb_options_create(); + rocksdb_options_t* cf_options_1 = rocksdb_options_create(); + rocksdb_options_t* cf_options_2 = rocksdb_options_create(); + + // use dbpathname2 as the cf_path for "cf1" + rocksdb_dbpath_t* dbpath2; + char dbpathname2[200]; + snprintf(dbpathname2, sizeof(dbpathname2), "%s/rocksdb_c_test-%d-dbpath2", + GetTempDir(), ((int)geteuid())); + dbpath2 = rocksdb_dbpath_create(dbpathname2, 1024 * 1024); + const rocksdb_dbpath_t* cf_paths[1] = {dbpath2}; + rocksdb_options_set_cf_paths(cf_options_2, cf_paths, 1); const char* cf_names[2] = {"default", "cf1"}; - const rocksdb_options_t* cf_opts[2] = {cf_options, cf_options}; + const rocksdb_options_t* cf_opts[2] = {cf_options_1, cf_options_2}; rocksdb_column_family_handle_t* handles[2]; LoadAndCheckLatestOptions(dbname, env, false, cache, NULL, 2, cf_names, @@ -1510,6 +1520,37 @@ int main(int argc, char** argv) { rocksdb_flushoptions_t* flush_options = rocksdb_flushoptions_create(); rocksdb_flushoptions_set_wait(flush_options, 1); rocksdb_flush_cf(db, flush_options, handles[1], &err); + + // make sure all files in "cf1" are under the specified cf path + { + rocksdb_column_family_metadata_t* cf_meta = + rocksdb_get_column_family_metadata_cf(db, handles[1]); + size_t cf_file_count = rocksdb_column_family_metadata_get_size(cf_meta); + assert(cf_file_count > 0); + size_t level_count = + rocksdb_column_family_metadata_get_level_count(cf_meta); + assert(level_count > 0); + for (size_t l = 0; l < level_count; ++l) { + rocksdb_level_metadata_t* level_meta = + rocksdb_column_family_metadata_get_level_metadata(cf_meta, l); + assert(level_meta); + + size_t file_count = rocksdb_level_metadata_get_file_count(level_meta); + for (size_t f = 0; f < file_count; ++f) { + rocksdb_sst_file_metadata_t* file_meta = + rocksdb_level_metadata_get_sst_file_metadata(level_meta, f); + assert(file_meta); + char* file_path = rocksdb_sst_file_metadata_get_directory(file_meta); + 
assert(strcmp(file_path, dbpathname2) == 0); + Free(&file_path); + rocksdb_sst_file_metadata_destroy(file_meta); + } + rocksdb_level_metadata_destroy(level_meta); + } + + rocksdb_column_family_metadata_destroy(cf_meta); + } + CheckNoError(err) rocksdb_flushoptions_destroy(flush_options); CheckGetCF(db, roptions, handles[1], "foo", "hello"); @@ -1673,7 +1714,9 @@ int main(int argc, char** argv) { } rocksdb_destroy_db(options, dbname, &err); rocksdb_options_destroy(db_options); - rocksdb_options_destroy(cf_options); + rocksdb_options_destroy(cf_options_1); + rocksdb_options_destroy(cf_options_2); + rocksdb_dbpath_destroy(dbpath2); } StartPhase("prefix"); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 572c41132..15e8f8685 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1143,6 +1143,8 @@ extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_paranoid_checks( rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_db_paths( rocksdb_options_t*, const rocksdb_dbpath_t** path_values, size_t num_paths); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_cf_paths( + rocksdb_options_t*, const rocksdb_dbpath_t** path_values, size_t num_paths); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_env(rocksdb_options_t*, rocksdb_env_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_info_log(rocksdb_options_t*, @@ -2435,6 +2437,9 @@ extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_relative_filename( rocksdb_sst_file_metadata_t* file_meta); +extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_directory( + rocksdb_sst_file_metadata_t* file_meta); + extern ROCKSDB_LIBRARY_API uint64_t rocksdb_sst_file_metadata_get_size(rocksdb_sst_file_metadata_t* file_meta); From c6c683a0ca090c8ae39468fb6f1a2d17ffbac2f8 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 10 Nov 2023 14:35:54 -0800 Subject: [PATCH 275/386] Remove the default force behavior for `EnableFileDeletion` API (#12001) Summary: Disabling file deletion can be critical for operations like making a backup, recovery from manifest IO error (for now). Ideally as long as there is one caller requesting file deletion disabled, it should be kept disabled until all callers agree to re-enable it. So this PR removes the default forcing behavior for the `EnableFileDeletion` API, and users need to explicitly pass the argument if they insisted on doing so knowing the consequence of what can be potentially disrupted. This PR removes the API's default argument value so it will cause breakage for all users that are relying on the default value, regardless of whether the forcing behavior is critical for them. When fixing this breakage, it's good to check if the forcing behavior is indeed needed and potential disruption is OK. This PR also makes unit test that do not need force behavior to do a regular enable file deletion. 
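For affected callers the migration is a one-line change per call site. A small sketch of the non-forcing form that most callers should prefer (the helper name is made up):

#include "rocksdb/db.h"

// Re-enable file deletions after a matching DisableFileDeletions() call.
// The non-forcing form only re-enables deletions once every caller that
// disabled them has agreed to re-enable.
ROCKSDB_NAMESPACE::Status ReenableFileDeletions(ROCKSDB_NAMESPACE::DB* db) {
  // Pass /*force=*/true only when it is known to be safe to override other
  // callers that may still rely on deletions staying disabled, e.g. a backup.
  return db->EnableFileDeletions(/*force=*/false);
}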
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12001 Reviewed By: ajkr Differential Revision: D51214683 Pulled By: jowlyzhang fbshipit-source-id: ca7b1ebf15c09eed00f954da2f75c00d2c6a97e4 --- db/db_filesnapshot.cc | 2 +- db/db_log_iter_test.cc | 2 +- db/db_properties_test.cc | 6 +++--- db/db_test2.cc | 2 +- db/db_wal_test.cc | 2 +- db/obsolete_files_test.cc | 2 +- include/rocksdb/db.h | 14 ++++++++++++-- java/src/main/java/org/rocksdb/RocksDB.java | 2 +- tools/ldb_cmd_test.cc | 2 +- .../enable_file_deletion_not_default_force.md | 2 ++ utilities/backup/backup_engine.cc | 2 +- utilities/blob_db/blob_db_impl.h | 2 +- utilities/blob_db/blob_db_test.cc | 2 +- utilities/checkpoint/checkpoint_impl.cc | 4 ++-- 14 files changed, 29 insertions(+), 17 deletions(-) create mode 100644 unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md diff --git a/db/db_filesnapshot.cc b/db/db_filesnapshot.cc index cb95a1676..40e7ac155 100644 --- a/db/db_filesnapshot.cc +++ b/db/db_filesnapshot.cc @@ -121,7 +121,7 @@ Status DBImpl::GetSortedWalFiles(VectorLogPtr& files) { // DisableFileDeletions / EnableFileDeletions not supported in read-only DB if (deletions_disabled.ok()) { - Status s2 = EnableFileDeletions(/*force*/ false); + Status s2 = EnableFileDeletions(/*force=*/false); assert(s2.ok()); s2.PermitUncheckedError(); } else { diff --git a/db/db_log_iter_test.cc b/db/db_log_iter_test.cc index 41f5fe4d1..87313971a 100644 --- a/db/db_log_iter_test.cc +++ b/db/db_log_iter_test.cc @@ -236,7 +236,7 @@ TEST_F(DBTestXactLogIterator, TransactionLogIteratorCorruptedLog) { ASSERT_OK(test::TruncateFile(env_, logfile_path, wal_files.front()->SizeFileBytes() / 2)); - ASSERT_OK(db_->EnableFileDeletions()); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/false)); // Insert a new entry to a new log file ASSERT_OK(Put("key1025", DummyString(10))); diff --git a/db/db_properties_test.cc b/db/db_properties_test.cc index f3281ec74..e761f96d9 100644 --- a/db/db_properties_test.cc +++ b/db/db_properties_test.cc @@ -107,12 +107,12 @@ TEST_F(DBPropertiesTest, Empty) { dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("0", num); - ASSERT_OK(db_->EnableFileDeletions(false)); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/false)); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("0", num); - ASSERT_OK(db_->EnableFileDeletions()); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/true)); ASSERT_TRUE( dbfull()->GetProperty("rocksdb.is-file-deletions-enabled", &num)); ASSERT_EQ("1", num); @@ -1744,7 +1744,7 @@ TEST_F(DBPropertiesTest, SstFilesSize) { ASSERT_EQ(obsolete_sst_size, sst_size); // Let the obsolete files be deleted. 
- ASSERT_OK(db_->EnableFileDeletions()); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/false)); ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kObsoleteSstFilesSize, &obsolete_sst_size)); ASSERT_EQ(obsolete_sst_size, 0); diff --git a/db/db_test2.cc b/db/db_test2.cc index fa9da48d2..e471685b2 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -4135,7 +4135,7 @@ TEST_F(DBTest2, LiveFilesOmitObsoleteFiles) { ASSERT_OK(env_->FileExists(LogFileName(dbname_, log_file->LogNumber()))); } - ASSERT_OK(db_->EnableFileDeletions()); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/false)); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 7e7a89cdf..ee11c873a 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1195,7 +1195,7 @@ TEST_F(DBWALTest, DISABLED_FullPurgePreservesLogPendingReuse) { ROCKSDB_NAMESPACE::port::Thread thread([&]() { TEST_SYNC_POINT( "DBWALTest::FullPurgePreservesLogPendingReuse:PreFullPurge"); - ASSERT_OK(db_->EnableFileDeletions(true)); + ASSERT_OK(db_->EnableFileDeletions(/*force=*/true)); TEST_SYNC_POINT( "DBWALTest::FullPurgePreservesLogPendingReuse:PostFullPurge"); }); diff --git a/db/obsolete_files_test.cc b/db/obsolete_files_test.cc index 03f38c09f..eec1486c1 100644 --- a/db/obsolete_files_test.cc +++ b/db/obsolete_files_test.cc @@ -165,7 +165,7 @@ TEST_F(ObsoleteFilesTest, DeleteObsoleteOptionsFile) { {{"paranoid_file_checks", "true"}})); } } - ASSERT_OK(dbfull()->EnableFileDeletions(true /* force */)); + ASSERT_OK(dbfull()->EnableFileDeletions(/*force=*/false)); Close(); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index d3f4d4349..c33776164 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1650,7 +1650,17 @@ class DB { virtual Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family, std::string* ts_low) = 0; - // Allow compactions to delete obsolete files. + // Enable deleting obsolete files. + // Usually users should only need to call this if they have previously called + // `DisableFileDeletions`. + // File deletions disabling and enabling is not controlled by a binary flag, + // instead it's represented as a counter to allow different callers to + // independently disable file deletion. Disabling file deletion can be + // critical for operations like making a backup. So the counter implementation + // makes the file deletion disabled as long as there is one caller requesting + // so, and only when every caller agrees to re-enable file deletion, it will + // be enabled. So be careful when calling this function with force = true as + // explained below. // If force == true, the call to EnableFileDeletions() will guarantee that // file deletions are enabled after the call, even if DisableFileDeletions() // was called multiple times before. @@ -1659,7 +1669,7 @@ class DB { // enabling the two methods to be called by two threads concurrently without // synchronization -- i.e., file deletions will be enabled only after both // threads call EnableFileDeletions() - virtual Status EnableFileDeletions(bool force = true) = 0; + virtual Status EnableFileDeletions(bool force) = 0; // Retrieves the creation time of the oldest file in the DB. 
// This API only works if max_open_files = -1, if it is not then diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 93c3cf1c1..54e95e6e8 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -4199,7 +4199,7 @@ public void disableFileDeletions() throws RocksDBException { } /** - *
<p> Allow compactions to delete obsolete files. + * <p>
    Enable deleting obsolete files. * If force == true, the call to EnableFileDeletions() * will guarantee that file deletions are enabled after * the call, even if DisableFileDeletions() was called diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 05d433dbe..65ccd23ce 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -269,7 +269,7 @@ class FileChecksumTestHelper { break; } } - EXPECT_OK(db_->EnableFileDeletions()); + EXPECT_OK(db_->EnableFileDeletions(/*force=*/false)); return cs; } }; diff --git a/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md b/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md new file mode 100644 index 000000000..e4d032613 --- /dev/null +++ b/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md @@ -0,0 +1,2 @@ +Make the `EnableFileDeletion` API not default to force enabling. For users that rely on this default behavior and still +want to continue to use force enabling, they need to explicitly pass a `true` to `EnableFileDeletion`. \ No newline at end of file diff --git a/utilities/backup/backup_engine.cc b/utilities/backup/backup_engine.cc index e74218d45..31a733731 100644 --- a/utilities/backup/backup_engine.cc +++ b/utilities/backup/backup_engine.cc @@ -1583,7 +1583,7 @@ IOStatus BackupEngineImpl::CreateNewBackupWithMetadata( // we copied all the files, enable file deletions if (disabled.ok()) { // If we successfully disabled file deletions - db->EnableFileDeletions(false).PermitUncheckedError(); + db->EnableFileDeletions(/*force=*/false).PermitUncheckedError(); } auto backup_time = backup_env_->NowMicros() - start_backup; diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h index 2d0afc639..d491108d3 100644 --- a/utilities/blob_db/blob_db_impl.h +++ b/utilities/blob_db/blob_db_impl.h @@ -490,7 +490,7 @@ class BlobDBImpl : public BlobDB { // Each call of DisableFileDeletions will increase disable_file_deletion_ // by 1. EnableFileDeletions will either decrease the count by 1 or reset - // it to zeor, depending on the force flag. + // it to zero, depending on the force flag. // // REQUIRES: access with delete_file_mutex_ held. int disable_file_deletions_ = 0; diff --git a/utilities/blob_db/blob_db_test.cc b/utilities/blob_db/blob_db_test.cc index 015ceb907..07f0cc89e 100644 --- a/utilities/blob_db/blob_db_test.cc +++ b/utilities/blob_db/blob_db_test.cc @@ -2037,7 +2037,7 @@ TEST_F(BlobDBTest, DisableFileDeletions) { ASSERT_EQ(1, blob_db_impl()->TEST_GetObsoleteFiles().size()); VerifyDB(data); // Call EnableFileDeletions a second time. - ASSERT_OK(blob_db_->EnableFileDeletions(false)); + ASSERT_OK(blob_db_->EnableFileDeletions(/*force=*/false)); blob_db_impl()->TEST_DeleteObsoleteFiles(); } // Regardless of value of `force`, file should be deleted by now. 
diff --git a/utilities/checkpoint/checkpoint_impl.cc b/utilities/checkpoint/checkpoint_impl.cc index 4a0cc7159..e1f094513 100644 --- a/utilities/checkpoint/checkpoint_impl.cc +++ b/utilities/checkpoint/checkpoint_impl.cc @@ -148,7 +148,7 @@ Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir, // we copied all the files, enable file deletions if (disabled_file_deletions) { - Status ss = db_->EnableFileDeletions(false); + Status ss = db_->EnableFileDeletions(/*force=*/false); assert(ss.ok()); ss.PermitUncheckedError(); } @@ -337,7 +337,7 @@ Status CheckpointImpl::ExportColumnFamily( nullptr, Temperature::kUnknown); } /*copy_file_cb*/); - const auto enable_status = db_->EnableFileDeletions(false /*force*/); + const auto enable_status = db_->EnableFileDeletions(/*force=*/false); if (s.ok()) { s = enable_status; } From 0ffc0c7db19efd2915be7156b37ac0cd9e1eea4c Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 10 Nov 2023 16:57:17 -0800 Subject: [PATCH 276/386] Allow `TtlMergeOperator` to wrap an unregistered `MergeOperator` (#12056) Summary: Followed mrambacher's first suggestion in https://github.com/facebook/rocksdb/pull/12044#issuecomment-1800706148. This change allows serializing a `TtlMergeOperator` that wraps an unregistered `MergeOperator`. Such a `TtlMergeOperator` cannot be loaded (validation will fail in `TtlMergeOperator::ValidateOptions()`), but that is OK for us currently. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12056 Reviewed By: hx235 Differential Revision: D51125097 Pulled By: ajkr fbshipit-source-id: 8ed3705e8d36ab473673b9198eea6db64397ed15 --- utilities/ttl/db_ttl_impl.cc | 6 +++--- utilities/ttl/ttl_test.cc | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/utilities/ttl/db_ttl_impl.cc b/utilities/ttl/db_ttl_impl.cc index f45a6f0c1..e4bff7826 100644 --- a/utilities/ttl/db_ttl_impl.cc +++ b/utilities/ttl/db_ttl_impl.cc @@ -19,9 +19,9 @@ namespace ROCKSDB_NAMESPACE { static std::unordered_map ttl_merge_op_type_info = - {{"user_operator", - OptionTypeInfo::AsCustomSharedPtr( - 0, OptionVerificationType::kByName, OptionTypeFlags::kNone)}}; + {{"user_operator", OptionTypeInfo::AsCustomSharedPtr( + 0, OptionVerificationType::kByNameAllowNull, + OptionTypeFlags::kNone)}}; TtlMergeOperator::TtlMergeOperator( const std::shared_ptr& merge_op, SystemClock* clock) diff --git a/utilities/ttl/ttl_test.cc b/utilities/ttl/ttl_test.cc index 723de25e0..da1d2d0da 100644 --- a/utilities/ttl/ttl_test.cc +++ b/utilities/ttl/ttl_test.cc @@ -403,8 +403,10 @@ class TtlTest : public testing::Test { DBWithTTL* db_ttl_; std::unique_ptr env_; - private: + protected: Options options_; + + private: KVMap kvmap_; KVMap::iterator kv_it_; const std::string kNewValue_ = "new_value"; @@ -611,6 +613,17 @@ TEST_F(TtlTest, CompactionFilter) { CloseTtl(); } +TEST_F(TtlTest, UnregisteredMergeOperator) { + class UnregisteredMergeOperator : public MergeOperator { + public: + const char* Name() const override { return "UnregisteredMergeOperator"; } + }; + options_.fail_if_options_file_error = true; + options_.merge_operator = std::make_shared(); + OpenTtl(); + CloseTtl(); +} + // Insert some key-values which KeyMayExist should be able to get and check that // values returned are fine TEST_F(TtlTest, KeyMayExist) { @@ -901,6 +914,14 @@ TEST_F(TtlOptionsTest, LoadTtlMergeOperator) { std::shared_ptr copy; ASSERT_OK(MergeOperator::CreateFromString(config_options_, opts_str, ©)); ASSERT_TRUE(mo->AreEquivalent(config_options_, 
copy.get(), &mismatch)); + + // An unregistered user_operator will be null, which is not supported by the + // `TtlMergeOperator` implementation. + ASSERT_OK(MergeOperator::CreateFromString( + config_options_, "id=TtlMergeOperator; user_operator=unknown", &mo)); + ASSERT_NE(mo.get(), nullptr); + ASSERT_STREQ(mo->Name(), TtlMergeOperator::kClassName()); + ASSERT_NOK(mo->ValidateOptions(DBOptions(), ColumnFamilyOptions())); } } // namespace ROCKSDB_NAMESPACE From 509947ce2c970d296fd0d868455d560c7f778a57 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Sat, 11 Nov 2023 08:11:11 -0800 Subject: [PATCH 277/386] Quarantine files in a limbo state after a manifest error (#12030) Summary: Part of the procedure to handle a manifest IO error is to disable file deletion, in case some files in a limbo state get deleted prematurely. This is not ideal because: 1) not all the VersionEdits whose commit encounters such an error contain updates for files, so disabling file deletion is sometimes unnecessary. 2) `EnableFileDeletion` has a force mode that could let other threads accidentally disrupt this procedure during recovery. 3) Disabling file deletion as a whole is also not as efficient as more precisely tracking the impacted files and protecting them from premature deletion. This PR replaces that mechanism with tracking such files and quarantining them from deletion in `ErrorHandler`. These are the types of files actively tracked in quarantine in this PR: 1) new table files and blob files from a background job, and 2) an old manifest file whose immediately following new manifest file's CURRENT file creation ends up in an unclear state; the current handling is not sufficient to make sure the old manifest file is kept in case it's needed. Note that WAL logs are not part of the quarantine because `min_log_number_to_keep` is a safe mechanism: it is only updated after successful manifest commits, so it already prevents this premature deletion issue. We track these files' file numbers because they share the same file number space.
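As a rough mental model of the quarantine mechanism described above, here is a simplified, self-contained sketch; the struct and function names below are invented for illustration, and the real implementation lives in `ErrorHandler`/`VersionSet` in the diff that follows:

```
// Simplified sketch of the quarantine idea (illustration only, not the actual
// RocksDB types).
#include <cstdint>
#include <unordered_set>
#include <vector>

struct ErrorHandlerSketch {
  // File numbers whose MANIFEST commit state is ambiguous.
  std::vector<uint64_t> files_to_quarantine;

  // Called (under the DB mutex in the real code) when a MANIFEST write fails.
  void AddFilesToQuarantine(const std::vector<uint64_t>& files) {
    files_to_quarantine.insert(files_to_quarantine.end(), files.begin(),
                               files.end());
  }
  // Called once recovery succeeds and the files' fate is known again.
  void ClearFilesToQuarantine() { files_to_quarantine.clear(); }
};

// Obsolete-file purging simply skips quarantined file numbers.
inline bool ShouldSkipDeletion(uint64_t file_number,
                               const std::unordered_set<uint64_t>& quarantine) {
  return quarantine.count(file_number) > 0;
}
```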
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12030 Test Plan: Modified existing unit tests Reviewed By: ajkr Differential Revision: D51036774 Pulled By: jowlyzhang fbshipit-source-id: 84ef26271fbbc888ef70da5c40fe843bd7038716 --- db/compaction/compaction_job_test.cc | 8 +++-- db/db_basic_test.cc | 7 +++++ db/db_impl/db_impl.cc | 7 ++--- db/db_impl/db_impl.h | 5 +-- db/db_impl/db_impl_debug.cc | 4 +++ db/db_impl/db_impl_files.cc | 15 +++++---- db/db_wal_test.cc | 5 +-- db/error_handler.cc | 47 +++++++++++++++------------- db/error_handler.h | 25 ++++++++++++--- db/error_handler_fs_test.cc | 15 +++++++++ db/flush_job_test.cc | 3 +- db/job_context.h | 11 +++++++ db/memtable_list_test.cc | 10 +++--- db/repair.cc | 3 +- db/version_edit.h | 24 ++++++++++++++ db/version_set.cc | 29 ++++++++++++++--- db/version_set.h | 7 ++++- db/version_set_test.cc | 30 ++++++++++++------ db/version_util.h | 5 +-- db/wal_manager_test.cc | 3 +- tools/ldb_cmd.cc | 15 +++++---- tools/ldb_cmd_test.cc | 2 +- 22 files changed, 201 insertions(+), 79 deletions(-) diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index eccd57701..c4e985c8b 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -215,8 +215,9 @@ class CompactionJobTestBase : public testing::Test { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "", - /*daily_offpeak_time_utc*/ "")), + /*io_tracer=*/nullptr, /*db_id=*/"", /*db_session_id=*/"", + /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)), shutting_down_(false), mock_table_factory_(new mock::MockTableFactory()), error_handler_(nullptr, db_options_, &mutex_), @@ -545,7 +546,8 @@ class CompactionJobTestBase : public testing::Test { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); compaction_job_stats_.Reset(); ASSERT_OK(SetIdentityFile(env_, dbname_)); diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 2d6835dcc..0c8ae6033 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -4415,6 +4415,8 @@ TEST_F(DBBasicTest, ManifestWriteFailure) { options.create_if_missing = true; options.disable_auto_compactions = true; options.env = env_; + options.enable_blob_files = true; + options.blob_file_size = 0; DestroyAndReopen(options); ASSERT_OK(Put("foo", "bar")); ASSERT_OK(Flush()); @@ -4435,6 +4437,11 @@ TEST_F(DBBasicTest, ManifestWriteFailure) { SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->EnableProcessing(); Reopen(options); + // The IO error was a mocked one from the `AfterSyncManifest` callback. The + // Flush's VersionEdit actually made it into the Manifest. So these keys can + // be read back. Read them to check all live sst files and blob files. 
+ ASSERT_EQ("bar", Get("foo")); + ASSERT_EQ("value", Get("key")); } TEST_F(DBBasicTest, DestroyDefaultCfHandle) { diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a36eda16f..55e2355c8 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -279,7 +279,8 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, versions_.reset(new VersionSet( dbname_, &immutable_db_options_, file_options_, table_cache_.get(), write_buffer_manager_, &write_controller_, &block_cache_tracer_, - io_tracer_, db_id_, db_session_id_, options.daily_offpeak_time_utc)); + io_tracer_, db_id_, db_session_id_, options.daily_offpeak_time_utc, + &error_handler_)); column_family_memtables_.reset( new ColumnFamilyMemTablesImpl(versions_->GetColumnFamilySet())); @@ -359,10 +360,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { if (io_s.IsIOError()) { // If resuming from IOError resulted from MANIFEST write, then assert // that we must have already set the MANIFEST writer to nullptr during - // clean-up phase MANIFEST writing. We must have also disabled file - // deletions. + // clean-up phase MANIFEST writing. assert(!versions_->descriptor_log_); - assert(!IsFileDeletionsEnabled()); // Since we are trying to recover from MANIFEST write error, we need to // switch to a new MANIFEST anyway. The old MANIFEST can be corrupted. // Therefore, force writing a dummy version edit because we do not know diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 86d81a9dc..72927bd6d 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1190,6 +1190,7 @@ class DBImpl : public DB { size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; void TEST_WaitForPeriodicTaskRun(std::function callback) const; SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const; + const autovector& TEST_GetFilesToQuarantine() const; size_t TEST_EstimateInMemoryStatsHistorySize() const; uint64_t TEST_GetCurrentLogNumber() const { @@ -2380,10 +2381,6 @@ class DBImpl : public DB { Status DisableFileDeletionsWithLock(); - // Safely decrease `disable_delete_obsolete_files_` by one while holding lock - // and return its remaning value. 
- int EnableFileDeletionsWithLock(); - Status IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, std::string ts_low); diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index 8be960c7b..17050e465 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -309,6 +309,10 @@ SeqnoToTimeMapping DBImpl::TEST_GetSeqnoToTimeMapping() const { return seqno_to_time_mapping_; } +const autovector& DBImpl::TEST_GetFilesToQuarantine() const { + InstrumentedMutexLock l(&mutex_); + return error_handler_.GetFilesToQuarantine(); +} size_t DBImpl::TEST_EstimateInMemoryStatsHistorySize() const { InstrumentedMutexLock l(&const_cast(this)->stats_history_mutex_); diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index f1e8813f0..bd4879647 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -100,14 +100,6 @@ Status DBImpl::EnableFileDeletions(bool force) { return Status::OK(); } -int DBImpl::EnableFileDeletionsWithLock() { - mutex_.AssertHeld(); - // In case others have called EnableFileDeletions(true /* force */) in between - disable_delete_obsolete_files_ = - std::max(0, disable_delete_obsolete_files_ - 1); - return disable_delete_obsolete_files_; -} - bool DBImpl::IsFileDeletionsEnabled() const { return 0 == disable_delete_obsolete_files_; } @@ -154,6 +146,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force, // mutex_ cannot be released. Otherwise, we might see no min_pending_output // here but later find newer generated unfinalized files while scanning. job_context->min_pending_output = MinObsoleteSstNumberToKeep(); + job_context->files_to_quarantine = error_handler_.GetFilesToQuarantine(); // Get obsolete files. This function will also update the list of // pending files in VersionSet(). 
@@ -427,6 +420,8 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { state.blob_live.end()); std::unordered_set log_recycle_files_set( state.log_recycle_files.begin(), state.log_recycle_files.end()); + std::unordered_set quarantine_files_set( + state.files_to_quarantine.begin(), state.files_to_quarantine.end()); auto candidate_files = state.full_scan_candidate_files; candidate_files.reserve( @@ -530,6 +525,10 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) { continue; } + if (quarantine_files_set.find(number) != quarantine_files_set.end()) { + continue; + } + bool keep = true; switch (type) { case kWalFile: diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index ee11c873a..fbc01131e 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1541,8 +1541,9 @@ class RecoveryTestHelper { test->dbname_, &db_options, file_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "", - options.daily_offpeak_time_utc)); + /*io_tracer=*/nullptr, /*db_id=*/"", /*db_session_id=*/"", + options.daily_offpeak_time_utc, + /*error_handler=*/nullptr)); wal_manager.reset( new WalManager(db_options, file_options, /*io_tracer=*/nullptr)); diff --git a/db/error_handler.cc b/db/error_handler.cc index 6797371b9..f43261001 100644 --- a/db/error_handler.cc +++ b/db/error_handler.cc @@ -396,15 +396,6 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s", bg_io_err.ToString().c_str()); - if (!recovery_disabled_file_deletion_ && - (BackgroundErrorReason::kManifestWrite == reason || - BackgroundErrorReason::kManifestWriteNoWAL == reason)) { - // Always returns ok - ROCKS_LOG_INFO(db_options_.info_log, "Disabling File Deletions"); - db_->DisableFileDeletionsWithLock().PermitUncheckedError(); - recovery_disabled_file_deletion_ = true; - } - Status new_bg_io_err = bg_io_err; DBRecoverContext context; if (bg_io_err.GetScope() != IOStatus::IOErrorScope::kIOErrorScopeFile && @@ -505,6 +496,31 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status, } } +void ErrorHandler::AddFilesToQuarantine( + autovector*> files_to_quarantine) { + db_mutex_->AssertHeld(); + std::ostringstream quarantine_files_oss; + bool is_first_one = true; + for (const auto* files : files_to_quarantine) { + assert(files); + for (uint64_t file_number : *files) { + files_to_quarantine_.push_back(file_number); + quarantine_files_oss << (is_first_one ? 
"" : ", ") << file_number; + is_first_one = false; + } + } + ROCKS_LOG_INFO(db_options_.info_log, + "ErrorHandler: added file numbers %s to quarantine.\n", + quarantine_files_oss.str().c_str()); +} + +void ErrorHandler::ClearFilesToQuarantine() { + db_mutex_->AssertHeld(); + files_to_quarantine_.clear(); + ROCKS_LOG_INFO(db_options_.info_log, + "ErrorHandler: cleared files in quarantine.\n"); +} + Status ErrorHandler::OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery) { if (bg_error.severity() >= Status::Severity::kFatalError) { @@ -552,6 +568,7 @@ Status ErrorHandler::ClearBGError() { // Signal that recovery succeeded if (recovery_error_.ok()) { + assert(files_to_quarantine_.empty()); Status old_bg_error = bg_error_; // old_bg_error is only for notifying listeners, so may not be checked old_bg_error.PermitUncheckedError(); @@ -563,18 +580,6 @@ Status ErrorHandler::ClearBGError() { recovery_error_.PermitUncheckedError(); recovery_in_prog_ = false; soft_error_no_bg_work_ = false; - if (recovery_disabled_file_deletion_) { - recovery_disabled_file_deletion_ = false; - int remain_counter = db_->EnableFileDeletionsWithLock(); - if (remain_counter == 0) { - ROCKS_LOG_INFO(db_options_.info_log, "File Deletions Enabled"); - } else { - ROCKS_LOG_WARN( - db_options_.info_log, - "File Deletions Enable, but not really enabled. Counter: %d", - remain_counter); - } - } EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, old_bg_error, bg_error_, db_mutex_); } diff --git a/db/error_handler.h b/db/error_handler.h index ace28962d..1168d91fa 100644 --- a/db/error_handler.h +++ b/db/error_handler.h @@ -4,11 +4,14 @@ // (found in the LICENSE.Apache file in the root directory). #pragma once +#include + #include "monitoring/instrumented_mutex.h" #include "options/db_options.h" #include "rocksdb/io_status.h" #include "rocksdb/listener.h" #include "rocksdb/status.h" +#include "util/autovector.h" namespace ROCKSDB_NAMESPACE { @@ -42,8 +45,7 @@ class ErrorHandler { recovery_in_prog_(false), soft_error_no_bg_work_(false), is_db_stopped_(false), - bg_error_stats_(db_options.statistics), - recovery_disabled_file_deletion_(false) { + bg_error_stats_(db_options.statistics) { // Clear the checked flag for uninitialized errors bg_error_.PermitUncheckedError(); recovery_error_.PermitUncheckedError(); @@ -81,6 +83,16 @@ class ErrorHandler { void EndAutoRecovery(); + void AddFilesToQuarantine( + autovector*> files_to_quarantine); + + const autovector& GetFilesToQuarantine() const { + db_mutex_->AssertHeld(); + return files_to_quarantine_; + } + + void ClearFilesToQuarantine(); + private: DBImpl* db_; const ImmutableDBOptions& db_options_; @@ -109,9 +121,12 @@ class ErrorHandler { // The pointer of DB statistics. std::shared_ptr bg_error_stats_; - // Tracks whether the recovery has disabled file deletion. This boolean flag - // is updated while holding db mutex. - bool recovery_disabled_file_deletion_; + // During recovery from manifest IO errors, files whose VersionEdits entries + // could be in an ambiguous state are quarantined and file deletion refrain + // from deleting them. Successful recovery will clear this vector. Files are + // added to this vector while DB mutex was locked, this data structure is + // unsorted. 
+ autovector files_to_quarantine_; const Status& HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason); diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index 56d2fcfc0..2d33a7a69 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -661,6 +661,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); fault_fs_->SetFilesystemActive(true); @@ -669,6 +670,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); @@ -704,6 +706,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) { SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); fault_fs_->SetFilesystemActive(true); @@ -712,6 +715,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); @@ -749,6 +753,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) { [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); @@ -758,6 +763,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); @@ -795,6 +801,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) { SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); fault_fs_->SetFilesystemActive(true); @@ -803,6 +810,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); @@ -836,11 +844,13 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); fault_fs_->SetFilesystemActive(true); // This Resume() will attempt to create a new manifest file and fail again s = dbfull()->Resume(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); + 
ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); fault_fs_->SetFilesystemActive(true); SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); @@ -851,6 +861,7 @@ TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); @@ -917,6 +928,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { fault_fs_->SetFilesystemActive(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); TEST_SYNC_POINT("CompactionManifestWriteError:1"); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); TEST_SYNC_POINT("CompactionManifestWriteError:2"); s = dbfull()->TEST_WaitForCompact(); @@ -925,6 +937,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); ASSERT_EQ("val", Get(Key(1))); @@ -983,6 +996,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { ASSERT_OK(s); TEST_SYNC_POINT("CompactionManifestWriteError:0"); + ASSERT_FALSE(dbfull()->TEST_GetFilesToQuarantine().empty()); TEST_SYNC_POINT("CompactionManifestWriteError:1"); s = dbfull()->TEST_WaitForCompact(); @@ -996,6 +1010,7 @@ TEST_F(DBErrorHandlingFSTest, CompactionManifestWriteRetryableError) { new_manifest = GetManifestNameFromLiveFiles(); ASSERT_NE(new_manifest, old_manifest); + ASSERT_TRUE(dbfull()->TEST_GetFilesToQuarantine().empty()); Reopen(options); ASSERT_EQ("val", Get(Key(0))); diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 9a626eac8..21d1571a0 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -131,7 +131,8 @@ class FlushJobTestBase : public testing::Test { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); EXPECT_OK(versions_->Recover(column_families, false)); } diff --git a/db/job_context.h b/db/job_context.h index a550ba2d4..48728f48d 100644 --- a/db/job_context.h +++ b/db/job_context.h @@ -15,6 +15,7 @@ #include "db/column_family.h" #include "db/log_writer.h" #include "db/version_set.h" +#include "util/autovector.h" namespace ROCKSDB_NAMESPACE { @@ -170,6 +171,16 @@ struct JobContext { // will be reused later std::vector log_recycle_files; + // Files quarantined from deletion. This list contains file numbers for files + // that are in an ambiguous states. This includes newly generated SST files + // and blob files from flush and compaction job whose VersionEdits' persist + // state in Manifest are unclear. An old manifest file whose immediately + // following new manifest file's CURRENT file creation is in an unclear state. + // WAL logs don't have this premature deletion risk since + // min_log_number_to_keep is only updated after successful manifest commits. + // So this data structure doesn't track log files. 
+ autovector files_to_quarantine; + // a list of manifest files that we need to delete std::vector manifest_delete_files; diff --git a/db/memtable_list_test.cc b/db/memtable_list_test.cc index 6292f46e4..9a5b7557f 100644 --- a/db/memtable_list_test.cc +++ b/db/memtable_list_test.cc @@ -106,8 +106,9 @@ class MemTableListTest : public testing::Test { VersionSet versions(dbname, &immutable_db_options, env_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ "", /*daily_offpeak_time_utc*/ ""); + /*io_tracer=*/nullptr, /*db_id=*/"", + /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr); std::vector cf_descs; cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); cf_descs.emplace_back("one", ColumnFamilyOptions()); @@ -157,8 +158,9 @@ class MemTableListTest : public testing::Test { VersionSet versions(dbname, &immutable_db_options, env_options, table_cache.get(), &write_buffer_manager, &write_controller, /*block_cache_tracer=*/nullptr, - /*io_tracer=*/nullptr, /*db_id*/ "", - /*db_session_id*/ "", /*daily_offpeak_time_utc*/ ""); + /*io_tracer=*/nullptr, /*db_id=*/"", + /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr); std::vector cf_descs; cf_descs.emplace_back(kDefaultColumnFamilyName, ColumnFamilyOptions()); cf_descs.emplace_back("one", ColumnFamilyOptions()); diff --git a/db/repair.cc b/db/repair.cc index cc77ef8e6..ef21f7ea6 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -122,7 +122,8 @@ class Repairer { vset_(dbname_, &immutable_db_options_, file_options_, raw_table_cache_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id=*/"", db_session_id_, db_options.daily_offpeak_time_utc), + /*db_id=*/"", db_session_id_, db_options.daily_offpeak_time_utc, + /*error_handler=*/nullptr), next_file_number_(1), db_lock_(nullptr), closed_(false) { diff --git a/db/version_edit.h b/db/version_edit.h index 80792e496..8e14e76da 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -492,6 +492,7 @@ class VersionEdit { file_checksum_func_name, unique_id, compensated_range_deletion_size, tail_size, user_defined_timestamps_persisted)); + files_to_quarantine_.push_back(file); if (!HasLastSequence() || largest_seqno > GetLastSequence()) { SetLastSequence(largest_seqno); } @@ -500,6 +501,7 @@ class VersionEdit { void AddFile(int level, const FileMetaData& f) { assert(f.fd.smallest_seqno <= f.fd.largest_seqno); new_files_.emplace_back(level, f); + files_to_quarantine_.push_back(f.fd.GetNumber()); if (!HasLastSequence() || f.fd.largest_seqno > GetLastSequence()) { SetLastSequence(f.fd.largest_seqno); } @@ -536,10 +538,13 @@ class VersionEdit { blob_file_additions_.emplace_back( blob_file_number, total_blob_count, total_blob_bytes, std::move(checksum_method), std::move(checksum_value)); + files_to_quarantine_.push_back(blob_file_number); } void AddBlobFile(BlobFileAddition blob_file_addition) { blob_file_additions_.emplace_back(std::move(blob_file_addition)); + files_to_quarantine_.push_back( + blob_file_additions_.back().GetBlobFileNumber()); } // Retrieve all the blob files added. 
@@ -551,6 +556,11 @@ class VersionEdit { void SetBlobFileAdditions(BlobFileAdditions blob_file_additions) { assert(blob_file_additions_.empty()); blob_file_additions_ = std::move(blob_file_additions); + std::for_each( + blob_file_additions_.begin(), blob_file_additions_.end(), + [&](const BlobFileAddition& blob_file) { + files_to_quarantine_.push_back(blob_file.GetBlobFileNumber()); + }); } // Add garbage for an existing blob file. Note: intentionally broken English @@ -679,6 +689,10 @@ class VersionEdit { std::optional ts_sz = std::nullopt) const; Status DecodeFrom(const Slice& src); + const autovector* GetFilesToQuarantineIfCommitFail() const { + return &files_to_quarantine_; + } + std::string DebugString(bool hex_key = false) const; std::string DebugJSON(int edit_num, bool hex_key = false) const; @@ -740,6 +754,16 @@ class VersionEdit { std::string full_history_ts_low_; bool persist_user_defined_timestamps_ = true; + + // Newly created table files and blob files are eligible for deletion if they + // are not registered as live files after the background jobs creating them + // have finished. In case committing the VersionEdit containing such changes + // to manifest encountered an error, we want to quarantine these files from + // deletion to avoid prematurely deleting files that ended up getting recorded + // in Manifest as live files. + // Since table files and blob files share the same file number space, we just + // record the file number here. + autovector files_to_quarantine_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/version_set.cc b/db/version_set.cc index 0055d3968..329322ccb 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5061,7 +5061,8 @@ VersionSet::VersionSet( WriteBufferManager* write_buffer_manager, WriteController* write_controller, BlockCacheTracer* const block_cache_tracer, const std::shared_ptr& io_tracer, const std::string& db_id, - const std::string& db_session_id, const std::string& daily_offpeak_time_utc) + const std::string& db_session_id, const std::string& daily_offpeak_time_utc, + ErrorHandler* const error_handler) : column_family_set_(new ColumnFamilySet( dbname, _db_options, storage_options, table_cache, write_buffer_manager, write_controller, block_cache_tracer, io_tracer, @@ -5087,7 +5088,8 @@ VersionSet::VersionSet( block_cache_tracer_(block_cache_tracer), io_tracer_(io_tracer), db_session_id_(db_session_id), - offpeak_time_option_(OffpeakTimeOption(daily_offpeak_time_utc)) {} + offpeak_time_option_(OffpeakTimeOption(daily_offpeak_time_utc)), + error_handler_(error_handler) {} VersionSet::~VersionSet() { // we need to delete column_family_set_ because its destructor depends on @@ -5186,6 +5188,8 @@ Status VersionSet::ProcessManifestWrites( autovector versions; autovector mutable_cf_options_ptrs; std::vector> builder_guards; + autovector*> files_to_quarantine_if_commit_fail; + autovector limbo_descriptor_log_file_number; // Tracking `max_last_sequence` is needed to ensure we write // `VersionEdit::last_sequence_`s in non-decreasing order according to the @@ -5469,6 +5473,8 @@ Status VersionSet::ProcessManifestWrites( assert(batch_edits.size() == batch_edits_ts_sz.size()); for (size_t bidx = 0; bidx < batch_edits.size(); bidx++) { auto& e = batch_edits[bidx]; + files_to_quarantine_if_commit_fail.push_back( + e->GetFilesToQuarantineIfCommitFail()); std::string record; if (!e->EncodeTo(&record, batch_edits_ts_sz[bidx])) { s = Status::Corruption("Unable to encode VersionEdit:" + @@ -5518,6 +5524,11 @@ Status 
VersionSet::ProcessManifestWrites( dir_contains_current_file); if (!io_s.ok()) { s = io_s; + // Quarantine old manifest file in case new manifest file's CURRENT file + // wasn't created successfully and the old manifest is needed. + limbo_descriptor_log_file_number.push_back(manifest_file_number_); + files_to_quarantine_if_commit_fail.push_back( + &limbo_descriptor_log_file_number); } } @@ -5554,9 +5565,16 @@ Status VersionSet::ProcessManifestWrites( if (!io_s.ok()) { if (io_status_.ok()) { io_status_ = io_s; + if (error_handler_) { + error_handler_->AddFilesToQuarantine( + files_to_quarantine_if_commit_fail); + } } } else if (!io_status_.ok()) { io_status_ = io_s; + if (error_handler_) { + error_handler_->ClearFilesToQuarantine(); + } } // Append the old manifest file to the obsolete_manifest_ list to be deleted @@ -6214,7 +6232,8 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, VersionSet versions(dbname, &db_options, file_options, tc.get(), &wb, &wc, nullptr /*BlockCacheTracer*/, nullptr /*IOTracer*/, /*db_id*/ "", - /*db_session_id*/ "", options->daily_offpeak_time_utc); + /*db_session_id*/ "", options->daily_offpeak_time_utc, + /*error_handler_*/ nullptr); Status status; std::vector dummy; @@ -7255,8 +7274,8 @@ ReactiveVersionSet::ReactiveVersionSet( : VersionSet(dbname, _db_options, _file_options, table_cache, write_buffer_manager, write_controller, /*block_cache_tracer=*/nullptr, io_tracer, /*db_id*/ "", - /*db_session_id*/ "", - /*daily_offpeak_time_utc*/ "") {} + /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "", + /*error_handler=*/nullptr) {} ReactiveVersionSet::~ReactiveVersionSet() {} diff --git a/db/version_set.h b/db/version_set.h index 9aba238a4..6a6522ec9 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -38,6 +38,7 @@ #include "db/compaction/compaction.h" #include "db/compaction/compaction_picker.h" #include "db/dbformat.h" +#include "db/error_handler.h" #include "db/file_indexer.h" #include "db/log_reader.h" #include "db/range_del_aggregator.h" @@ -1152,7 +1153,8 @@ class VersionSet { BlockCacheTracer* const block_cache_tracer, const std::shared_ptr& io_tracer, const std::string& db_id, const std::string& db_session_id, - const std::string& daily_offpeak_time_utc); + const std::string& daily_offpeak_time_utc, + ErrorHandler* const error_handler); // No copying allowed VersionSet(const VersionSet&) = delete; void operator=(const VersionSet&) = delete; @@ -1668,6 +1670,9 @@ class VersionSet { // Off-peak time option used for compaction scoring OffpeakTimeOption offpeak_time_option_; + // Pointer to the DB's ErrorHandler. + ErrorHandler* const error_handler_; + private: // REQUIRES db mutex at beginning. 
may release and re-acquire db mutex Status ProcessManifestWrites(std::deque& writers, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index aaf331c57..5eb910c9f 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1205,7 +1205,8 @@ class VersionSetTestBase { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); reactive_versions_ = std::make_shared( dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, nullptr); @@ -1309,7 +1310,8 @@ class VersionSetTestBase { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); EXPECT_OK(versions_->Recover(column_families_, false)); } @@ -1821,7 +1823,8 @@ TEST_F(VersionSetTest, WalAddition) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, /*read_only=*/false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -1888,7 +1891,8 @@ TEST_F(VersionSetTest, WalCloseWithoutSync) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 2); @@ -1941,7 +1945,8 @@ TEST_F(VersionSetTest, WalDeletion) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -1979,7 +1984,8 @@ TEST_F(VersionSetTest, WalDeletion) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -2099,7 +2105,8 @@ TEST_F(VersionSetTest, DeleteWalsBeforeNonExistingWalNumber) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, 
/*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 1); @@ -2135,7 +2142,8 @@ TEST_F(VersionSetTest, DeleteAllWals) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(new_versions->Recover(column_families_, false)); const auto& wals = new_versions->GetWalSet().GetWals(); ASSERT_EQ(wals.size(), 0); @@ -2177,7 +2185,8 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); std::string db_id; ASSERT_OK( new_versions->Recover(column_families_, /*read_only=*/false, &db_id)); @@ -2335,7 +2344,8 @@ class VersionSetWithTimestampTest : public VersionSetTest { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); ASSERT_OK(vset->Recover(column_families_, /*read_only=*/false, /*db_id=*/nullptr)); for (auto* cfd : *(vset->GetColumnFamilySet())) { diff --git a/db/version_util.h b/db/version_util.h index f6042fa03..acb27749b 100644 --- a/db/version_util.h +++ b/db/version_util.h @@ -25,8 +25,9 @@ class OfflineManifestWriter { options.table_cache_numshardbits)), versions_(db_path, &immutable_db_options_, sopt_, tc_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", - options.daily_offpeak_time_utc) {} + /*db_id=*/"", /*db_session_id=*/"", + options.daily_offpeak_time_utc, + /*error_handler=*/nullptr) {} Status Recover(const std::vector& column_families) { return versions_.Recover(column_families, /*read_only*/ false, diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc index abd7cd7ef..0ead57ae8 100644 --- a/db/wal_manager_test.cc +++ b/db/wal_manager_test.cc @@ -54,7 +54,8 @@ class WalManagerTest : public testing::Test { dbname_, &db_options_, env_options_, table_cache_.get(), &write_buffer_manager_, &write_controller_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "")); + /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr)); wal_manager_.reset( new WalManager(db_options_, env_options_, nullptr /*IOTracer*/)); diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index b467ab6d3..0e983bd8a 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -1364,8 +1364,9 @@ void DumpManifestFile(Options options, std::string file, bool verbose, bool hex, ImmutableDBOptions immutable_db_options(options); VersionSet versions(dbname, &immutable_db_options, sopt, tc.get(), &wb, &wc, 
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", - options.daily_offpeak_time_utc); + /*db_id=*/"", /*db_session_id=*/"", + options.daily_offpeak_time_utc, + /*error_handler=*/nullptr); Status s = versions.DumpManifest(options, file, verbose, hex, json, cf_descs); if (!s.ok()) { fprintf(stderr, "Error in processing file %s %s\n", file.c_str(), @@ -1508,8 +1509,9 @@ Status GetLiveFilesChecksumInfoFromVersionSet(Options options, ImmutableDBOptions immutable_db_options(options); VersionSet versions(dbname, &immutable_db_options, sopt, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", - options.daily_offpeak_time_utc); + /*db_id=*/"", /*db_session_id=*/"", + options.daily_offpeak_time_utc, + /*error_handler=*/nullptr); std::vector cf_name_list; s = versions.ListColumnFamilies(&cf_name_list, db_path, immutable_db_options.fs.get()); @@ -2330,8 +2332,9 @@ Status ReduceDBLevelsCommand::GetOldNumOfLevels(Options& opt, int* levels) { WriteBufferManager wb(opt.db_write_buffer_size); VersionSet versions(db_path_, &db_options, soptions, tc.get(), &wb, &wc, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id*/ "", /*db_session_id*/ "", - opt.daily_offpeak_time_utc); + /*db_id=*/"", /*db_session_id=*/"", + opt.daily_offpeak_time_utc, + /*error_handler=*/nullptr); std::vector dummy; ColumnFamilyDescriptor dummy_descriptor(kDefaultColumnFamilyName, ColumnFamilyOptions(opt)); diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 65ccd23ce..465d1eb31 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -208,7 +208,7 @@ class FileChecksumTestHelper { ImmutableDBOptions immutable_db_options(options_); VersionSet versions(dbname_, &immutable_db_options, sopt, tc.get(), &wb, &wc, nullptr, nullptr, "", "", - options_.daily_offpeak_time_utc); + options_.daily_offpeak_time_utc, nullptr); std::vector cf_name_list; Status s; s = versions.ListColumnFamilies(&cf_name_list, dbname_, From 9fb6851918e12f5c14dd017803d93e2ad6f592ad Mon Sep 17 00:00:00 2001 From: brodyhuang Date: Mon, 13 Nov 2023 12:09:58 -0800 Subject: [PATCH 278/386] fix(StackableDB): Resume API (#12060) Summary: When I call `DBWithTTLImpl::Resume()`, it returns `Status::NotSupported`. Did `StackableDB` miss this API? Thanks! Pull Request resolved: https://github.com/facebook/rocksdb/pull/12060 Reviewed By: jaykorean Differential Revision: D51202742 Pulled By: ajkr fbshipit-source-id: 5e01a54a42efd81fd57b3c992b9af8bc45c59c9c --- include/rocksdb/utilities/stackable_db.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h index e377d7ed2..86e1477a4 100644 --- a/include/rocksdb/utilities/stackable_db.h +++ b/include/rocksdb/utilities/stackable_db.h @@ -597,6 +597,8 @@ class StackableDB : public DB { return db_->TryCatchUpWithPrimary(); } + virtual Status Resume() override { return db_->Resume(); } + protected: DB* db_; std::shared_ptr shared_db_ptr_; From b3ffca0e298225631ed131805acb8b1335130413 Mon Sep 17 00:00:00 2001 From: leipeng Date: Mon, 13 Nov 2023 12:48:44 -0800 Subject: [PATCH 279/386] DBImpl::DelayWrite: Remove bad WRITE_STALL histogram (#12067) Summary: When a delay didn't happen, the WRITE_STALL histogram was still recorded while the STALL_MICROS ticker was not. This is a bug: neither WRITE_STALL nor STALL_MICROS should be recorded when no delay happened.
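The corrected accounting follows the pattern in the hedged sketch below; it is a simplified illustration only, where `now_micros` and `record_stall` stand in for RocksDB's internal clock and statistics objects, and the actual change is in the diff that follows:

```
// Simplified sketch of the fix (illustration only, not the actual DBImpl code).
#include <cstdint>
#include <functional>

void DelayWriteSketch(const std::function<uint64_t()>& now_micros,
                      const std::function<void(uint64_t)>& record_stall,
                      uint64_t delay_micros) {
  bool delayed = false;
  uint64_t start_time = 0;
  if (delay_micros > 0) {
    start_time = now_micros();  // start timing only when a stall begins
    delayed = true;
    // ... sleep in small intervals until the stall ends ...
  }
  if (delayed) {
    // Record the STALL_MICROS ticker and the WRITE_STALL histogram (both
    // represented by record_stall here) only when a delay actually happened.
    record_stall(now_micros() - start_time);
  }
}
```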
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12067 Reviewed By: cbi42 Differential Revision: D51263133 Pulled By: ajkr fbshipit-source-id: bd82d8328fe088d613991966e83854afdabc6a25 --- db/db_impl/db_impl_write.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index ee2c24046..f72b9e481 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1825,11 +1825,9 @@ uint64_t DBImpl::GetMaxTotalWalSize() const { Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, const WriteOptions& write_options) { mutex_.AssertHeld(); - uint64_t time_delayed = 0; + uint64_t start_time = 0; bool delayed = false; { - StopWatch sw(immutable_db_options_.clock, stats_, WRITE_STALL, - Histograms::HISTOGRAM_ENUM_MAX, &time_delayed); // To avoid parallel timed delays (bad throttling), only support them // on the primary write queue. uint64_t delay; @@ -1845,6 +1843,7 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, if (write_options.no_slowdown) { return Status::Incomplete("Write stall"); } + start_time = immutable_db_options_.clock->NowMicros(); TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep"); // Notify write_thread about the stall so it can setup a barrier and @@ -1857,7 +1856,7 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, // (slightly longer because WriteController minimum delay is 1ms, in // case of sleep imprecision, rounding, etc.) const uint64_t kDelayInterval = 1001; - uint64_t stall_end = sw.start_time() + delay; + uint64_t stall_end = start_time + delay; while (write_controller_.NeedsDelay()) { if (immutable_db_options_.clock->NowMicros() >= stall_end) { // We already delayed this write `delay` microseconds @@ -1898,9 +1897,11 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, } assert(!delayed || !write_options.no_slowdown); if (delayed) { + auto time_delayed = immutable_db_options_.clock->NowMicros() - start_time; default_cf_internal_stats_->AddDBStats( InternalStats::kIntStatsWriteStallMicros, time_delayed); RecordTick(stats_, STALL_MICROS, time_delayed); + RecordInHistogram(stats_, WRITE_STALL, time_delayed); } // If DB is not in read-only mode and write_controller is not stopping From 8b8f6c63ef7a0c3ce4fe987e91f79380202a110c Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Mon, 13 Nov 2023 14:30:04 -0800 Subject: [PATCH 280/386] ColumnFamilyHandle Nullcheck in GetEntity and MultiGetEntity (#12057) Summary: - Add missing null check for ColumnFamilyHandle in `GetEntity()` - `FailIfCfHasTs()` now returns `Status::InvalidArgument()` if `column_family` is null. `MultiGetEntity()` can rely on this for cfh null check. 
- Added `DeleteRange` API using Default Column Family to be consistent with other major APIs (This was also causing Java Test failure after the `FailIfCfHasTs()` change) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12057 Test Plan: - Updated `DBWideBasicTest::GetEntityAsPinnableAttributeGroups` to include null CF case - Updated `DBWideBasicTest::MultiCFMultiGetEntityAsPinnableAttributeGroups` to include null CF case Reviewed By: jowlyzhang Differential Revision: D51167445 Pulled By: jaykorean fbshipit-source-id: 1c1e44fd7b7df4d2dc3bb2d7d251da85bad7d664 --- db/db_impl/db_impl.cc | 27 +++++++++++++--- db/db_impl/db_impl.h | 4 ++- db/wide/db_wide_basic_test.cc | 58 ++++++++++++++++++++++++++++++++--- include/rocksdb/db.h | 9 ++++++ java/rocksjni/rocksjni.cc | 9 ++++-- 5 files changed, 96 insertions(+), 11 deletions(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 55e2355c8..297c6aceb 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2062,10 +2062,11 @@ Status DBImpl::GetEntity(const ReadOptions& _read_options, const Slice& key, return Status::InvalidArgument( "Cannot call GetEntity without PinnableAttributeGroups object"); } + Status s; const size_t num_column_families = result->size(); if (_read_options.io_activity != Env::IOActivity::kUnknown && _read_options.io_activity != Env::IOActivity::kGetEntity) { - Status s = Status::InvalidArgument( + s = Status::InvalidArgument( "Cannot call GetEntity with `ReadOptions::io_activity` != " "`Env::IOActivity::kUnknown` or `Env::IOActivity::kGetEntity`"); for (size_t i = 0; i < num_column_families; ++i) { @@ -2075,7 +2076,7 @@ Status DBImpl::GetEntity(const ReadOptions& _read_options, const Slice& key, } // return early if no CF was passed in if (num_column_families == 0) { - return Status::OK(); + return s; } ReadOptions read_options(_read_options); if (read_options.io_activity == Env::IOActivity::kUnknown) { @@ -2084,10 +2085,29 @@ Status DBImpl::GetEntity(const ReadOptions& _read_options, const Slice& key, std::vector keys; std::vector column_families; for (size_t i = 0; i < num_column_families; ++i) { + // If any of the CFH is null, break early since the entire query will fail + if (!(*result)[i].column_family()) { + s = Status::InvalidArgument( + "DB failed to query because one or more group(s) have null column " + "family handle"); + (*result)[i].SetStatus( + Status::InvalidArgument("Column family handle cannot be null")); + break; + } // Adding the same key slice for different CFs keys.emplace_back(key); column_families.emplace_back((*result)[i].column_family()); } + if (!s.ok()) { + for (size_t i = 0; i < num_column_families; ++i) { + if ((*result)[i].status().ok()) { + (*result)[i].SetStatus( + Status::Incomplete("DB not queried due to invalid argument(s) in " + "one or more of the attribute groups")); + } + } + return s; + } std::vector columns(num_column_families); std::vector statuses(num_column_families); MultiGetCommon( @@ -2100,7 +2120,7 @@ Status DBImpl::GetEntity(const ReadOptions& _read_options, const Slice& key, (*result)[i].SetStatus(statuses[i]); (*result)[i].SetColumns(std::move(columns[i])); } - return Status::OK(); + return s; } bool DBImpl::ShouldReferenceSuperVersion(const MergeContext& merge_context) { @@ -2882,7 +2902,6 @@ void DBImpl::MultiGetCommon(const ReadOptions& read_options, bool should_fail = false; for (size_t i = 0; i < num_keys; ++i) { ColumnFamilyHandle* cfh = column_families[i]; - assert(cfh); if (read_options.timestamp) { statuses[i] = 
FailIfTsMismatchCf(cfh, *(read_options.timestamp)); if (!statuses[i].ok()) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 72927bd6d..34a5f3398 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -2870,7 +2870,9 @@ static void ClipToRange(T* ptr, V minvalue, V maxvalue) { inline Status DBImpl::FailIfCfHasTs( const ColumnFamilyHandle* column_family) const { - column_family = column_family ? column_family : DefaultColumnFamily(); + if (!column_family) { + return Status::InvalidArgument("column family handle cannot be null"); + } assert(column_family); const Comparator* const ucmp = column_family->GetComparator(); assert(ucmp); diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 413bfc19f..2280a3ed2 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -277,6 +277,9 @@ TEST_F(DBWideBasicTest, GetEntityAsPinnableAttributeGroups) { {handles_[kDefaultCfHandleIndex], handles_[kHotCfHandleIndex]}}; std::vector hot_and_cold_cfs{ {handles_[kHotCfHandleIndex], handles_[kColdCfHandleIndex]}}; + std::vector default_null_and_hot_cfs{ + handles_[kDefaultCfHandleIndex], nullptr, handles_[kHotCfHandleIndex], + nullptr}; auto create_result = [](const std::vector& column_families) -> PinnableAttributeGroups { @@ -286,9 +289,29 @@ TEST_F(DBWideBasicTest, GetEntityAsPinnableAttributeGroups) { } return result; }; - { - // Case 1. Get first key from default cf and hot_cf and second key from + // Case 1. Invalid Argument (passing in null CF) + AttributeGroups ag{ + AttributeGroup(nullptr, first_default_columns), + AttributeGroup(handles_[kHotCfHandleIndex], first_hot_columns)}; + ASSERT_NOK(db_->PutEntity(WriteOptions(), first_key, ag)); + + PinnableAttributeGroups result = create_result(default_null_and_hot_cfs); + Status s = db_->GetEntity(ReadOptions(), first_key, &result); + ASSERT_NOK(s); + ASSERT_TRUE(s.IsInvalidArgument()); + // Valid CF, but failed with Incomplete status due to other attribute groups + ASSERT_TRUE(result[0].status().IsIncomplete()); + // Null CF + ASSERT_TRUE(result[1].status().IsInvalidArgument()); + // Valid CF, but failed with Incomplete status due to other attribute groups + ASSERT_TRUE(result[2].status().IsIncomplete()); + // Null CF, but failed with Incomplete status because the nullcheck break + // out early in the loop + ASSERT_TRUE(result[3].status().IsIncomplete()); + } + { + // Case 2. Get first key from default cf and hot_cf and second key from // hot_cf and cold_cf constexpr size_t num_column_families = 2; PinnableAttributeGroups first_key_result = @@ -318,7 +341,7 @@ TEST_F(DBWideBasicTest, GetEntityAsPinnableAttributeGroups) { ASSERT_EQ(second_cold_columns, second_key_result[1].columns()); } { - // Case 2. Get first key and second key from all cfs. For the second key, we + // Case 3. Get first key and second key from all cfs. For the second key, we // don't expect to get columns from default cf. 
constexpr size_t num_column_families = 3; PinnableAttributeGroups first_key_result = create_result(all_cfs); @@ -428,6 +451,8 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { {handles_[kDefaultCfHandleIndex], handles_[kHotCfHandleIndex]}}; std::vector hot_and_cold_cfs{ {handles_[kHotCfHandleIndex], handles_[kColdCfHandleIndex]}}; + std::vector null_and_hot_cfs{ + nullptr, handles_[kHotCfHandleIndex], nullptr}; auto create_result = [](const std::vector& column_families) -> PinnableAttributeGroups { @@ -438,7 +463,7 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { return result; }; { - // Check for invalid argument + // Check for invalid read option argument ReadOptions read_options; read_options.io_activity = Env::IOActivity::kGetEntity; std::vector results; @@ -452,6 +477,31 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntityAsPinnableAttributeGroups) { ASSERT_TRUE(results[i][j].status().IsInvalidArgument()); } } + // Check for invalid column family in Attribute Group result + results.clear(); + results.emplace_back(create_result(null_and_hot_cfs)); + results.emplace_back(create_result(all_cfs)); + db_->MultiGetEntity(ReadOptions(), num_keys, keys.data(), results.data()); + + // First one failed due to null CFs in the AttributeGroup + // Null CF + ASSERT_NOK(results[0][0].status()); + ASSERT_TRUE(results[0][0].status().IsInvalidArgument()); + // Valid CF, but failed with incomplete status because of other attribute + // groups + ASSERT_NOK(results[0][1].status()); + ASSERT_TRUE(results[0][1].status().IsIncomplete()); + // Null CF + ASSERT_NOK(results[0][2].status()); + ASSERT_TRUE(results[0][2].status().IsInvalidArgument()); + + // Second one failed with Incomplete because first one failed + ASSERT_NOK(results[1][0].status()); + ASSERT_TRUE(results[1][0].status().IsIncomplete()); + ASSERT_NOK(results[1][1].status()); + ASSERT_TRUE(results[1][1].status().IsIncomplete()); + ASSERT_NOK(results[1][2].status()); + ASSERT_TRUE(results[1][2].status().IsIncomplete()); } { // Case 1. Get first key from default cf and hot_cf and second key from diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index c33776164..5ae73182b 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -509,6 +509,15 @@ class DB { ColumnFamilyHandle* column_family, const Slice& begin_key, const Slice& end_key, const Slice& ts); + virtual Status DeleteRange(const WriteOptions& options, + const Slice& begin_key, const Slice& end_key) { + return DeleteRange(options, DefaultColumnFamily(), begin_key, end_key); + } + virtual Status DeleteRange(const WriteOptions& options, + const Slice& begin_key, const Slice& end_key, + const Slice& ts) { + return DeleteRange(options, DefaultColumnFamily(), begin_key, end_key, ts); + } // Merge the database entry for "key" with "value". Returns OK on success, // and a non-OK status on error. 
The semantics of this operation is diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 8d445838e..66eb2488b 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -1036,8 +1036,13 @@ bool rocksdb_delete_range_helper( ROCKSDB_NAMESPACE::Slice end_key_slice(reinterpret_cast(end_key), jend_key_len); - ROCKSDB_NAMESPACE::Status s = - db->DeleteRange(write_options, cf_handle, begin_key_slice, end_key_slice); + ROCKSDB_NAMESPACE::Status s; + if (cf_handle != nullptr) { + s = db->DeleteRange(write_options, cf_handle, begin_key_slice, + end_key_slice); + } else { + s = db->DeleteRange(write_options, begin_key_slice, end_key_slice); + } // cleanup delete[] begin_key; From e7896f03ad8fc50962909483fc0069d6ecead50d Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 13 Nov 2023 15:26:52 -0800 Subject: [PATCH 281/386] Enable unit test `PrecludeLastLevelTest.RangeDelsCauseFileEndpointsToOverlap` (#12064) Summary: Fixes https://github.com/facebook/rocksdb/issues/11909. The test passes after the change in https://github.com/facebook/rocksdb/issues/11917 to start mock clock from a non-zero time. The reason for test failing is a bit complicated: - The Put here https://github.com/pdillinger/rocksdb/blob/e4ad4a0ef1b852dc203311fb885c673c891f08e0/db/compaction/tiered_compaction_test.cc#L2045 happens before mock clock advances beyond 0. - This causes oldest_key_time_ to be 0 for memtable. - oldest_ancester_time of the first L0 file becomes 0 - L0 -> L5/6 compaction output files sets `oldest_ancestoer_time` to the current time due to these lines: https://github.com/facebook/rocksdb/blob/509947ce2c970d296fd0d868455d560c7f778a57/db/compaction/compaction_job.cc#L1898C34-L1904. - This causes some small sequence number to be mapped to current time: https://github.com/facebook/rocksdb/blob/509947ce2c970d296fd0d868455d560c7f778a57/db/compaction/compaction_job.cc#L301 - Keys in L6 is being moved up to L5 due to the unexpected seqno_to_time mapping - When compacting keys from last level to the penultimate level, we only check keys to be within user key range of penultimate level input files. If we compact the following file 3 with file 1 and output keys to L5, we can get the reported inconsistency bug. ``` L5: file 1 [K5@20, K10@kMaxSeqno], file 2 [K10@30, K14@34) L6: file 3 [K6@5, K10@20] ``` https://github.com/facebook/rocksdb/issues/12063 will add fixes to check internal key range when compacting keys from last level up to the penultimate level. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12064 Test Plan: the unit test passes Reviewed By: ajkr Differential Revision: D51281149 Pulled By: cbi42 fbshipit-source-id: 00b7f026c453454d9f3af5b2de441383a96f0c62 --- db/compaction/tiered_compaction_test.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 654bd0829..0c6d1aa35 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1971,13 +1971,7 @@ TEST_F(PrecludeLastLevelTest, PartialPenultimateLevelCompaction) { Close(); } -// FIXME broken test: -// dbfull()->TEST_WaitForCompact() -// Corruption: force_consistency_checks(DEBUG): VersionBuilder: L5 has -// overlapping ranges: -// file #14 largest key: '6B6579303030303134' seq:32, type:1 vs. 
-// file #19 smallest key: '6B6579303030303130' seq:10, type:1 -TEST_F(PrecludeLastLevelTest, DISABLED_RangeDelsCauseFileEndpointsToOverlap) { +TEST_F(PrecludeLastLevelTest, RangeDelsCauseFileEndpointsToOverlap) { const int kNumLevels = 7; const int kSecondsPerKey = 10; const int kNumFiles = 3; From 65d71ee3719772e21ef7d7e247bf3c53e8c36310 Mon Sep 17 00:00:00 2001 From: Dzmitry Ivaniuk Date: Mon, 13 Nov 2023 20:03:44 -0800 Subject: [PATCH 282/386] Fix warnings when using API (#12066) Summary: Fixes https://github.com/facebook/rocksdb/issues/11457. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12066 Reviewed By: cbi42 Differential Revision: D51259966 Pulled By: ajkr fbshipit-source-id: a158b6f341b6b48233d917bfe4d00b639dbd8619 --- include/rocksdb/advanced_options.h | 2 +- include/rocksdb/sst_file_writer.h | 2 +- include/rocksdb/write_batch.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index 794d70be5..e5ffe8944 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -714,7 +714,7 @@ struct AdvancedColumnFamilyOptions { // // Dynamically changeable through SetOptions() API std::vector max_bytes_for_level_multiplier_additional = - std::vector(num_levels, 1); + std::vector(static_cast(num_levels), 1); // We try to limit number of bytes in one compaction to be lower than this // threshold. But it's not guaranteed. diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index a17e575b7..a2d52733d 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -46,7 +46,7 @@ struct ExternalSstFileInfo { const std::string& _smallest_key, const std::string& _largest_key, SequenceNumber _sequence_number, uint64_t _file_size, - int32_t _num_entries, int32_t _version) + uint64_t _num_entries, int32_t _version) : file_path(_file_path), smallest_key(_smallest_key), largest_key(_largest_key), diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 48119f108..5c87f9405 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -43,13 +43,13 @@ struct SavePoints; struct SliceParts; struct SavePoint { - size_t size; // size of rep_ - int count; // count of elements in rep_ + size_t size; // size of rep_ + uint32_t count; // count of elements in rep_ uint32_t content_flags; SavePoint() : size(0), count(0), content_flags(0) {} - SavePoint(size_t _size, int _count, uint32_t _flags) + SavePoint(size_t _size, uint32_t _count, uint32_t _flags) : size(_size), count(_count), content_flags(_flags) {} void clear() { From 37064d631bff69167afbe759f32aa915027ab192 Mon Sep 17 00:00:00 2001 From: Yingchun Lai Date: Tue, 14 Nov 2023 07:33:21 -0800 Subject: [PATCH 283/386] Add encfs plugin link (#12070) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12070 Reviewed By: jaykorean Differential Revision: D51307148 Pulled By: ajkr fbshipit-source-id: d04335506becd5970802f87ab0573b6307479222 --- PLUGINS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PLUGINS.md b/PLUGINS.md index fefacbede..37fc68b86 100644 --- a/PLUGINS.md +++ b/PLUGINS.md @@ -5,4 +5,5 @@ This is the list of all known third-party plugins for RocksDB. 
If something is m * [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices * [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo. * [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB. -* [IPPCP](https://github.com/intel/ippcp-plugin-rocksdb): a plugin to enable encryption on RocksDB based on Intel optimized open source IPP-Crypto library. \ No newline at end of file +* [IPPCP](https://github.com/intel/ippcp-plugin-rocksdb): a plugin to enable encryption on RocksDB based on Intel optimized open source IPP-Crypto library. +* [encfs](https://github.com/pegasus-kv/encfs): a plugin to enable encryption on RocksDB based on OpenSSL library. \ No newline at end of file From a660e074cdb47b8d8887d599a9a58b64812036d1 Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Tue, 14 Nov 2023 14:39:31 -0800 Subject: [PATCH 284/386] Build RocksDBJava on Windows with Java8. (#12068) Summary: At the moment RocksDBJava uses the default CIrcleCI JVM on Windows builds. This can and has changed in the past and can cause some incompatibilities. This PR addresses the problem of explicitly installing and using Liberica JDK 8 as Java 8 Is the primary target for RocksdbJava. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12068 Reviewed By: cbi42 Differential Revision: D51307233 Pulled By: ajkr fbshipit-source-id: 9cb4e173d8a9ac42e5f9fda1daf012302942fdbc --- .circleci/config.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index aaeb8538e..f12de88b2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -54,6 +54,7 @@ commands: command: | echo "Installing CMake..." choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' -y + choco install liberica8jdk -y mkdir $Env:THIRDPARTY_HOME cd $Env:THIRDPARTY_HOME echo "Building Snappy dependency..." @@ -67,6 +68,7 @@ commands: - run: name: "Build RocksDB" command: | + $env:Path = $env:JAVA_HOME + ";" + $env:Path mkdir build cd build & $Env:CMAKE_BIN -G "$Env:CMAKE_GENERATOR" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE="$Env:CMAKE_PORTABLE" -DSNAPPY=1 -DJNI=1 .. 
@@ -575,6 +577,7 @@ jobs: CMAKE_HOME: C:/Program Files/CMake CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe + JAVA_HOME: C:/Program Files/BellSoft/LibericaJDK-8 SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib @@ -592,6 +595,7 @@ jobs: CMAKE_HOME: C:/Program Files/CMake CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe + JAVA_HOME: C:/Program Files/BellSoft/LibericaJDK-8 SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib @@ -609,6 +613,7 @@ jobs: CMAKE_HOME: C:/Program Files/CMake CMAKE_BIN: C:/Program Files/CMake/bin/cmake.exe CTEST_BIN: C:/Program Files/CMake/bin/ctest.exe + JAVA_HOME: C:/Program Files/BellSoft/LibericaJDK-8 SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.8 SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.8;C:/Users/circleci/thirdparty/snappy-1.1.8/build SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.8/build/Debug/snappy.lib From 2222caec9e0648f97b50bbcfffbd94c4f38e43b9 Mon Sep 17 00:00:00 2001 From: anand76 Date: Tue, 14 Nov 2023 16:25:52 -0800 Subject: [PATCH 285/386] Make CacheWithSecondaryAdapter reservation accounting more robust (#12059) Summary: `CacheWithSecondaryAdapter` can distribute placeholder reservations across the primary and secondary caches. The current implementation of the accounting is quite complicated in order to avoid using a mutex. This may cause the accounting to be slightly off after changes to the cache capacity and ratio, resulting in assertion failures. There's also a bug in the unlikely event that the total reservation exceeds the cache capacity. Furthermore, the current implementation is difficult to reason about. This PR simplifies it by doing the accounting while holding a mutex. The reservations are processed in 1MB chunks in order to avoid taking a lock too frequently. As a side effect, this also removes the restriction of not allowing to increase the compressed secondary cache capacity after decreasing it to 0. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12059 Test Plan: Existing unit tests, and a new test for capacity increase from 0 Reviewed By: pdillinger Differential Revision: D51278686 Pulled By: anand1976 fbshipit-source-id: 7e1ad2c50694772997072dd59cab35c93c12ba4f --- cache/compressed_secondary_cache_test.cc | 68 +++++++---- cache/secondary_cache_adapter.cc | 145 ++++++++++++----------- cache/secondary_cache_adapter.h | 19 ++- db_stress_tool/db_stress_common.cc | 2 +- 4 files changed, 138 insertions(+), 96 deletions(-) diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index 84633941d..d72680b84 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -1224,13 +1224,11 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdate) { ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); ASSERT_EQ(sec_capacity, 0); - ASSERT_NOK(UpdateTieredCache(tiered_cache, -1, 0.3)); - // Only check usage for LRU cache. 
HCC shows a 64KB usage for some reason - if (std::get<0>(GetParam()) == PrimaryCacheType::kCacheTypeLRU) { - ASSERT_EQ(GetCache()->GetUsage(), 0); - } + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.3)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (30 << 20), + GetPercent(30 << 20, 1)); ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); - ASSERT_EQ(sec_capacity, 0); + ASSERT_EQ(sec_capacity, (30 << 20)); } TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { @@ -1316,28 +1314,50 @@ TEST_P(CompressedSecCacheTestWithTiered, DynamicUpdateWithReservation) { ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); ASSERT_EQ(sec_capacity, 0); + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.3)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (37 << 20), + GetPercent(37 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (3 << 20), + GetPercent(3 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, 30 << 20); + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0)); } -TEST_P(CompressedSecCacheTestWithTiered, - DynamicUpdateWithReservationUnderflow) { +TEST_P(CompressedSecCacheTestWithTiered, ReservationOverCapacity) { + CompressedSecondaryCache* sec_cache = + reinterpret_cast(GetSecondaryCache()); std::shared_ptr tiered_cache = GetTieredCache(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"CacheWithSecondaryAdapter::Release:ChargeSecCache1", - "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"}, - {"CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End", - "CacheWithSecondaryAdapter::Release:ChargeSecCache2"}}); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); - - port::Thread reserve_release_thread([&]() { - EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(50), Status::OK()); - EXPECT_EQ(cache_res_mgr()->UpdateCacheReservation(0), Status::OK()); - }); - ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.01)); - reserve_release_thread.join(); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); - - ASSERT_OK(UpdateTieredCache(tiered_cache, 100 << 20, 0.3)); + + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(110 << 20)); + // Use EXPECT_PRED3 instead of EXPECT_NEAR to void too many size_t to + // double explicit casts + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (110 << 20), + GetPercent(110 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (30 << 20), + GetPercent(30 << 20, 1)); + size_t sec_capacity; + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (30 << 20)); + + ASSERT_OK(UpdateTieredCache(tiered_cache, -1, 0.39)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (110 << 20), + GetPercent(110 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (39 << 20), + GetPercent(39 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (39 << 20)); + + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(90 << 20)); + EXPECT_PRED3(CacheUsageWithinBounds, GetCache()->GetUsage(), (94 << 20), + GetPercent(94 << 20, 1)); + EXPECT_PRED3(CacheUsageWithinBounds, sec_cache->TEST_GetUsage(), (35 << 20), + GetPercent(35 << 20, 1)); + ASSERT_OK(sec_cache->GetCapacity(sec_capacity)); + ASSERT_EQ(sec_capacity, (39 << 20)); + + ASSERT_OK(cache_res_mgr()->UpdateCacheReservation(0)); } INSTANTIATE_TEST_CASE_P( diff --git a/cache/secondary_cache_adapter.cc 
b/cache/secondary_cache_adapter.cc index 84b4437e8..6e7716754 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -83,7 +83,10 @@ CacheWithSecondaryAdapter::CacheWithSecondaryAdapter( : CacheWrapper(std::move(target)), secondary_cache_(std::move(secondary_cache)), adm_policy_(adm_policy), - distribute_cache_res_(distribute_cache_res) { + distribute_cache_res_(distribute_cache_res), + placeholder_usage_(0), + reserved_usage_(0), + sec_reserved_(0) { target_->SetEvictionCallback( [this](const Slice& key, Handle* handle, bool was_hit) { return EvictionHandler(key, handle, was_hit); @@ -103,8 +106,7 @@ CacheWithSecondaryAdapter::CacheWithSecondaryAdapter( // secondary cache is freed from the reservation. s = pri_cache_res_->UpdateCacheReservation(sec_capacity); assert(s.ok()); - sec_cache_res_ratio_.store((double)sec_capacity / target_->GetCapacity(), - std::memory_order_relaxed); + sec_cache_res_ratio_ = (double)sec_capacity / target_->GetCapacity(); } } @@ -113,7 +115,7 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { // use after free target_->SetEvictionCallback({}); #ifndef NDEBUG - if (distribute_cache_res_ && !ratio_changed_) { + if (distribute_cache_res_) { size_t sec_capacity = 0; Status s = secondary_cache_->GetCapacity(sec_capacity); assert(s.ok()); @@ -236,13 +238,31 @@ Status CacheWithSecondaryAdapter::Insert(const Slice& key, ObjectPtr value, const Slice& compressed_value, CompressionType type) { Status s = target_->Insert(key, value, helper, charge, handle, priority); - if (s.ok() && value == nullptr && distribute_cache_res_) { - size_t sec_charge = static_cast( - charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); - s = secondary_cache_->Deflate(sec_charge); - assert(s.ok()); - s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/false); - assert(s.ok()); + if (s.ok() && value == nullptr && distribute_cache_res_ && handle) { + charge = target_->GetCharge(*handle); + + MutexLock l(&cache_res_mutex_); + placeholder_usage_ += charge; + // Check if total placeholder reservation is more than the overall + // cache capacity. If it is, then we don't try to charge the + // secondary cache because we don't want to overcharge it (beyond + // its capacity). + // In order to make this a bit more lightweight, we also check if + // the difference between placeholder_usage_ and reserved_usage_ is + // atleast kReservationChunkSize and avoid any adjustments if not. + if ((placeholder_usage_ <= target_->GetCapacity()) && + ((placeholder_usage_ - reserved_usage_) >= kReservationChunkSize)) { + reserved_usage_ = placeholder_usage_ & ~(kReservationChunkSize - 1); + size_t new_sec_reserved = + static_cast(reserved_usage_ * sec_cache_res_ratio_); + size_t sec_charge = new_sec_reserved - sec_reserved_; + s = secondary_cache_->Deflate(sec_charge); + assert(s.ok()); + s = pri_cache_res_->UpdateCacheReservation(sec_charge, + /*increase=*/false); + assert(s.ok()); + sec_reserved_ += sec_charge; + } } // Warm up the secondary cache with the compressed block. The secondary // cache may choose to ignore it based on the admission policy. 
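// ---------------------------------------------------------------------------
// Illustration only, not part of the patch: a standalone sketch of the
// chunked reservation accounting used in Insert()/Release() above. The 1 MiB
// kReservationChunkSize matches this diff; the free functions below are just
// illustrative names.
// ---------------------------------------------------------------------------
#include <cstddef>

constexpr size_t kIllustrativeChunkSize = 1 << 20;  // kReservationChunkSize

// Placeholder usage is rounded down to whole chunks before anything is
// charged to the secondary cache, so the mutex-protected adjustment runs
// roughly once per 1 MiB of reservation churn.
inline size_t RoundDownToChunk(size_t placeholder_usage) {
  return placeholder_usage & ~(kIllustrativeChunkSize - 1);
}

// Share of the rounded reservation that is deflated from the secondary cache
// (the rest stays charged to the primary cache reservation).
inline size_t SecondaryShare(size_t reserved_usage, double sec_ratio) {
  return static_cast<size_t>(reserved_usage * sec_ratio);
}

// Example: 5.5 MiB of placeholders at ratio 0.3 gives reserved_usage_ = 5 MiB
// and sec_reserved_ = 1.5 MiB, mirroring the members updated above.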
@@ -287,14 +307,27 @@ bool CacheWithSecondaryAdapter::Release(Handle* handle, ObjectPtr v = target_->Value(handle); if (v == nullptr && distribute_cache_res_) { size_t charge = target_->GetCharge(handle); - size_t sec_charge = static_cast( - charge * (sec_cache_res_ratio_.load(std::memory_order_relaxed))); - TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache1"); - TEST_SYNC_POINT("CacheWithSecondaryAdapter::Release:ChargeSecCache2"); - Status s = secondary_cache_->Inflate(sec_charge); - assert(s.ok()); - s = pri_cache_res_->UpdateCacheReservation(sec_charge, /*increase=*/true); - assert(s.ok()); + + MutexLock l(&cache_res_mutex_); + placeholder_usage_ -= charge; + // Check if total placeholder reservation is more than the overall + // cache capacity. If it is, then we do nothing as reserved_usage_ must + // be already maxed out + if ((placeholder_usage_ <= target_->GetCapacity()) && + (placeholder_usage_ < reserved_usage_)) { + // Adjust reserved_usage_ in chunks of kReservationChunkSize, so + // we don't hit this slow path too often. + reserved_usage_ = placeholder_usage_ & ~(kReservationChunkSize - 1); + size_t new_sec_reserved = + static_cast(reserved_usage_ * sec_cache_res_ratio_); + size_t sec_charge = sec_reserved_ - new_sec_reserved; + Status s = secondary_cache_->Inflate(sec_charge); + assert(s.ok()); + s = pri_cache_res_->UpdateCacheReservation(sec_charge, + /*increase=*/true); + assert(s.ok()); + sec_reserved_ -= sec_charge; + } } } return target_->Release(handle, erase_if_last_ref); @@ -441,13 +474,11 @@ const char* CacheWithSecondaryAdapter::Name() const { // where the new capacity < total cache reservations. void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { size_t sec_capacity = static_cast( - capacity * (distribute_cache_res_ - ? sec_cache_res_ratio_.load(std::memory_order_relaxed) - : 0.0)); + capacity * (distribute_cache_res_ ? sec_cache_res_ratio_ : 0.0)); size_t old_sec_capacity = 0; if (distribute_cache_res_) { - MutexLock m(&mutex_); + MutexLock m(&cache_res_mutex_); Status s = secondary_cache_->GetCapacity(old_sec_capacity); if (!s.ok()) { @@ -462,9 +493,17 @@ void CacheWithSecondaryAdapter::SetCapacity(size_t capacity) { // 3. Decrease the primary cache capacity to the total budget s = secondary_cache_->SetCapacity(sec_capacity); if (s.ok()) { + if (placeholder_usage_ > capacity) { + // Adjust reserved_usage_ down + reserved_usage_ = capacity & ~(kReservationChunkSize - 1); + } + size_t new_sec_reserved = + static_cast(reserved_usage_ * sec_cache_res_ratio_); s = pri_cache_res_->UpdateCacheReservation( - old_sec_capacity - sec_capacity, + (old_sec_capacity - sec_capacity) - + (sec_reserved_ - new_sec_reserved), /*increase=*/false); + sec_reserved_ = new_sec_reserved; assert(s.ok()); target_->SetCapacity(capacity); } @@ -498,7 +537,7 @@ Status CacheWithSecondaryAdapter::GetSecondaryCachePinnedUsage( size_t& size) const { Status s; if (distribute_cache_res_) { - MutexLock m(&mutex_); + MutexLock m(&cache_res_mutex_); size_t capacity = 0; s = secondary_cache_->GetCapacity(capacity); if (s.ok()) { @@ -526,12 +565,11 @@ Status CacheWithSecondaryAdapter::GetSecondaryCachePinnedUsage( // in the future. 
Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( double compressed_secondary_ratio) { - if (!distribute_cache_res_ || - sec_cache_res_ratio_.load(std::memory_order_relaxed) == 0.0) { + if (!distribute_cache_res_) { return Status::NotSupported(); } - MutexLock m(&mutex_); + MutexLock m(&cache_res_mutex_); size_t pri_capacity = target_->GetCapacity(); size_t sec_capacity = static_cast(pri_capacity * compressed_secondary_ratio); @@ -541,38 +579,17 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( return s; } - TEST_SYNC_POINT( - "CacheWithSecondaryAdapter::UpdateCacheReservationRatio:Begin"); - - // There's a possible race condition here. Since the read of pri_cache_res_ - // memory used (secondary cache usage charged to primary cache), and the - // change to sec_cache_res_ratio_ are not guarded by a mutex, its possible - // that an Insert/Release in another thread might decrease/increase the - // pri_cache_res_ reservation by the wrong amount. This should not be a - // problem because updating the sec/pri ratio is a rare operation, and - // the worst that can happen is we may over/under charge the secondary - // cache usage by a little bit. But we do need to protect against - // underflow of old_sec_reserved. - // TODO: Make the accounting more accurate by tracking the total memory - // reservation on the primary cache. This will also allow us to remove - // the restriction of not being able to change the sec/pri ratio from - // 0.0 to higher. - size_t sec_charge_to_pri = pri_cache_res_->GetTotalMemoryUsed(); - size_t old_sec_reserved = (old_sec_capacity > sec_charge_to_pri) - ? (old_sec_capacity - sec_charge_to_pri) - : 0; // Calculate the new secondary cache reservation - size_t sec_reserved = static_cast( - old_sec_reserved * - (double)(compressed_secondary_ratio / - sec_cache_res_ratio_.load(std::memory_order_relaxed))); - sec_cache_res_ratio_.store(compressed_secondary_ratio, - std::memory_order_relaxed); + // reserved_usage_ will never be > the cache capacity, so we don't + // have to worry about adjusting it here. + sec_cache_res_ratio_ = compressed_secondary_ratio; + size_t new_sec_reserved = + static_cast(reserved_usage_ * sec_cache_res_ratio_); if (sec_capacity > old_sec_capacity) { // We're increasing the ratio, thus ending up with a larger secondary // cache and a smaller usable primary cache capacity. Similar to // SetCapacity(), we try to avoid a temporary increase in total usage - // beyond teh configured capacity - + // beyond the configured capacity - // 1. A higher secondary cache ratio means it gets a higher share of // cache reservations. So first account for that by deflating the // secondary cache @@ -580,12 +597,13 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( // cache utilization (increase in capacity - increase in share of cache // reservation) // 3. 
Increase secondary cache capacity - s = secondary_cache_->Deflate(sec_reserved - old_sec_reserved); + s = secondary_cache_->Deflate(new_sec_reserved - sec_reserved_); assert(s.ok()); s = pri_cache_res_->UpdateCacheReservation( - (sec_capacity - old_sec_capacity) - (sec_reserved - old_sec_reserved), + (sec_capacity - old_sec_capacity) - (new_sec_reserved - sec_reserved_), /*increase=*/true); assert(s.ok()); + sec_reserved_ = new_sec_reserved; s = secondary_cache_->SetCapacity(sec_capacity); assert(s.ok()); } else { @@ -599,21 +617,16 @@ Status CacheWithSecondaryAdapter::UpdateCacheReservationRatio( s = secondary_cache_->SetCapacity(sec_capacity); if (s.ok()) { s = pri_cache_res_->UpdateCacheReservation( - (old_sec_capacity - sec_capacity) - (old_sec_reserved - sec_reserved), + (old_sec_capacity - sec_capacity) - + (sec_reserved_ - new_sec_reserved), /*increase=*/false); assert(s.ok()); - s = secondary_cache_->Inflate(old_sec_reserved - sec_reserved); + s = secondary_cache_->Inflate(sec_reserved_ - new_sec_reserved); assert(s.ok()); + sec_reserved_ = new_sec_reserved; } } - TEST_SYNC_POINT("CacheWithSecondaryAdapter::UpdateCacheReservationRatio:End"); -#ifndef NDEBUG - // As mentioned in the function comments, we may accumulate some erros when - // the ratio is changed. We set a flag here which disables some assertions - // in the destructor - ratio_changed_ = true; -#endif return s; } diff --git a/cache/secondary_cache_adapter.h b/cache/secondary_cache_adapter.h index 6b06d0829..f0a514e78 100644 --- a/cache/secondary_cache_adapter.h +++ b/cache/secondary_cache_adapter.h @@ -60,6 +60,8 @@ class CacheWithSecondaryAdapter : public CacheWrapper { SecondaryCache* TEST_GetSecondaryCache() { return secondary_cache_.get(); } private: + static constexpr size_t kReservationChunkSize = 1 << 20; + bool EvictionHandler(const Slice& key, Handle* handle, bool was_hit); void StartAsyncLookupOnMySecondary(AsyncLookupHandle& async_handle); @@ -84,11 +86,18 @@ class CacheWithSecondaryAdapter : public CacheWrapper { std::shared_ptr pri_cache_res_; // Fraction of a cache memory reservation to be assigned to the secondary // cache - std::atomic sec_cache_res_ratio_; - mutable port::Mutex mutex_; -#ifndef NDEBUG - bool ratio_changed_ = false; -#endif + double sec_cache_res_ratio_; + // Mutex for use when managing cache memory reservations. Should not be used + // for other purposes, as it may risk causing deadlocks. + mutable port::Mutex cache_res_mutex_; + // Total memory reserved by placeholder entriesin the cache + size_t placeholder_usage_; + // Total placeholoder memory charged to both the primary and secondary + // caches. Will be <= placeholder_usage_. + size_t reserved_usage_; + // Amount of memory reserved in the secondary cache. This should be + // reserved_usage_ * sec_cache_res_ratio_ in steady state. 
+ size_t sec_reserved_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc index c7a8ef0a2..c0087dc5c 100644 --- a/db_stress_tool/db_stress_common.cc +++ b/db_stress_tool/db_stress_common.cc @@ -189,7 +189,7 @@ void CompressedCacheSetCapacityThread(void* v) { s.ToString().c_str()); } } else if (FLAGS_compressed_secondary_cache_ratio > 0.0) { - if (thread->rand.OneIn(2)) { + if (thread->rand.OneIn(2)) { // if (thread->rand.OneIn(2)) { size_t capacity = block_cache->GetCapacity(); size_t adjustment; if (FLAGS_use_write_buffer_manager && FLAGS_db_write_buffer_size > 0) { From 9202db1867e412e51e72fc04062ca3664deb097b Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 15 Nov 2023 15:42:28 -0800 Subject: [PATCH 286/386] Consider archived WALs for deletion more frequently (#12069) Summary: Fixes https://github.com/facebook/rocksdb/issues/11000. That issue pointed out that RocksDB was slow to delete archived WALs in case time-based and size-based expiration were enabled, and the time-based threshold (`WAL_ttl_seconds`) was small. This PR prevents the delay by taking into account `WAL_ttl_seconds` when deciding the frequency to process archived WALs for deletion. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12069 Reviewed By: pdillinger Differential Revision: D51262589 Pulled By: ajkr fbshipit-source-id: e65431a06ee96f4c599ba84a27d1aedebecbb003 --- db/wal_manager.cc | 9 +- include/rocksdb/options.h | 29 ++-- .../java/org/rocksdb/DBOptionsInterface.java | 129 ++++++++++-------- .../WAL_ttl_seconds_expiration.md | 1 + 4 files changed, 94 insertions(+), 74 deletions(-) create mode 100644 unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md diff --git a/db/wal_manager.cc b/db/wal_manager.cc index 400b2e58b..2b384e7d2 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -153,10 +153,11 @@ void WalManager::PurgeObsoleteWALFiles() { return; } uint64_t const now_seconds = static_cast(current_time); - uint64_t const time_to_check = (ttl_enabled && !size_limit_enabled) - ? db_options_.WAL_ttl_seconds / 2 - : kDefaultIntervalToDeleteObsoleteWAL; - + uint64_t const time_to_check = + ttl_enabled + ? std::min(kDefaultIntervalToDeleteObsoleteWAL, + std::max(uint64_t{1}, db_options_.WAL_ttl_seconds / 2)) + : kDefaultIntervalToDeleteObsoleteWAL; if (purge_wal_files_last_run_ + time_to_check > now_seconds) { return; } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index ae6b5cf6d..be4eb8fba 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -814,18 +814,23 @@ struct DBOptions { // Number of shards used for table cache. int table_cache_numshardbits = 6; - // The following two fields affect how archived logs will be deleted. - // 1. If both set to 0, logs will be deleted asap and will not get into - // the archive. - // 2. If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, - // WAL files will be checked every 10 min and if total size is greater - // then WAL_size_limit_MB, they will be deleted starting with the - // earliest until size_limit is met. All empty files will be deleted. - // 3. If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then - // WAL files will be checked every WAL_ttl_seconds / 2 and those that - // are older than WAL_ttl_seconds will be deleted. - // 4. If both are not 0, WAL files will be checked every 10 min and both - // checks will be performed with ttl being first. 
+ // The following two fields affect when WALs will be archived and deleted. + // + // When both are zero, obsolete WALs will not be archived and will be deleted + // immediately. Otherwise, obsolete WALs will be archived prior to deletion. + // + // When `WAL_size_limit_MB` is nonzero, archived WALs starting with the + // earliest will be deleted until the total size of the archive falls below + // this limit. All empty WALs will be deleted. + // + // When `WAL_ttl_seconds` is nonzero, archived WALs older than + // `WAL_ttl_seconds` will be deleted. + // + // When only `WAL_ttl_seconds` is nonzero, the frequency at which archived + // WALs are deleted is every `WAL_ttl_seconds / 2` seconds. When only + // `WAL_size_limit_MB` is nonzero, the deletion frequency is every ten + // minutes. When both are nonzero, the deletion frequency is the minimum of + // those two values. uint64_t WAL_ttl_seconds = 0; uint64_t WAL_size_limit_MB = 0; diff --git a/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/java/src/main/java/org/rocksdb/DBOptionsInterface.java index 326da98d2..084a399cd 100644 --- a/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/DBOptionsInterface.java @@ -615,21 +615,24 @@ public interface DBOptionsInterface> { int tableCacheNumshardbits(); /** - * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect how archived logs - * will be deleted. - *

- * <ol>
- * <li>If both set to 0, logs will be deleted asap and will not get into
- * the archive.</li>
- * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- * WAL files will be checked every 10 min and if total size is greater
- * then WAL_size_limit_MB, they will be deleted starting with the
- * earliest until size_limit is met. All empty files will be deleted.</li>
- * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- * WAL files will be checked every WAL_ttl_seconds / 2 and those that
- * are older than WAL_ttl_seconds will be deleted.</li>
- * <li>If both are not 0, WAL files will be checked every 10 min and both
- * checks will be performed with ttl being first.</li>
- * </ol>
    + * {@link #walTtlSeconds()} and {@link #walSizeLimitMB()} affect when WALs + * will be archived and deleted. + * + * When both are zero, obsolete WALs will not be archived and will be deleted + * immediately. Otherwise, obsolete WALs will be archived prior to deletion. + * + * When `WAL_size_limit_MB` is nonzero, archived WALs starting with the + * earliest will be deleted until the total size of the archive falls below + * this limit. All empty WALs will be deleted. + * + * When `WAL_ttl_seconds` is nonzero, archived WALs older than + * `WAL_ttl_seconds` will be deleted. + * + * When only `WAL_ttl_seconds` is nonzero, the frequency at which archived + * WALs are deleted is every `WAL_ttl_seconds / 2` seconds. When only + * `WAL_size_limit_MB` is nonzero, the deletion frequency is every ten + * minutes. When both are nonzero, the deletion frequency is the minimum of + * those two values. * * @param walTtlSeconds the ttl seconds * @return the instance of the current object. @@ -638,21 +641,24 @@ public interface DBOptionsInterface> { T setWalTtlSeconds(long walTtlSeconds); /** - * WalTtlSeconds() and walSizeLimitMB() affect how archived logs - * will be deleted. - *
- * <ol>
- * <li>If both set to 0, logs will be deleted asap and will not get into
- * the archive.</li>
- * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- * WAL files will be checked every 10 min and if total size is greater
- * then WAL_size_limit_MB, they will be deleted starting with the
- * earliest until size_limit is met. All empty files will be deleted.</li>
- * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- * WAL files will be checked every WAL_ttl_seconds / 2 and those that
- * are older than WAL_ttl_seconds will be deleted.</li>
- * <li>If both are not 0, WAL files will be checked every 10 min and both
- * checks will be performed with ttl being first.</li>
- * </ol>
    + * WalTtlSeconds() and walSizeLimitMB() affect when WALs will be archived and + * deleted. + * + * When both are zero, obsolete WALs will not be archived and will be deleted + * immediately. Otherwise, obsolete WALs will be archived prior to deletion. + * + * When `WAL_size_limit_MB` is nonzero, archived WALs starting with the + * earliest will be deleted until the total size of the archive falls below + * this limit. All empty WALs will be deleted. + * + * When `WAL_ttl_seconds` is nonzero, archived WALs older than + * `WAL_ttl_seconds` will be deleted. + * + * When only `WAL_ttl_seconds` is nonzero, the frequency at which archived + * WALs are deleted is every `WAL_ttl_seconds / 2` seconds. When only + * `WAL_size_limit_MB` is nonzero, the deletion frequency is every ten + * minutes. When both are nonzero, the deletion frequency is the minimum of + * those two values. * * @return the wal-ttl seconds * @see #walSizeLimitMB() @@ -662,19 +668,22 @@ public interface DBOptionsInterface> { /** * WalTtlSeconds() and walSizeLimitMB() affect how archived logs * will be deleted. - *
- * <ol>
- * <li>If both set to 0, logs will be deleted asap and will not get into
- * the archive.</li>
- * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- * WAL files will be checked every 10 min and if total size is greater
- * then WAL_size_limit_MB, they will be deleted starting with the
- * earliest until size_limit is met. All empty files will be deleted.</li>
- * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- * WAL files will be checked every WAL_ttl_secondsi / 2 and those that
- * are older than WAL_ttl_seconds will be deleted.</li>
- * <li>If both are not 0, WAL files will be checked every 10 min and both
- * checks will be performed with ttl being first.</li>
- * </ol>
    + * + * When both are zero, obsolete WALs will not be archived and will be deleted + * immediately. Otherwise, obsolete WALs will be archived prior to deletion. + * + * When `WAL_size_limit_MB` is nonzero, archived WALs starting with the + * earliest will be deleted until the total size of the archive falls below + * this limit. All empty WALs will be deleted. + * + * When `WAL_ttl_seconds` is nonzero, archived WALs older than + * `WAL_ttl_seconds` will be deleted. + * + * When only `WAL_ttl_seconds` is nonzero, the frequency at which archived + * WALs are deleted is every `WAL_ttl_seconds / 2` seconds. When only + * `WAL_size_limit_MB` is nonzero, the deletion frequency is every ten + * minutes. When both are nonzero, the deletion frequency is the minimum of + * those two values. * * @param sizeLimitMB size limit in mega-bytes. * @return the instance of the current object. @@ -683,21 +692,25 @@ public interface DBOptionsInterface> { T setWalSizeLimitMB(long sizeLimitMB); /** - * {@link #walTtlSeconds()} and {@code #walSizeLimitMB()} affect how archived logs - * will be deleted. - *
- * <ol>
- * <li>If both set to 0, logs will be deleted asap and will not get into
- * the archive.</li>
- * <li>If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0,
- * WAL files will be checked every 10 min and if total size is greater
- * then WAL_size_limit_MB, they will be deleted starting with the
- * earliest until size_limit is met. All empty files will be deleted.</li>
- * <li>If WAL_ttl_seconds is not 0 and WAL_size_limit_MB is 0, then
- * WAL files will be checked every WAL_ttl_seconds i / 2 and those that
- * are older than WAL_ttl_seconds will be deleted.</li>
- * <li>If both are not 0, WAL files will be checked every 10 min and both
- * checks will be performed with ttl being first.</li>
- * </ol>
    + * WalTtlSeconds() and walSizeLimitMB() affect when WALs will be archived and + * deleted. + * + * When both are zero, obsolete WALs will not be archived and will be deleted + * immediately. Otherwise, obsolete WALs will be archived prior to deletion. + * + * When `WAL_size_limit_MB` is nonzero, archived WALs starting with the + * earliest will be deleted until the total size of the archive falls below + * this limit. All empty WALs will be deleted. + * + * When `WAL_ttl_seconds` is nonzero, archived WALs older than + * `WAL_ttl_seconds` will be deleted. + * + * When only `WAL_ttl_seconds` is nonzero, the frequency at which archived + * WALs are deleted is every `WAL_ttl_seconds / 2` seconds. When only + * `WAL_size_limit_MB` is nonzero, the deletion frequency is every ten + * minutes. When both are nonzero, the deletion frequency is the minimum of + * those two values. + * * @return size limit in mega-bytes. * @see #walSizeLimitMB() */ diff --git a/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md b/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md new file mode 100644 index 000000000..3d55f41c8 --- /dev/null +++ b/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md @@ -0,0 +1 @@ +* When `WAL_ttl_seconds > 0`, we now process archived WALs for deletion at least every `WAL_ttl_seconds / 2` seconds. Previously it could be less frequent in case of small `WAL_ttl_seconds` values when size-based expiration (`WAL_size_limit_MB > 0 `) was simultaneously enabled. From 6d10f8d69065c84d505528e281e3de151c6d9e83 Mon Sep 17 00:00:00 2001 From: Gus Wynn Date: Thu, 16 Nov 2023 10:34:00 -0800 Subject: [PATCH 287/386] add WriteBufferManager to c api (#11710) Summary: I want to use the `WriteBufferManager` in my rust project, which requires exposing it through the c api, just like `Cache` is. Hopefully the changes are fairly straightfoward! 
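For reference, a minimal usage sketch (not part of the patch) of the new C API, callable from C or C++; it assumes the long-standing `rocksdb_cache_create_lru()` and `rocksdb_cache_destroy()` helpers and does not show wiring the manager into DB options:

```cpp
#include <cstdio>

#include "rocksdb/c.h"

int main() {
  // Pre-existing C API: an LRU block cache to charge write buffers to.
  rocksdb_cache_t* cache = rocksdb_cache_create_lru(64 << 20);

  // New in this patch: a 32MB write buffer manager costed to that cache,
  // with stalling allowed.
  rocksdb_write_buffer_manager_t* wbm =
      rocksdb_write_buffer_manager_create_with_cache(32 << 20, cache, true);

  std::printf("enabled=%d buffer_size=%zu\n",
              rocksdb_write_buffer_manager_enabled(wbm) ? 1 : 0,
              rocksdb_write_buffer_manager_buffer_size(wbm));

  // Both knobs can be adjusted after creation.
  rocksdb_write_buffer_manager_set_buffer_size(wbm, 64 << 20);
  rocksdb_write_buffer_manager_set_allow_stall(wbm, false);

  rocksdb_write_buffer_manager_destroy(wbm);
  rocksdb_cache_destroy(cache);
  return 0;
}
```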
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11710 Reviewed By: cbi42 Differential Revision: D51166518 Pulled By: ajkr fbshipit-source-id: cd266ff1e4a7ab145d05385cd125a8390f51f3fc --- db/c.cc | 59 +++++++++++++++++++++++++++++++++++++++++++++ include/rocksdb/c.h | 31 ++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/db/c.cc b/db/c.cc index 5c7ffed48..e071ef7a4 100644 --- a/db/c.cc +++ b/db/c.cc @@ -45,6 +45,7 @@ #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" #include "rocksdb/write_batch.h" +#include "rocksdb/write_buffer_manager.h" #include "utilities/merge_operators.h" using ROCKSDB_NAMESPACE::BackupEngine; @@ -125,6 +126,7 @@ using ROCKSDB_NAMESPACE::WALRecoveryMode; using ROCKSDB_NAMESPACE::WritableFile; using ROCKSDB_NAMESPACE::WriteBatch; using ROCKSDB_NAMESPACE::WriteBatchWithIndex; +using ROCKSDB_NAMESPACE::WriteBufferManager; using ROCKSDB_NAMESPACE::WriteOptions; using std::unordered_set; @@ -219,6 +221,9 @@ struct rocksdb_memory_allocator_t { struct rocksdb_cache_t { std::shared_ptr rep; }; +struct rocksdb_write_buffer_manager_t { + std::shared_ptr rep; +}; struct rocksdb_livefiles_t { std::vector rep; }; @@ -4896,6 +4901,60 @@ size_t rocksdb_cache_get_occupancy_count(const rocksdb_cache_t* cache) { return cache->rep->GetOccupancyCount(); } +rocksdb_write_buffer_manager_t* rocksdb_write_buffer_manager_create( + size_t buffer_size, bool allow_stall) { + rocksdb_write_buffer_manager_t* wbm = new rocksdb_write_buffer_manager_t; + wbm->rep.reset(new WriteBufferManager(buffer_size, {}, allow_stall)); + return wbm; +} + +rocksdb_write_buffer_manager_t* rocksdb_write_buffer_manager_create_with_cache( + size_t buffer_size, const rocksdb_cache_t* cache, bool allow_stall) { + rocksdb_write_buffer_manager_t* wbm = new rocksdb_write_buffer_manager_t; + wbm->rep.reset(new WriteBufferManager(buffer_size, cache->rep, allow_stall)); + return wbm; +} + +void rocksdb_write_buffer_manager_destroy(rocksdb_write_buffer_manager_t* wbm) { + delete wbm; +} + +bool rocksdb_write_buffer_manager_enabled(rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->enabled(); +} + +bool rocksdb_write_buffer_manager_cost_to_cache( + rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->cost_to_cache(); +} + +size_t rocksdb_write_buffer_manager_memory_usage( + rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->memory_usage(); +} + +size_t rocksdb_write_buffer_manager_mutable_memtable_memory_usage( + rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->mutable_memtable_memory_usage(); +} + +size_t rocksdb_write_buffer_manager_dummy_entries_in_cache_usage( + rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->dummy_entries_in_cache_usage(); +} +size_t rocksdb_write_buffer_manager_buffer_size( + rocksdb_write_buffer_manager_t* wbm) { + return wbm->rep->buffer_size(); +} +void rocksdb_write_buffer_manager_set_buffer_size( + rocksdb_write_buffer_manager_t* wbm, size_t new_size) { + wbm->rep->SetBufferSize(new_size); +} +ROCKSDB_LIBRARY_API void rocksdb_write_buffer_manager_set_allow_stall( + rocksdb_write_buffer_manager_t* wbm, bool new_allow_stall) { + wbm->rep->SetAllowStall(new_allow_stall); +} + rocksdb_dbpath_t* rocksdb_dbpath_create(const char* path, uint64_t target_size) { rocksdb_dbpath_t* result = new rocksdb_dbpath_t; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 15e8f8685..35e01f4c0 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -78,6 +78,7 @@ typedef struct 
rocksdb_lru_cache_options_t rocksdb_lru_cache_options_t; typedef struct rocksdb_hyper_clock_cache_options_t rocksdb_hyper_clock_cache_options_t; typedef struct rocksdb_cache_t rocksdb_cache_t; +typedef struct rocksdb_write_buffer_manager_t rocksdb_write_buffer_manager_t; typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t; typedef struct rocksdb_compactionfiltercontext_t rocksdb_compactionfiltercontext_t; @@ -2077,6 +2078,36 @@ rocksdb_cache_get_table_address_count(const rocksdb_cache_t* cache); extern ROCKSDB_LIBRARY_API size_t rocksdb_cache_get_occupancy_count(const rocksdb_cache_t* cache); +/* WriteBufferManager */ + +extern ROCKSDB_LIBRARY_API rocksdb_write_buffer_manager_t* +rocksdb_write_buffer_manager_create(size_t buffer_size, bool allow_stall); +extern ROCKSDB_LIBRARY_API rocksdb_write_buffer_manager_t* +rocksdb_write_buffer_manager_create_with_cache(size_t buffer_size, + const rocksdb_cache_t* cache, + bool allow_stall); + +extern ROCKSDB_LIBRARY_API void rocksdb_write_buffer_manager_destroy( + rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API bool rocksdb_write_buffer_manager_enabled( + rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API bool rocksdb_write_buffer_manager_cost_to_cache( + rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_write_buffer_manager_memory_usage(rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_write_buffer_manager_mutable_memtable_memory_usage( + rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_write_buffer_manager_dummy_entries_in_cache_usage( + rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API size_t +rocksdb_write_buffer_manager_buffer_size(rocksdb_write_buffer_manager_t* wbm); +extern ROCKSDB_LIBRARY_API void rocksdb_write_buffer_manager_set_buffer_size( + rocksdb_write_buffer_manager_t* wbm, size_t new_size); +extern ROCKSDB_LIBRARY_API void rocksdb_write_buffer_manager_set_allow_stall( + rocksdb_write_buffer_manager_t* wbm, bool new_allow_stall); + /* HyperClockCache */ extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t* From a9bd525b52dd3a44fdb4d38030ab314d6623950d Mon Sep 17 00:00:00 2001 From: nccx <105946854+nccx@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:35:08 -0800 Subject: [PATCH 288/386] Add Qdrant to USERS.md (#12072) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12072 Reviewed By: cbi42 Differential Revision: D51398080 Pulled By: ajkr fbshipit-source-id: 1043f2b012bd744e9c53c638e1ba56a3e0392e11 --- USERS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/USERS.md b/USERS.md index ac91f7ede..086cab90d 100644 --- a/USERS.md +++ b/USERS.md @@ -152,6 +152,9 @@ LzLabs is using RocksDB as a storage engine in their multi-database distributed ## ArangoDB [ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine. +## Qdrant +[Qdrant](https://qdrant.tech/) is an open source vector database, it [uses](https://qdrant.tech/documentation/concepts/storage/) RocksDB as its persistent storage. + ## Milvus [Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue. 
From 2f9ea8193f641c536f4a9ada869611a06708956f Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Thu, 16 Nov 2023 15:46:31 -0800 Subject: [PATCH 289/386] Add HyperClockCache Java API. (#12065) Summary: Fix https://github.com/facebook/rocksdb/issues/11510 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12065 Reviewed By: ajkr Differential Revision: D51406695 Pulled By: cbi42 fbshipit-source-id: b9e32da5f9bcafb5365e4349f7295be90d5aa7ba --- java/CMakeLists.txt | 5 ++ java/rocksjni/hyper_clock_cache.cc | 42 +++++++++++++++ .../src/main/java/org/rocksdb/ClockCache.java | 12 ++++- .../java/org/rocksdb/HyperClockCache.java | 54 +++++++++++++++++++ .../java/org/rocksdb/HyperClockCacheTest.java | 30 +++++++++++ src.mk | 1 + 6 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 java/rocksjni/hyper_clock_cache.cc create mode 100644 java/src/main/java/org/rocksdb/HyperClockCache.java create mode 100644 java/src/test/java/org/rocksdb/HyperClockCacheTest.java diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index ee158016b..0fc503e69 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -43,6 +43,7 @@ set(JNI_NATIVE_SOURCES rocksjni/export_import_files_metadatajni.cc rocksjni/filter.cc rocksjni/import_column_family_options.cc + rocksjni/hyper_clock_cache.cc rocksjni/ingest_external_file_options.cc rocksjni/iterator.cc rocksjni/jnicallback.cc @@ -173,6 +174,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/HistogramType.java src/main/java/org/rocksdb/Holder.java src/main/java/org/rocksdb/ImportColumnFamilyOptions.java + src/main/java/org/rocksdb/HyperClockCache.java src/main/java/org/rocksdb/IndexShorteningMode.java src/main/java/org/rocksdb/IndexType.java src/main/java/org/rocksdb/InfoLogLevel.java @@ -336,6 +338,7 @@ set(JAVA_TEST_CLASSES src/test/java/org/rocksdb/VerifyChecksumsTest.java src/test/java/org/rocksdb/MultiColumnRegressionTest.java src/test/java/org/rocksdb/FlushTest.java + src/test/java/org/rocksdb/HyperClockCacheTest.java src/test/java/org/rocksdb/PutMultiplePartsTest.java src/test/java/org/rocksdb/StatisticsCollectorTest.java src/test/java/org/rocksdb/LRUCacheTest.java @@ -445,6 +448,7 @@ set(JAVA_TEST_RUNNING_CLASSES org.rocksdb.VerifyChecksumsTest org.rocksdb.MultiColumnRegressionTest org.rocksdb.FlushTest + org.rocksdb.HyperClockCacheTest org.rocksdb.PutMultiplePartsTest org.rocksdb.StatisticsCollectorTest org.rocksdb.LRUCacheTest @@ -682,6 +686,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.11.4") org.rocksdb.FlushOptions org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig + org.rocksdb.HyperClockCache org.rocksdb.IngestExternalFileOptions org.rocksdb.Logger org.rocksdb.LRUCache diff --git a/java/rocksjni/hyper_clock_cache.cc b/java/rocksjni/hyper_clock_cache.cc new file mode 100644 index 000000000..782f123a5 --- /dev/null +++ b/java/rocksjni/hyper_clock_cache.cc @@ -0,0 +1,42 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// This file implements the "bridge" between Java and C++ for +// ROCKSDB_NAMESPACE::HyperClockCache. 
+ +#include + +#include "cache/clock_cache.h" +#include "include/org_rocksdb_HyperClockCache.h" +#include "rocksjni/cplusplus_to_java_convert.h" + +/* + * Class: org_rocksdb_HyperClockCache + * Method: newHyperClockCache + * Signature: (JJIZ)J + */ +jlong Java_org_rocksdb_HyperClockCache_newHyperClockCache( + JNIEnv*, jclass, jlong capacity, jlong estimatedEntryCharge, + jint numShardBits, jboolean strictCapacityLimit) { + ROCKSDB_NAMESPACE::HyperClockCacheOptions cacheOptions = + ROCKSDB_NAMESPACE::HyperClockCacheOptions( + capacity, estimatedEntryCharge, numShardBits, strictCapacityLimit); + + auto* cache = new std::shared_ptr( + cacheOptions.MakeSharedCache()); + return GET_CPLUSPLUS_POINTER(cache); +} + +/* + * Class: org_rocksdb_HyperClockCache + * Method: disposeInternalJni + * Signature: (J)V + */ +void Java_org_rocksdb_HyperClockCache_disposeInternalJni(JNIEnv*, jclass, + jlong jhandle) { + auto* hyper_clock_cache = + reinterpret_cast*>(jhandle); + delete hyper_clock_cache; // delete std::shared_ptr +} \ No newline at end of file diff --git a/java/src/main/java/org/rocksdb/ClockCache.java b/java/src/main/java/org/rocksdb/ClockCache.java index e4251db8e..f9f6da74c 100644 --- a/java/src/main/java/org/rocksdb/ClockCache.java +++ b/java/src/main/java/org/rocksdb/ClockCache.java @@ -8,12 +8,18 @@ /** * Similar to {@link LRUCache}, but based on the CLOCK algorithm with * better concurrent performance in some cases + * + * @deprecated The old Clock Cache implementation had an unresolved bug and + * has been removed. The new HyperClockCache requires an additional + * configuration parameter that is not provided by this API. This function + * simply returns a new LRUCache for functional compatibility. */ public class ClockCache extends Cache { - /** * Create a new cache with a fixed size capacity. * + * @deprecated The old Clock Cache implementation had an unresolved bug and has been removed. + * * @param capacity The fixed size capacity of the cache */ public ClockCache(final long capacity) { @@ -27,6 +33,8 @@ public ClockCache(final long capacity) { * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * + * @deprecated The old Clock Cache implementation had an unresolved bug and has been removed. + * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key @@ -43,6 +51,8 @@ public ClockCache(final long capacity, final int numShardBits) { * numShardBits = -1 means it is automatically determined: every shard * will be at least 512KB and number of shard bits will not exceed 6. * + * @deprecated The old Clock Cache implementation had an unresolved bug and has been removed. + * * @param capacity The fixed size capacity of the cache * @param numShardBits The cache is sharded to 2^numShardBits shards, * by hash of the key diff --git a/java/src/main/java/org/rocksdb/HyperClockCache.java b/java/src/main/java/org/rocksdb/HyperClockCache.java new file mode 100644 index 000000000..cd9cc1551 --- /dev/null +++ b/java/src/main/java/org/rocksdb/HyperClockCache.java @@ -0,0 +1,54 @@ +package org.rocksdb; + +/** + * HyperClockCache - A lock-free Cache alternative for RocksDB block cache + * that offers much improved CPU efficiency vs. LRUCache under high parallel + * load or high contention, with some caveats: + *
+ * <ul>
+ * <li>
+ * Not a general Cache implementation: can only be used for
+ * BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
+ * compatible with HyperClockCache.
+ * </li>
+ * <li>
+ * Requires an extra tuning parameter: see estimated_entry_charge below.
+ * Similarly, substantially changing the capacity with SetCapacity could
+ * harm efficiency. -> EXPERIMENTAL: the tuning parameter can be set to 0
+ * to find the appropriate balance automatically.
+ * </li>
+ * <li>
+ * Cache priorities are less aggressively enforced, which could cause
+ * cache dilution from long range scans (unless they use fill_cache=false).
+ * </li>
+ * <li>
+ * Can be worse for small caches, because if almost all of a cache shard is
+ * pinned (more likely with non-partitioned filters), then CLOCK eviction
+ * becomes very CPU intensive.
+ * </li>
+ * </ul>
    + */ +@Experimental("HyperClockCache is still experimental and this API may change in future.") +public class HyperClockCache extends Cache { + /** + * + * @param capacity The fixed size capacity of the cache + * @param estimatedEntryCharge EXPERIMENTAL: the field can be set to 0 to size the table + * dynamically and automatically. See C++ Api for more info. + * @param numShardBits The cache is sharded to 2^numShardBits shards, by hash of the key + * @param strictCapacityLimit insert to the cache will fail when cache is full + */ + public HyperClockCache(final long capacity, final long estimatedEntryCharge, int numShardBits, + boolean strictCapacityLimit) { + super(newHyperClockCache(capacity, estimatedEntryCharge, numShardBits, strictCapacityLimit)); + } + + @Override + protected void disposeInternal(long handle) { + disposeInternalJni(handle); + } + + private static native void disposeInternalJni(long handle); + + private static native long newHyperClockCache(final long capacity, + final long estimatedEntryCharge, int numShardBits, boolean strictCapacityLimit); +} diff --git a/java/src/test/java/org/rocksdb/HyperClockCacheTest.java b/java/src/test/java/org/rocksdb/HyperClockCacheTest.java new file mode 100644 index 000000000..2de111d99 --- /dev/null +++ b/java/src/test/java/org/rocksdb/HyperClockCacheTest.java @@ -0,0 +1,30 @@ +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class HyperClockCacheTest { + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Test + public void newHyperClockCache() throws RocksDBException { + RocksDB.loadLibrary(); + try (Cache cache = new HyperClockCache(1024 * 1024, 0, 8, false)) { + BlockBasedTableConfig tableConfing = new BlockBasedTableConfig(); + tableConfing.setBlockCache(cache); + try (Options options = new Options()) { + options.setTableFormatConfig(tableConfing); + options.setCreateIfMissing(true); + try (RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("testKey".getBytes(), "testData".getBytes()); + // no op + assertThat(cache.getUsage()).isGreaterThanOrEqualTo(0); + assertThat(cache.getPinnedUsage()).isGreaterThanOrEqualTo(0); + } + } + } + } +} diff --git a/src.mk b/src.mk index a16f16b87..a03a476ff 100644 --- a/src.mk +++ b/src.mk @@ -670,6 +670,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/import_column_family_options.cc \ java/rocksjni/ingest_external_file_options.cc \ java/rocksjni/filter.cc \ + java/rocksjni/hyper_clock_cache.cc \ java/rocksjni/iterator.cc \ java/rocksjni/jni_perf_context.cc \ java/rocksjni/jnicallback.cc \ From 4e58cc64375ce079ee973150e6d8291aed9df30a Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Fri, 17 Nov 2023 10:50:40 -0800 Subject: [PATCH 290/386] Check internal key range when compacting from last level to penultimate level (#12063) Summary: The test failure in https://github.com/facebook/rocksdb/issues/11909 shows that we may compact keys outside of internal key range of penultimate level input files from last level to penultimate level, which can potentially cause overlapping files in the penultimate level. This PR updates the `Compaction::WithinPenultimateLevelOutputRange()` to check internal key range instead of user key. 
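To make the key point concrete, here is a small sketch (not code from this PR) using the internal-key helpers from `db/dbformat.h` that the diff below relies on. A last-level key can share a user key with the penultimate input's largest key, so a user-key containment check admits it, while the internal key comparison does not:

```cpp
#include <cassert>

#include "db/dbformat.h"
#include "rocksdb/comparator.h"

using namespace ROCKSDB_NAMESPACE;

int main() {
  InternalKeyComparator icmp(BytewiseComparator());

  // Penultimate-level input range, e.g. a file [key05@20, key10@kMaxSeqno]
  // whose largest key is a range deletion sentinel.
  InternalKey smallest("key05", /*s=*/20, kTypeValue);
  InternalKey largest("key10", kMaxSequenceNumber, kTypeRangeDeletion);

  // A last-level key with the same user key but a lower sequence number.
  ParsedInternalKey last_level_key("key10", /*seq=*/5, kTypeValue);

  // User-key check (old behavior): the key looks in range.
  const Comparator* ucmp = icmp.user_comparator();
  assert(ucmp->Compare(last_level_key.user_key, smallest.user_key()) >= 0);
  assert(ucmp->Compare(last_level_key.user_key, largest.user_key()) <= 0);

  // Internal-key check (new behavior): key10@5 sorts *after* key10@kMaxSeqno
  // because higher sequence numbers come first, so it is out of range.
  assert(icmp.Compare(last_level_key, largest.Encode()) > 0);
  (void)ucmp;
  return 0;
}
```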
Other fixes: * skip range del sentinels when deciding output level for tiered compaction Pull Request resolved: https://github.com/facebook/rocksdb/pull/12063 Test Plan: - existing unit tests - apply the fix to https://github.com/facebook/rocksdb/issues/11905 and run `./tiered_compaction_test --gtest_filter="*RangeDelsCauseFileEndpointsToOverlap*"` Reviewed By: ajkr Differential Revision: D51288985 Pulled By: cbi42 fbshipit-source-id: 70085db5f5c3b15300bcbc39057d57b83fd9902a --- db/compaction/compaction.cc | 75 ++++++++++++--- db/compaction/compaction.h | 23 ++--- db/compaction/compaction_iterator.cc | 11 ++- db/compaction/compaction_iterator.h | 8 +- db/compaction/compaction_iterator_test.cc | 5 +- db/compaction/compaction_job.cc | 2 + db/compaction/compaction_job_test.cc | 12 ++- db/compaction/tiered_compaction_test.cc | 107 +++++++++++++++++++++- db/db_sst_test.cc | 5 + 9 files changed, 206 insertions(+), 42 deletions(-) diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index 99e5dd5ac..e868d785a 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -115,6 +115,42 @@ void Compaction::GetBoundaryKeys( } } +void Compaction::GetBoundaryInternalKeys( + VersionStorageInfo* vstorage, + const std::vector& inputs, InternalKey* smallest_key, + InternalKey* largest_key, int exclude_level) { + bool initialized = false; + const InternalKeyComparator* icmp = vstorage->InternalComparator(); + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs[i].files.empty() || inputs[i].level == exclude_level) { + continue; + } + if (inputs[i].level == 0) { + // we need to consider all files on level 0 + for (const auto* f : inputs[i].files) { + if (!initialized || icmp->Compare(f->smallest, *smallest_key) < 0) { + *smallest_key = f->smallest; + } + if (!initialized || icmp->Compare(f->largest, *largest_key) > 0) { + *largest_key = f->largest; + } + initialized = true; + } + } else { + // we only need to consider the first and last file + if (!initialized || + icmp->Compare(inputs[i].files[0]->smallest, *smallest_key) < 0) { + *smallest_key = inputs[i].files[0]->smallest; + } + if (!initialized || + icmp->Compare(inputs[i].files.back()->largest, *largest_key) > 0) { + *largest_key = inputs[i].files.back()->largest; + } + initialized = true; + } + } +} + std::vector Compaction::PopulateWithAtomicBoundaries( VersionStorageInfo* vstorage, std::vector inputs) { const Comparator* ucmp = vstorage->InternalComparator()->user_comparator(); @@ -399,9 +435,14 @@ void Compaction::PopulatePenultimateLevelOutputRange() { } } - GetBoundaryKeys(input_vstorage_, inputs_, - &penultimate_level_smallest_user_key_, - &penultimate_level_largest_user_key_, exclude_level); + // FIXME: should make use of `penultimate_output_range_type_`. + // FIXME: when last level's input range does not overlap with + // penultimate level, and penultimate level input is empty, + // this call will not set penultimate_level_smallest_ or + // penultimate_level_largest_. No keys will be compacted up. + GetBoundaryInternalKeys(input_vstorage_, inputs_, + &penultimate_level_smallest_, + &penultimate_level_largest_, exclude_level); } Compaction::~Compaction() { @@ -426,33 +467,39 @@ bool Compaction::OverlapPenultimateLevelOutputRange( if (!SupportsPerKeyPlacement()) { return false; } + + // See FIXME in Compaction::PopulatePenultimateLevelOutputRange(). + // We do not compact any key up in this case. 
+ if (penultimate_level_smallest_.size() == 0 || + penultimate_level_largest_.size() == 0) { + return false; + } + const Comparator* ucmp = input_vstorage_->InternalComparator()->user_comparator(); return ucmp->CompareWithoutTimestamp( - smallest_key, penultimate_level_largest_user_key_) <= 0 && + smallest_key, penultimate_level_largest_.user_key()) <= 0 && ucmp->CompareWithoutTimestamp( - largest_key, penultimate_level_smallest_user_key_) >= 0; + largest_key, penultimate_level_smallest_.user_key()) >= 0; } // key includes timestamp if user-defined timestamp is enabled. -bool Compaction::WithinPenultimateLevelOutputRange(const Slice& key) const { +bool Compaction::WithinPenultimateLevelOutputRange( + const ParsedInternalKey& ikey) const { if (!SupportsPerKeyPlacement()) { return false; } - if (penultimate_level_smallest_user_key_.empty() || - penultimate_level_largest_user_key_.empty()) { + if (penultimate_level_smallest_.size() == 0 || + penultimate_level_largest_.size() == 0) { return false; } - const Comparator* ucmp = - input_vstorage_->InternalComparator()->user_comparator(); + const InternalKeyComparator* icmp = input_vstorage_->InternalComparator(); - return ucmp->CompareWithoutTimestamp( - key, penultimate_level_smallest_user_key_) >= 0 && - ucmp->CompareWithoutTimestamp( - key, penultimate_level_largest_user_key_) <= 0; + return icmp->Compare(ikey, penultimate_level_smallest_.Encode()) >= 0 && + icmp->Compare(ikey, penultimate_level_largest_.Encode()) <= 0; } bool Compaction::InputCompressionMatchesOutput() const { diff --git a/db/compaction/compaction.h b/db/compaction/compaction.h index 22ce20259..50c75f70b 100644 --- a/db/compaction/compaction.h +++ b/db/compaction/compaction.h @@ -353,14 +353,6 @@ class Compaction { Slice GetLargestUserKey() const { return largest_user_key_; } - Slice GetPenultimateLevelSmallestUserKey() const { - return penultimate_level_smallest_user_key_; - } - - Slice GetPenultimateLevelLargestUserKey() const { - return penultimate_level_largest_user_key_; - } - PenultimateOutputRangeType GetPenultimateOutputRangeType() const { return penultimate_output_range_type_; } @@ -383,10 +375,8 @@ class Compaction { // per_key_placement feature, which is safe to place the key to the // penultimate level. different compaction strategy has different rules. // If per_key_placement is not supported, always return false. - // TODO: currently it doesn't support moving data from the last level to the - // penultimate level // key includes timestamp if user-defined timestamp is enabled. - bool WithinPenultimateLevelOutputRange(const Slice& key) const; + bool WithinPenultimateLevelOutputRange(const ParsedInternalKey& ikey) const; CompactionReason compaction_reason() const { return compaction_reason_; } @@ -456,6 +446,13 @@ class Compaction { Slice* smallest_key, Slice* largest_key, int exclude_level = -1); + // get the smallest and largest internal key present in files to be compacted + static void GetBoundaryInternalKeys( + VersionStorageInfo* vstorage, + const std::vector& inputs, + InternalKey* smallest_key, InternalKey* largest_key, + int exclude_level = -1); + // populate penultimate level output range, which will be used to determine if // a key is safe to output to the penultimate level (details see // `Compaction::WithinPenultimateLevelOutputRange()`. @@ -568,8 +565,8 @@ class Compaction { // Key range for penultimate level output // includes timestamp if user-defined timestamp is enabled. 
// penultimate_output_range_type_ shows the range type - Slice penultimate_level_smallest_user_key_; - Slice penultimate_level_largest_user_key_; + InternalKey penultimate_level_smallest_; + InternalKey penultimate_level_largest_; PenultimateOutputRangeType penultimate_output_range_type_ = PenultimateOutputRangeType::kNotSupported; }; diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index abfa7a692..85d1c039b 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -1227,7 +1227,7 @@ void CompactionIterator::DecideOutputLevel() { // not from this compaction. // TODO: add statistic for declined output_to_penultimate_level bool safe_to_penultimate_level = - compaction_->WithinPenultimateLevelOutputRange(ikey_.user_key); + compaction_->WithinPenultimateLevelOutputRange(ikey_); if (!safe_to_penultimate_level) { output_to_penultimate_level_ = false; // It could happen when disable/enable `last_level_temperature` while @@ -1256,10 +1256,13 @@ void CompactionIterator::PrepareOutput() { } else if (ikey_.type == kTypeBlobIndex) { GarbageCollectBlobIfNeeded(); } - } - if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) { - DecideOutputLevel(); + // For range del sentinel, we don't use it to cut files for bottommost + // compaction. So it should not make a difference which output level we + // decide. + if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) { + DecideOutputLevel(); + } } // Zeroing out the sequence number leads to better compression. diff --git a/db/compaction/compaction_iterator.h b/db/compaction/compaction_iterator.h index 15193b587..1ff9c8869 100644 --- a/db/compaction/compaction_iterator.h +++ b/db/compaction/compaction_iterator.h @@ -119,7 +119,8 @@ class CompactionIterator { virtual bool SupportsPerKeyPlacement() const = 0; // `key` includes timestamp if user-defined timestamp is enabled. - virtual bool WithinPenultimateLevelOutputRange(const Slice& key) const = 0; + virtual bool WithinPenultimateLevelOutputRange( + const ParsedInternalKey&) const = 0; }; class RealCompaction : public CompactionProxy { @@ -186,8 +187,9 @@ class CompactionIterator { // Check if key is within penultimate level output range, to see if it's // safe to output to the penultimate level for per_key_placement feature. // `key` includes timestamp if user-defined timestamp is enabled. 
- bool WithinPenultimateLevelOutputRange(const Slice& key) const override { - return compaction_->WithinPenultimateLevelOutputRange(key); + bool WithinPenultimateLevelOutputRange( + const ParsedInternalKey& ikey) const override { + return compaction_->WithinPenultimateLevelOutputRange(ikey); } private: diff --git a/db/compaction/compaction_iterator_test.cc b/db/compaction/compaction_iterator_test.cc index 20428a586..699e62969 100644 --- a/db/compaction/compaction_iterator_test.cc +++ b/db/compaction/compaction_iterator_test.cc @@ -184,8 +184,9 @@ class FakeCompaction : public CompactionIterator::CompactionProxy { return supports_per_key_placement; } - bool WithinPenultimateLevelOutputRange(const Slice& key) const override { - return (!key.starts_with("unsafe_pb")); + bool WithinPenultimateLevelOutputRange( + const ParsedInternalKey& key) const override { + return (!key.user_key.starts_with("unsafe_pb")); } bool key_not_exists_beyond_output_level = false; diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 257848e46..99b099759 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1900,6 +1900,8 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, sub_compact->start.has_value() ? &tmp_start : nullptr, sub_compact->end.has_value() ? &tmp_end : nullptr); if (oldest_ancester_time == std::numeric_limits::max()) { + // TODO: fix DBSSTTest.GetTotalSstFilesSize and use + // kUnknownOldestAncesterTime oldest_ancester_time = current_time; } diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index c4e985c8b..a16891110 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -1527,13 +1527,15 @@ TEST_F(CompactionJobTest, VerifyPenultimateLevelOutput) { {files0, files1, files2, files3}, input_levels, /*verify_func=*/[&](Compaction& comp) { for (char c = 'a'; c <= 'z'; c++) { - std::string c_str; - c_str = c; - const Slice key(c_str); if (c == 'a') { - ASSERT_FALSE(comp.WithinPenultimateLevelOutputRange(key)); + ParsedInternalKey pik("a", 0U, kTypeValue); + ASSERT_FALSE(comp.WithinPenultimateLevelOutputRange(pik)); } else { - ASSERT_TRUE(comp.WithinPenultimateLevelOutputRange(key)); + std::string c_str{c}; + // WithinPenultimateLevelOutputRange checks internal key range. + // 'z' is the last key, so set seqno properly. + ParsedInternalKey pik(c_str, c == 'z' ? 12U : 0U, kTypeValue); + ASSERT_TRUE(comp.WithinPenultimateLevelOutputRange(pik)); } } }); diff --git a/db/compaction/tiered_compaction_test.cc b/db/compaction/tiered_compaction_test.cc index 0c6d1aa35..779b980d8 100644 --- a/db/compaction/tiered_compaction_test.cc +++ b/db/compaction/tiered_compaction_test.cc @@ -1202,15 +1202,120 @@ TEST_P(TieredCompactionTest, RangeBasedTieredStorageLevel) { ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel()); ASSERT_EQ(GetSstSizeHelper(Temperature::kUnknown), 0); ASSERT_GT(GetSstSizeHelper(Temperature::kCold), 0); - ASSERT_EQ( options.statistics->getTickerCount(COMPACTION_RANGE_DEL_DROP_OBSOLETE), 1); + + // Tests that we only compact keys up to penultimate level + // that are within penultimate level input's internal key range. + { + MutexLock l(&mutex); + hot_start = Key(0); + hot_end = Key(100); + } + const Snapshot* temp_snap = db_->GetSnapshot(); + // Key(0) and Key(1) here are inserted with higher sequence number + // than Key(0) and Key(1) inserted above. + // Only Key(0) in last level will be compacted up, not Key(1). 
+ ASSERT_OK(Put(Key(0), "value" + std::to_string(0))); + ASSERT_OK(Put(Key(1), "value" + std::to_string(100))); + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + ASSERT_EQ("0,0,0,0,0,1,1", FilesPerLevel()); + { + std::vector metas; + db_->GetLiveFilesMetaData(&metas); + for (const auto& f : metas) { + if (f.temperature == Temperature::kUnknown) { + // Expect Key(0), Key(0), Key(1) + ASSERT_EQ(f.num_entries, 3); + ASSERT_EQ(f.smallestkey, Key(0)); + ASSERT_EQ(f.largestkey, Key(1)); + } else { + ASSERT_EQ(f.temperature, Temperature::kCold); + // Key(2)-Key(49) and Key(100). + ASSERT_EQ(f.num_entries, 50); + } + } + } + db_->ReleaseSnapshot(temp_snap); } INSTANTIATE_TEST_CASE_P(TieredCompactionTest, TieredCompactionTest, testing::Bool()); +TEST_P(TieredCompactionTest, CheckInternalKeyRange) { + // When compacting keys from the last level to penultimate level, + // output to penultimate level should be within internal key range + // of input files from penultimate level. + // Set up: + // L5: + // File 1: DeleteRange[1, 3)@4, File 2: [3@5, 100@6] + // L6: + // File 3: [2@1, 3@2], File 4: [50@3] + // + // When File 1 and File 3 are being compacted, + // Key(3) cannot be compacted up, otherwise it causes + // inconsistency where File 3's Key(3) has a lower sequence number + // than File 2's Key(3). + const int kNumLevels = 7; + auto options = CurrentOptions(); + SetColdTemperature(options); + options.level_compaction_dynamic_level_bytes = true; + options.num_levels = kNumLevels; + options.statistics = CreateDBStatistics(); + options.max_subcompactions = 10; + options.preclude_last_level_data_seconds = 10000; + DestroyAndReopen(options); + auto cmp = options.comparator; + + std::string hot_start = Key(0); + std::string hot_end = Key(0); + SyncPoint::GetInstance()->SetCallBack( + "CompactionIterator::PrepareOutput.context", [&](void* arg) { + auto context = static_cast(arg); + context->output_to_penultimate_level = + cmp->Compare(context->key, hot_start) >= 0 && + cmp->Compare(context->key, hot_end) < 0; + }); + SyncPoint::GetInstance()->EnableProcessing(); + // File 1 + ASSERT_OK(Put(Key(2), "val2")); + ASSERT_OK(Put(Key(3), "val3")); + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + // File 2 + ASSERT_OK(Put(Key(50), "val50")); + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + + const Snapshot* snapshot = db_->GetSnapshot(); + hot_end = Key(100); + std::string start = Key(1); + std::string end = Key(3); + ASSERT_OK( + db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), start, end)); + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + // File 3 + ASSERT_OK(Put(Key(3), "vall")); + ASSERT_OK(Put(Key(100), "val100")); + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + // Try to compact keys up + CompactRangeOptions cro; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + start = Key(1); + end = Key(2); + Slice begin_slice(start); + Slice end_slice(end); + ASSERT_OK(db_->CompactRange(cro, &begin_slice, &end_slice)); + // Without internal key range checking, we get the following error: + // Corruption: force_consistency_checks(DEBUG): VersionBuilder: L5 has + // overlapping ranges: file #18 largest key: '6B6579303030303033' seq:102, + // type:1 vs. 
file #15 smallest key: '6B6579303030303033' seq:104, type:1 + db_->ReleaseSnapshot(snapshot); +} + class PrecludeLastLevelTest : public DBTestBase { public: PrecludeLastLevelTest() diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index 95ed405a2..7590aa2f1 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -1538,6 +1538,11 @@ TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFilesSubjectToMemoryLimit) { } TEST_F(DBSSTTest, GetTotalSstFilesSize) { + // FIXME: L0 file and L1+ file also differ in size of `oldest_key_time`. + // L0 file has non-zero `oldest_key_time` while L1+ files have 0. + // The test passes since L1+ file uses current time instead of 0 + // as oldest_ancestor_time. + // // We don't propagate oldest-key-time table property on compaction and // just write 0 as default value. This affect the exact table size, since // we encode table properties as varint64. Force time to be 0 to work around From 7780e98268769a80bb611aad8ada9d4eaeeea528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20M=C3=A9riaux?= Date: Fri, 17 Nov 2023 11:34:05 -0800 Subject: [PATCH 291/386] add write_buffer_manager setter into options and tests in c bindings, (#12007) Summary: following https://github.com/facebook/rocksdb/pull/11710 - add test on wbm c api - add a setter for WBM in `DBOptions` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12007 Reviewed By: cbi42 Differential Revision: D51430042 Pulled By: ajkr fbshipit-source-id: 608bc4d3ed35a84200459d0230b35be64b3475f7 --- db/c.cc | 5 +++++ db/c_test.c | 24 ++++++++++++++++++++++++ include/rocksdb/c.h | 2 ++ 3 files changed, 31 insertions(+) diff --git a/db/c.cc b/db/c.cc index e071ef7a4..5555ae198 100644 --- a/db/c.cc +++ b/db/c.cc @@ -2950,6 +2950,11 @@ void rocksdb_options_set_write_buffer_size(rocksdb_options_t* opt, size_t s) { opt->rep.write_buffer_size = s; } +void rocksdb_options_set_write_buffer_manager( + rocksdb_options_t* opt, rocksdb_write_buffer_manager_t* wbm) { + opt->rep.write_buffer_manager = wbm->rep; +} + size_t rocksdb_options_get_write_buffer_size(rocksdb_options_t* opt) { return opt->rep.write_buffer_size; } diff --git a/db/c_test.c b/db/c_test.c index 802dc093d..667220496 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -3781,6 +3781,30 @@ int main(int argc, char** argv) { rocksdb_wait_for_compact_options_destroy(wco); } + StartPhase("write_buffer_manager"); + { + rocksdb_cache_t* lru; + lru = rocksdb_cache_create_lru(100); + + rocksdb_write_buffer_manager_t* write_buffer_manager; + write_buffer_manager = + rocksdb_write_buffer_manager_create_with_cache(200, lru, false); + + CheckCondition(true == + rocksdb_write_buffer_manager_enabled(write_buffer_manager)); + CheckCondition(true == rocksdb_write_buffer_manager_cost_to_cache( + write_buffer_manager)); + CheckCondition( + 200 == rocksdb_write_buffer_manager_buffer_size(write_buffer_manager)); + + rocksdb_write_buffer_manager_set_buffer_size(write_buffer_manager, 300); + CheckCondition( + 300 == rocksdb_write_buffer_manager_buffer_size(write_buffer_manager)); + + rocksdb_write_buffer_manager_destroy(write_buffer_manager); + rocksdb_cache_destroy(lru); + } + StartPhase("cancel_all_background_work"); rocksdb_cancel_all_background_work(db, 1); diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 35e01f4c0..8a26585fe 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1061,6 +1061,8 @@ rocksdb_block_based_options_set_pin_top_level_index_and_filter( rocksdb_block_based_table_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API void 
rocksdb_options_set_block_based_table_factory( rocksdb_options_t* opt, rocksdb_block_based_table_options_t* table_options); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_write_buffer_manager( + rocksdb_options_t* opt, rocksdb_write_buffer_manager_t* wbm); /* Cuckoo table options */ From b059c5680ed3c7252853cfe3e5a478f883959405 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Sun, 19 Nov 2023 09:50:59 -0800 Subject: [PATCH 292/386] Add missing copyright header (#12076) Summary: Required for open source repo. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12076 Reviewed By: ajkr Differential Revision: D51449839 Pulled By: cbi42 fbshipit-source-id: 4a25a3422880db3f28a2834d966341935db32530 --- java/src/main/java/org/rocksdb/HyperClockCache.java | 6 ++++++ java/src/test/java/org/rocksdb/HyperClockCacheTest.java | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/java/src/main/java/org/rocksdb/HyperClockCache.java b/java/src/main/java/org/rocksdb/HyperClockCache.java index cd9cc1551..f8fe42be7 100644 --- a/java/src/main/java/org/rocksdb/HyperClockCache.java +++ b/java/src/main/java/org/rocksdb/HyperClockCache.java @@ -1,3 +1,9 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + package org.rocksdb; /** diff --git a/java/src/test/java/org/rocksdb/HyperClockCacheTest.java b/java/src/test/java/org/rocksdb/HyperClockCacheTest.java index 2de111d99..132d69351 100644 --- a/java/src/test/java/org/rocksdb/HyperClockCacheTest.java +++ b/java/src/test/java/org/rocksdb/HyperClockCacheTest.java @@ -1,3 +1,9 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + package org.rocksdb; import static org.assertj.core.api.Assertions.assertThat; From 39d33475da7b6db8372ebd3c4db74993aacc18a2 Mon Sep 17 00:00:00 2001 From: Timo Riski Date: Mon, 20 Nov 2023 10:11:16 -0800 Subject: [PATCH 293/386] Fix build on FreeBSD (#11218) (#12078) Summary: Fixes https://github.com/facebook/rocksdb/issues/11218 Changes from https://github.com/facebook/rocksdb/issues/10881 broke FreeBSD builds with: env/io_posix.h:39:9: error: 'POSIX_MADV_NORMAL' macro redefined [-Werror,-Wmacro-redefined] This commit fixes FreeBSD builds by ignoring MADV defines. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12078 Reviewed By: cbi42 Differential Revision: D51452802 Pulled By: ajkr fbshipit-source-id: 0a1f5a90954e7d257a95794277a843ac77f3a709 --- env/io_posix.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/env/io_posix.h b/env/io_posix.h index f129668ea..8c51ba645 100644 --- a/env/io_posix.h +++ b/env/io_posix.h @@ -29,7 +29,8 @@ // For non linux platform, the following macros are used only as place // holder. 
-#if !(defined OS_LINUX) && !(defined CYGWIN) && !(defined OS_AIX) +#if !(defined OS_LINUX) && !(defined OS_FREEBSD) && !(defined CYGWIN) && \ + !(defined OS_AIX) #define POSIX_FADV_NORMAL 0 /* [MC1] no further special treatment */ #define POSIX_FADV_RANDOM 1 /* [MC1] expect random page refs */ #define POSIX_FADV_SEQUENTIAL 2 /* [MC1] expect sequential page refs */ From fb5c8c7ea370af84135316a6a9a402e4b8f194b6 Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Mon, 20 Nov 2023 17:07:28 -0800 Subject: [PATCH 294/386] Do not compare op_type in `WithinPenultimateLevelOutputRange()` (#12081) Summary: `WithinPenultimateLevelOutputRange()` is updated in https://github.com/facebook/rocksdb/issues/12063 to check internal key range. However, op_type of a key can change during compaction, e.g. MERGE -> PUT, which makes a key larger and becomes out of penultimate output range. This has caused stress test failures with error message "Unsafe to store Seq later than snapshot in the last level if per_key_placement is enabled". So update `WithinPenultimateLevelOutputRange()` to only check user key and sequence number. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12081 Test Plan: * This repro can produce the corruption within a few runs. Ran it a few times after the fix and did not see Corruption failure. ``` python3 ./tools/db_crashtest.py whitebox --test_tiered_storage --random_kill_odd=888887 --use_merge=1 --writepercent=100 --readpercent=0 --prefixpercent=0 --delpercent=0 --delrangepercent=0 --iterpercent=0 --write_buffer_size=419430 --column_families=1 --read_fault_one_in=0 --write_fault_one_in=0 ``` Reviewed By: ajkr Differential Revision: D51481202 Pulled By: cbi42 fbshipit-source-id: cad6b65099733e03071b496e752bbdb09cf4db82 --- db/compaction/compaction.cc | 5 +++-- db/dbformat.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/db/compaction/compaction.cc b/db/compaction/compaction.cc index e868d785a..bbab8f79f 100644 --- a/db/compaction/compaction.cc +++ b/db/compaction/compaction.cc @@ -498,8 +498,9 @@ bool Compaction::WithinPenultimateLevelOutputRange( const InternalKeyComparator* icmp = input_vstorage_->InternalComparator(); - return icmp->Compare(ikey, penultimate_level_smallest_.Encode()) >= 0 && - icmp->Compare(ikey, penultimate_level_largest_.Encode()) <= 0; + // op_type of a key can change during compaction, e.g. Merge -> Put. 
+ return icmp->CompareKeySeq(ikey, penultimate_level_smallest_.Encode()) >= 0 && + icmp->CompareKeySeq(ikey, penultimate_level_largest_.Encode()) <= 0; } bool Compaction::InputCompressionMatchesOutput() const { diff --git a/db/dbformat.h b/db/dbformat.h index 6ce1bafb8..981866c09 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -355,6 +355,7 @@ class InternalKeyComparator // Same as Compare except that it excludes the value type from comparison int CompareKeySeq(const Slice& a, const Slice& b) const; + int CompareKeySeq(const ParsedInternalKey& a, const Slice& b) const; const Comparator* user_comparator() const { return user_comparator_.user_comparator(); @@ -976,6 +977,26 @@ inline int InternalKeyComparator::CompareKeySeq(const Slice& akey, return r; } +inline int InternalKeyComparator::CompareKeySeq(const ParsedInternalKey& a, + const Slice& b) const { + // Order by: + // increasing user key (according to user-supplied comparator) + // decreasing sequence number + int r = user_comparator_.Compare(a.user_key, ExtractUserKey(b)); + if (r == 0) { + // Shift the number to exclude the last byte which contains the value type + const uint64_t anum = a.sequence; + const uint64_t bnum = + DecodeFixed64(b.data() + b.size() - kNumInternalBytes) >> 8; + if (anum > bnum) { + r = -1; + } else if (anum < bnum) { + r = +1; + } + } + return r; +} + inline int InternalKeyComparator::Compare(const Slice& a, SequenceNumber a_global_seqno, const Slice& b, From 336a74db604f10e1a4cc5d2e480fc8a895137f69 Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 20 Nov 2023 17:48:17 -0800 Subject: [PATCH 295/386] Add some asserts in ~CacheWithSecondaryAdapter (#12082) Summary: Add some asserts in the `CacheWithSecondaryAdapter` destructor to help debug a crash test failure. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12082 Reviewed By: cbi42 Differential Revision: D51486041 Pulled By: anand1976 fbshipit-source-id: 76537beed31ba27ab9ac8b4ce6deb775629e3be5 --- cache/secondary_cache_adapter.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index 6e7716754..b36f3a381 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -119,6 +119,8 @@ CacheWithSecondaryAdapter::~CacheWithSecondaryAdapter() { size_t sec_capacity = 0; Status s = secondary_cache_->GetCapacity(sec_capacity); assert(s.ok()); + assert(placeholder_usage_ == 0); + assert(reserved_usage_ == 0); assert(pri_cache_res_->GetTotalMemoryUsed() == sec_capacity); } #endif // NDEBUG From 04cbc77b907e484f50b0b52eaed8878457d23095 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 21 Nov 2023 08:36:30 -0800 Subject: [PATCH 296/386] Add missing license to source files (#12083) Summary: Fixes https://github.com/facebook/rocksdb/issues/12079. 
Fixed missing licenses in "\*.h" and "\*.cc" files Pull Request resolved: https://github.com/facebook/rocksdb/pull/12083 Reviewed By: cbi42 Differential Revision: D51489634 Pulled By: ajkr fbshipit-source-id: 764bfee257b9d6603fd7606a55664b7537e1898f --- .../merge_operators/string_append/stringappend.cc | 11 ++++++----- .../merge_operators/string_append/stringappend.h | 11 ++++++----- .../merge_operators/string_append/stringappend2.cc | 8 ++++---- utilities/transactions/lock/range/range_tree/lib/db.h | 5 +++++ .../range_tree/lib/portability/toku_assert_subst.h | 4 ++++ .../lib/portability/toku_external_pthread.h | 4 ++++ .../lock/range/range_tree/lib/portability/txn_subst.h | 4 ++++ .../lock/range/range_tree/lib/standalone_port.cc | 5 +++++ 8 files changed, 38 insertions(+), 14 deletions(-) diff --git a/utilities/merge_operators/string_append/stringappend.cc b/utilities/merge_operators/string_append/stringappend.cc index 720dc7200..748e5c89f 100644 --- a/utilities/merge_operators/string_append/stringappend.cc +++ b/utilities/merge_operators/string_append/stringappend.cc @@ -1,8 +1,9 @@ -/** - * A MergeOperator for rocksdb that implements string append. - * @author Deon Nicholas (dnicholas@fb.com) - * Copyright 2013 Facebook - */ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// A MergeOperator for rocksdb that implements string append. #include "stringappend.h" diff --git a/utilities/merge_operators/string_append/stringappend.h b/utilities/merge_operators/string_append/stringappend.h index 153532382..4a7b2b9e5 100644 --- a/utilities/merge_operators/string_append/stringappend.h +++ b/utilities/merge_operators/string_append/stringappend.h @@ -1,8 +1,9 @@ -/** - * A MergeOperator for rocksdb that implements string append. - * @author Deon Nicholas (dnicholas@fb.com) - * Copyright 2013 Facebook - */ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// A MergeOperator for rocksdb that implements string append. #pragma once #include "rocksdb/merge_operator.h" diff --git a/utilities/merge_operators/string_append/stringappend2.cc b/utilities/merge_operators/string_append/stringappend2.cc index 8b6fe9020..bd0716cc3 100644 --- a/utilities/merge_operators/string_append/stringappend2.cc +++ b/utilities/merge_operators/string_append/stringappend2.cc @@ -1,7 +1,7 @@ -/** - * @author Deon Nicholas (dnicholas@fb.com) - * Copyright 2013 Facebook - */ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). #include "stringappend2.h" diff --git a/utilities/transactions/lock/range/range_tree/lib/db.h b/utilities/transactions/lock/range/range_tree/lib/db.h index 5aa826c8e..99cfa1f54 100644 --- a/utilities/transactions/lock/range/range_tree/lib/db.h +++ b/utilities/transactions/lock/range/range_tree/lib/db.h @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + #ifndef _DB_H #define _DB_H diff --git a/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h b/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h index af47800fb..c50a3a07f 100644 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_assert_subst.h @@ -1,3 +1,7 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). // // A replacement for toku_assert.h // diff --git a/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h b/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h index eb8291c1d..ad1d7bf54 100644 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h @@ -1,3 +1,7 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). /* A wrapper around ROCKSDB_NAMESPACE::TransactionDBMutexFactory-provided condition and mutex that provides toku_pthread_*-like interface. The functions diff --git a/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h b/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h index 803914862..f4013bb36 100644 --- a/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h +++ b/utilities/transactions/lock/range/range_tree/lib/portability/txn_subst.h @@ -1,3 +1,7 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). // // A substitute for ft/txn/txn.h // diff --git a/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc b/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc index 5d0b5228c..6dc86cc99 100644 --- a/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc +++ b/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc @@ -1,3 +1,8 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + #ifndef OS_WIN /* This is a dump ground to make Lock Tree work without the rest of TokuDB. From d3e015fe06a6ba01e0c93687416deb1d70c57769 Mon Sep 17 00:00:00 2001 From: songqing Date: Tue, 21 Nov 2023 09:34:59 -0800 Subject: [PATCH 297/386] Fix compact_files_example (#12084) Summary: The option "write_buffer_size" has changed from 4MB to 64MB by default, and the compact_files_example will not work as expected, as the test data written is only about 50MB and will not trigger compaction.
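For context, a memtable is normally flushed to an L0 file only once it fills up to `write_buffer_size`, so with the new 64MB default a ~50MB workload never produces enough L0 files for the example's manual compaction to have anything to do. A rough sketch of the idea behind the fix (a hypothetical helper; the exact values used by the example are in the diff below):

```
#include "rocksdb/options.h"

// Hypothetical helper: keep memtables small so ~50MB of writes flushes into
// many L0 files, giving the example's CompactFiles() call work to do.
rocksdb::Options MakeExampleOptions() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.write_buffer_size = 4 << 20;  // 4MB memtables instead of the 64MB default
  options.level0_slowdown_writes_trigger = 3;
  options.level0_stop_writes_trigger = 5;
  return options;
}
```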
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12084 Reviewed By: cbi42 Differential Revision: D51499959 Pulled By: ajkr fbshipit-source-id: 4f4b25ebc4b6bb568501adc8e97813edcddceea8 --- examples/compact_files_example.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/compact_files_example.cc b/examples/compact_files_example.cc index 1ecf8c794..544adf8ae 100644 --- a/examples/compact_files_example.cc +++ b/examples/compact_files_example.cc @@ -144,6 +144,8 @@ int main() { options.create_if_missing = true; // Disable RocksDB background compaction. options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleNone; + // Small write buffer size for generating more sst files in level 0. + options.write_buffer_size = 4 << 20; // Small slowdown and stop trigger for experimental purpose. options.level0_slowdown_writes_trigger = 3; options.level0_stop_writes_trigger = 5; From 84a54e1e28881bb5f4973475812a1344107d892b Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 21 Nov 2023 14:05:02 -0800 Subject: [PATCH 298/386] Fix some bugs in index builder and reader for the UDT in memtable only feature (#12062) Summary: These bugs surfaced while I was trying to add the stress test for the feature: Bug 1) On the index building path: the optimization to use user key instead of internal key as separator needed a small tweak for when user-defined timestamps can be removed. Even though the user keys look different now and are eligible to be used as separators, once their user-defined timestamps are removed they could be equal, and that invariant no longer stands. Bug 2) On the index reading path: one path that builds the second level index iterator for `PartitionedIndexReader` was not passing the corresponding `user_defined_timestamps_persisted` flag. As a result, the default `true` value is used, leading to no minimum timestamps being padded when they should be. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12062 Test Plan: For bug 1): added separate unit test `BlockBasedTableReaderTest::Get` to exercise the `Get` API. It's a different code path from `MultiGet` so worth having its own test. Also in order to cover the bug, the test is modified to generate key values with the same user-provided key, different timestamps and different sequence numbers. The test reads back different versions of the same user-provided key. `MultiGet` takes one `ReadOptions` with one read timestamp so we cannot test retrieving different versions of the same key easily. For bug 2): simply added options `BlockBasedTableOptions.metadata_cache_options.partition_pinning = PinningTier::kAll` to exercise all the index iterator creating paths.
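The invariant behind bug 1 can be made concrete with a small standalone sketch (the helpers below are invented for illustration and do not reproduce RocksDB's actual key encoding): when user-defined timestamps are not persisted, two keys that differ only in their timestamp suffix collapse to the same user key, so a separator derived from the user key alone no longer distinguishes them and the sequence number has to be kept in the separator.

```
#include <cassert>
#include <cstdint>
#include <string>

// Invented encoding: user key with a fixed-width 8-byte timestamp suffix.
std::string AppendTs(const std::string& user_key, uint64_t ts) {
  std::string key = user_key;
  key.append(reinterpret_cast<const char*>(&ts), sizeof(ts));
  return key;
}

// Strip the fixed-width timestamp suffix again.
std::string StripTs(const std::string& key_with_ts) {
  return key_with_ts.substr(0, key_with_ts.size() - sizeof(uint64_t));
}

int main() {
  std::string a = AppendTs("foo", 7);
  std::string b = AppendTs("foo", 3);
  assert(a != b);                    // distinct while timestamps are kept
  assert(StripTs(a) == StripTs(b));  // identical once timestamps are removed
  return 0;
}
```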
Reviewed By: ltamasi Differential Revision: D51508280 Pulled By: jowlyzhang fbshipit-source-id: 8b174d3d70373c0599266ac1f467f2bd4d7ea6e5 --- table/block_based/block_based_table_reader.cc | 4 +- .../block_based_table_reader_test.cc | 198 ++++++++++++++---- table/block_based/index_builder.h | 32 ++- .../index_bug_fix_for_udt_in_memtable_only.md | 1 + 4 files changed, 183 insertions(+), 52 deletions(-) create mode 100644 unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 7658150aa..69a499d32 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -1895,7 +1895,8 @@ BlockBasedTable::PartitionedIndexIteratorState::NewSecondaryIterator( rep->internal_comparator.user_comparator(), rep->get_global_seqno(BlockType::kIndex), nullptr, kNullStats, true, rep->index_has_first_key, rep->index_key_includes_seq, - rep->index_value_is_full); + rep->index_value_is_full, /*block_contents_pinned=*/false, + rep->user_defined_timestamps_persisted); } // This will be broken if the user specifies an unusual implementation @@ -2644,6 +2645,7 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, options, /*need_upper_bound_check=*/false, /*input_iter=*/nullptr, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); iiter->Seek(key); + assert(iiter->status().ok()); assert(iiter->Valid()); return TEST_BlockInCache(iiter->value().handle); diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc index 2aaf505f8..254546893 100644 --- a/table/block_based/block_based_table_reader_test.cc +++ b/table/block_based/block_based_table_reader_test.cc @@ -30,22 +30,42 @@ namespace ROCKSDB_NAMESPACE { class BlockBasedTableReaderBaseTest : public testing::Test { + public: + static constexpr int kBytesPerEntry = 256; + // 16 = (default block size) 4 * 1024 / kBytesPerEntry + static constexpr int kEntriesPerBlock = 16; + protected: // Prepare key-value pairs to occupy multiple blocks. - // Each value is 256B, every 16 pairs constitute 1 block. + // Each (key, value) pair is `kBytesPerEntry` byte, every kEntriesPerBlock + // pairs constitute 1 block. // If mixed_with_human_readable_string_value == true, // then adjacent blocks contain values with different compression // complexity: human readable strings are easier to compress than random - // strings. - static std::map GenerateKVMap( - int num_block = 100, bool mixed_with_human_readable_string_value = false, - size_t ts_sz = 0) { - std::map kv; - + // strings. key is an internal key. + // When ts_sz > 0 and `same_key_diff_ts` is true, this + // function generate keys with the same user provided key, with different + // user defined timestamps and different sequence number to differentiate them + static std::vector> GenerateKVMap( + int num_block = 2, bool mixed_with_human_readable_string_value = false, + size_t ts_sz = 0, bool same_key_diff_ts = false) { + std::vector> kv; + + SequenceNumber seq_no = 0; + uint64_t current_udt = 0; + if (same_key_diff_ts) { + // These numbers are based on the number of keys to create + an arbitrary + // buffer number (100) to avoid overflow. + current_udt = kEntriesPerBlock * num_block + 100; + seq_no = kEntriesPerBlock * num_block + 100; + } Random rnd(101); uint32_t key = 0; + // To make each (key, value) pair occupy exactly kBytesPerEntry bytes. 
+ int value_size = kBytesPerEntry - (8 + static_cast(ts_sz) + + static_cast(kNumInternalBytes)); for (int block = 0; block < num_block; block++) { - for (int i = 0; i < 16; i++) { + for (int i = 0; i < kEntriesPerBlock; i++) { char k[9] = {0}; // Internal key is constructed directly from this key, // and internal key size is required to be >= 8 bytes, @@ -53,19 +73,27 @@ class BlockBasedTableReaderBaseTest : public testing::Test { snprintf(k, sizeof(k), "%08u", key); std::string v; if (mixed_with_human_readable_string_value) { - v = (block % 2) ? rnd.HumanReadableString(256) - : rnd.RandomString(256); + v = (block % 2) ? rnd.HumanReadableString(value_size) + : rnd.RandomString(value_size); } else { - v = rnd.RandomString(256); + v = rnd.RandomString(value_size); } + std::string user_key = std::string(k); if (ts_sz > 0) { - std::string user_key; - AppendKeyWithMinTimestamp(&user_key, std::string(k), ts_sz); - kv[user_key] = v; + if (same_key_diff_ts) { + PutFixed64(&user_key, current_udt); + current_udt -= 1; + } else { + PutFixed64(&user_key, 0); + } + } + InternalKey internal_key(user_key, seq_no, ValueType::kTypeValue); + kv.emplace_back(internal_key.Encode().ToString(), v); + if (same_key_diff_ts) { + seq_no -= 1; } else { - kv[std::string(k)] = v; + key++; } - key++; } } return kv; @@ -88,7 +116,7 @@ class BlockBasedTableReaderBaseTest : public testing::Test { void CreateTable(const std::string& table_name, const ImmutableOptions& ioptions, const CompressionType& compression_type, - const std::map& kv, + const std::vector>& kv, uint32_t compression_parallel_threads = 1, uint32_t compression_dict_bytes = 0) { std::unique_ptr writer; @@ -115,9 +143,8 @@ class BlockBasedTableReaderBaseTest : public testing::Test { // Build table. for (auto it = kv.begin(); it != kv.end(); it++) { - std::string k = ToInternalKey(it->first); std::string v = it->second; - table_builder->Add(k, v); + table_builder->Add(it->first, v); } ASSERT_OK(table_builder->Finish()); } @@ -169,11 +196,6 @@ class BlockBasedTableReaderBaseTest : public testing::Test { std::shared_ptr fs_; Options options_; - std::string ToInternalKey(const std::string& key) { - InternalKey internal_key(key, 0, ValueType::kTypeValue); - return internal_key.Encode().ToString(); - } - private: void WriteToFile(const std::string& content, const std::string& filename) { std::unique_ptr f; @@ -211,11 +233,18 @@ class BlockBasedTableReaderBaseTest : public testing::Test { // Param 7: CompressionOptions.max_dict_bytes and // CompressionOptions.max_dict_buffer_bytes to enable/disable // compression dictionary. +// Param 8: test mode to specify the pattern for generating key / value. When +// true, generate keys with the same user provided key, different +// user-defined timestamps (if udt enabled), different sequence +// numbers. This test mode is used for testing `Get`. When false, +// generate keys with different user provided key, same user-defined +// timestamps (if udt enabled), same sequence number. This test mode is +// used for testing `Get`, `MultiGet`, and `NewIterator`. 
class BlockBasedTableReaderTest : public BlockBasedTableReaderBaseTest, public testing::WithParamInterface> { + test::UserDefinedTimestampTestMode, uint32_t, uint32_t, bool>> { protected: void SetUp() override { compression_type_ = std::get<0>(GetParam()); @@ -225,6 +254,7 @@ class BlockBasedTableReaderTest persist_udt_ = test::ShouldPersistUDT(udt_test_mode); compression_parallel_threads_ = std::get<5>(GetParam()); compression_dict_bytes_ = std::get<6>(GetParam()); + same_key_diff_ts_ = std::get<7>(GetParam()); BlockBasedTableReaderBaseTest::SetUp(); } @@ -236,6 +266,7 @@ class BlockBasedTableReaderTest opts.partition_filters = opts.index_type == BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + opts.metadata_cache_options.partition_pinning = PinningTier::kAll; options_.table_factory.reset( static_cast(NewBlockBasedTableFactory(opts))); options_.prefix_extractor = @@ -248,8 +279,72 @@ class BlockBasedTableReaderTest bool persist_udt_; uint32_t compression_parallel_threads_; uint32_t compression_dict_bytes_; + bool same_key_diff_ts_; }; +class BlockBasedTableReaderGetTest : public BlockBasedTableReaderTest {}; + +TEST_P(BlockBasedTableReaderGetTest, Get) { + Options options; + if (udt_enabled_) { + options.comparator = test::BytewiseComparatorWithU64TsWrapper(); + } + options.persist_user_defined_timestamps = persist_udt_; + size_t ts_sz = options.comparator->timestamp_size(); + std::vector> kv = + BlockBasedTableReaderBaseTest::GenerateKVMap( + 100 /* num_block */, + true /* mixed_with_human_readable_string_value */, ts_sz, + same_key_diff_ts_); + + std::string table_name = "BlockBasedTableReaderGetTest_Get" + + CompressionTypeToString(compression_type_); + + ImmutableOptions ioptions(options); + CreateTable(table_name, ioptions, compression_type_, kv, + compression_parallel_threads_, compression_dict_bytes_); + + std::unique_ptr table; + FileOptions foptions; + foptions.use_direct_reads = use_direct_reads_; + InternalKeyComparator comparator(options.comparator); + NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table, + true /* prefetch_index_and_filter_in_cache */, + nullptr /* status */, persist_udt_); + + ReadOptions read_opts; + ASSERT_OK( + table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum)); + + for (size_t i = 0; i < kv.size(); i += 1) { + Slice key = kv[i].first; + Slice lkey = key; + std::string lookup_ikey; + if (udt_enabled_ && !persist_udt_) { + // When user-defined timestamps are collapsed to be the minimum timestamp, + // we also read with the minimum timestamp to be able to retrieve each + // value. + ReplaceInternalKeyWithMinTimestamp(&lookup_ikey, key, ts_sz); + lkey = lookup_ikey; + } + // Reading the first entry in a block caches the whole block. + if (i % kEntriesPerBlock == 0) { + ASSERT_FALSE(table->TEST_KeyInCache(read_opts, lkey.ToString())); + } else { + ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString())); + } + PinnableSlice value; + GetContext get_context(options.comparator, nullptr, nullptr, nullptr, + GetContext::kNotFound, ExtractUserKey(key), &value, + nullptr, nullptr, nullptr, nullptr, + true /* do_merge */, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr); + ASSERT_OK(table->Get(read_opts, lkey, &get_context, nullptr)); + ASSERT_EQ(value.ToString(), kv[i].second); + ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString())); + } +} + // Tests MultiGet in both direct IO and non-direct IO mode. // The keys should be in cache after MultiGet. 
TEST_P(BlockBasedTableReaderTest, MultiGet) { @@ -263,7 +358,7 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); - std::map kv = + std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 100 /* num_block */, true /* mixed_with_human_readable_string_value */, ts_sz); @@ -273,6 +368,8 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { autovector keys_without_timestamps; autovector values; autovector statuses; + autovector + expected_values; { const int step = static_cast(kv.size()) / MultiGetContext::MAX_BATCH_SIZE; @@ -280,13 +377,15 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) { keys.emplace_back(it->first); if (ts_sz > 0) { - Slice ukey_without_ts = StripTimestampFromUserKey(it->first, ts_sz); + Slice ukey_without_ts = + ExtractUserKeyAndStripTimestamp(it->first, ts_sz); keys_without_timestamps.push_back(ukey_without_ts); } else { - keys_without_timestamps.emplace_back(it->first); + keys_without_timestamps.emplace_back(ExtractUserKey(it->first)); } values.emplace_back(); statuses.emplace_back(); + expected_values.push_back(&(it->second)); std::advance(it, step); } } @@ -311,8 +410,7 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { // Ensure that keys are not in cache before MultiGet. for (auto& key : keys) { - std::string ikey = ToInternalKey(key.ToString()); - ASSERT_FALSE(table->TEST_KeyInCache(read_opts, ikey)); + ASSERT_FALSE(table->TEST_KeyInCache(read_opts, key.ToString())); } // Prepare MultiGetContext. @@ -321,8 +419,8 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { autovector sorted_keys; for (size_t i = 0; i < keys.size(); ++i) { get_context.emplace_back(options.comparator, nullptr, nullptr, nullptr, - GetContext::kNotFound, keys[i], &values[i], - nullptr, nullptr, nullptr, nullptr, + GetContext::kNotFound, ExtractUserKey(keys[i]), + &values[i], nullptr, nullptr, nullptr, nullptr, true /* do_merge */, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); key_context.emplace_back(nullptr, keys_without_timestamps[i], &values[i], @@ -352,9 +450,8 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) { } // Check that keys are in cache after MultiGet. 
for (size_t i = 0; i < keys.size(); i++) { - std::string ikey = ToInternalKey(keys[i].ToString()); - ASSERT_TRUE(table->TEST_KeyInCache(read_opts, ikey)); - ASSERT_EQ(values[i].ToString(), kv[keys[i].ToString()]); + ASSERT_TRUE(table->TEST_KeyInCache(read_opts, keys[i])); + ASSERT_EQ(values[i].ToString(), *expected_values[i]); } } @@ -369,7 +466,7 @@ TEST_P(BlockBasedTableReaderTest, NewIterator) { } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); - std::map kv = + std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 100 /* num_block */, true /* mixed_with_human_readable_string_value */, ts_sz); @@ -401,8 +498,7 @@ TEST_P(BlockBasedTableReaderTest, NewIterator) { iter->SeekToFirst(); ASSERT_OK(iter->status()); for (auto kv_iter = kv.begin(); kv_iter != kv.end(); kv_iter++) { - std::string ikey = ToInternalKey(kv_iter->first); - ASSERT_EQ(iter->key().ToString(), ikey); + ASSERT_EQ(iter->key().ToString(), kv_iter->first); ASSERT_EQ(iter->value().ToString(), kv_iter->second); iter->Next(); ASSERT_OK(iter->status()); @@ -414,8 +510,7 @@ TEST_P(BlockBasedTableReaderTest, NewIterator) { iter->SeekToLast(); ASSERT_OK(iter->status()); for (auto kv_iter = kv.rbegin(); kv_iter != kv.rend(); kv_iter++) { - std::string ikey = ToInternalKey(kv_iter->first); - ASSERT_EQ(iter->key().ToString(), ikey); + ASSERT_EQ(iter->key().ToString(), kv_iter->first); ASSERT_EQ(iter->value().ToString(), kv_iter->second); iter->Prev(); ASSERT_OK(iter->status()); @@ -504,7 +599,7 @@ class ChargeTableReaderTest TargetCacheChargeTrackingCache> table_reader_charge_tracking_cache_; std::size_t approx_table_reader_mem_; - std::map kv_; + std::vector> kv_; CompressionType compression_type_; private: @@ -641,7 +736,7 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) { } options.persist_user_defined_timestamps = persist_udt_; size_t ts_sz = options.comparator->timestamp_size(); - std::map kv = + std::vector> kv = BlockBasedTableReaderBaseTest::GenerateKVMap( 800 /* num_block */, false /* mixed_with_human_readable_string_value=*/, ts_sz); @@ -705,6 +800,7 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) { // Param 7: CompressionOptions.max_dict_bytes and // CompressionOptions.max_dict_buffer_bytes. This enable/disables // compression dictionary. +// Param 8: test mode to specify the pattern for generating key / value pairs. 
INSTANTIATE_TEST_CASE_P( BlockBasedTableReaderTest, BlockBasedTableReaderTest, ::testing::Combine( @@ -715,7 +811,20 @@ INSTANTIATE_TEST_CASE_P( BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey), ::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()), - ::testing::Values(1, 2), ::testing::Values(0, 4096))); + ::testing::Values(1, 2), ::testing::Values(0, 4096), + ::testing::Values(false))); +INSTANTIATE_TEST_CASE_P( + BlockBasedTableReaderGetTest, BlockBasedTableReaderGetTest, + ::testing::Combine( + ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Bool(), + ::testing::Values( + BlockBasedTableOptions::IndexType::kBinarySearch, + BlockBasedTableOptions::IndexType::kHashSearch, + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch, + BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey), + ::testing::Values(false), ::testing::ValuesIn(test::GetUDTTestModes()), + ::testing::Values(1, 2), ::testing::Values(0, 4096), + ::testing::Values(false, true))); INSTANTIATE_TEST_CASE_P( VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum, ::testing::Combine( @@ -724,7 +833,8 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch), ::testing::Values(true), ::testing::ValuesIn(test::GetUDTTestModes()), - ::testing::Values(1, 2), ::testing::Values(0))); + ::testing::Values(1, 2), ::testing::Values(0), + ::testing::Values(false))); } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/index_builder.h b/table/block_based/index_builder.h index 56935546d..be690d799 100644 --- a/table/block_based/index_builder.h +++ b/table/block_based/index_builder.h @@ -35,7 +35,7 @@ class IndexBuilder { public: static IndexBuilder* CreateIndexBuilder( BlockBasedTableOptions::IndexType index_type, - const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator, + const InternalKeyComparator* comparator, const InternalKeySliceTransform* int_key_slice_transform, bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt, size_t ts_sz, bool persist_user_defined_timestamps); @@ -106,6 +106,25 @@ class IndexBuilder { virtual bool seperator_is_key_plus_seq() { return true; } protected: + // Given the last key in current block and the first key in the next block, + // return true if internal key should be used as separator, false if user key + // can be used as separator. + inline bool ShouldUseKeyPlusSeqAsSeparator( + const Slice& last_key_in_current_block, + const Slice& first_key_in_next_block) { + Slice l_user_key = ExtractUserKey(last_key_in_current_block); + Slice r_user_key = ExtractUserKey(first_key_in_next_block); + // If user defined timestamps are not persisted. All the user keys will + // act like they have minimal timestamp. Only having user key is not + // sufficient, even if they are different user keys for now, they have to be + // different user keys without the timestamp part. + return persist_user_defined_timestamps_ + ? comparator_->user_comparator()->Compare(l_user_key, + r_user_key) == 0 + : comparator_->user_comparator()->CompareWithoutTimestamp( + l_user_key, r_user_key) == 0; + } + const InternalKeyComparator* comparator_; // Size of user-defined timestamp in bytes. 
size_t ts_sz_; @@ -173,9 +192,8 @@ class ShortenedIndexBuilder : public IndexBuilder { *first_key_in_next_block); } if (!seperator_is_key_plus_seq_ && - comparator_->user_comparator()->Compare( - ExtractUserKey(*last_key_in_current_block), - ExtractUserKey(*first_key_in_next_block)) == 0) { + ShouldUseKeyPlusSeqAsSeparator(*last_key_in_current_block, + *first_key_in_next_block)) { seperator_is_key_plus_seq_ = true; } } else { @@ -414,9 +432,9 @@ class HashIndexBuilder : public IndexBuilder { class PartitionedIndexBuilder : public IndexBuilder { public: static PartitionedIndexBuilder* CreateIndexBuilder( - const ROCKSDB_NAMESPACE::InternalKeyComparator* comparator, - bool use_value_delta_encoding, const BlockBasedTableOptions& table_opt, - size_t ts_sz, bool persist_user_defined_timestamps); + const InternalKeyComparator* comparator, bool use_value_delta_encoding, + const BlockBasedTableOptions& table_opt, size_t ts_sz, + bool persist_user_defined_timestamps); PartitionedIndexBuilder(const InternalKeyComparator* comparator, const BlockBasedTableOptions& table_opt, diff --git a/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md b/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md new file mode 100644 index 000000000..53917e657 --- /dev/null +++ b/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md @@ -0,0 +1 @@ +Fixed some bugs in the index builder/reader path for user-defined timestamps in Memtable only feature. \ No newline at end of file From ddb7df10efb7e90d55360ee64c0135c74e195ff1 Mon Sep 17 00:00:00 2001 From: Jay Huh Date: Tue, 21 Nov 2023 18:07:19 -0800 Subject: [PATCH 299/386] Update HISTORY.md and version.h for 8.9.fb release (#12074) Summary: Creating cut for 8.9 release Pull Request resolved: https://github.com/facebook/rocksdb/pull/12074 Test Plan: CI Reviewed By: ajkr Differential Revision: D51435289 Pulled By: jaykorean fbshipit-source-id: 3918a8250032839e5b71f67f26c8ba01cbc17a41 --- HISTORY.md | 23 +++++++++++++++++++ include/rocksdb/version.h | 2 +- tools/check_format_compatible.sh | 2 +- .../WAL_ttl_seconds_expiration.md | 1 - .../periodic_compaction_during_offpeak.md | 1 - .../behavior_changes/skip_trace_write.md | 1 - ..._file_deletion_on_recovery_rate_limited.md | 1 - .../table_prop_collector_api_call.md | 1 - .../bug_fixes/exp_autohcc_fix.md | 1 - .../flush_recovery_db_destructor_race.md | 1 - .../index_bug_fix_for_udt_in_memtable_only.md | 1 - .../new_features/attribute_group_support.md | 1 - .../add_auto_tuned_rate_limiter_to_c_api.md | 1 - .../add_clip_column_family_to_java_api.md | 1 - .../enable_file_deletion_not_default_force.md | 2 -- .../tiered_cache_capacity_and_usage.md | 1 - 16 files changed, 25 insertions(+), 16 deletions(-) delete mode 100644 unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md delete mode 100644 unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md delete mode 100644 unreleased_history/behavior_changes/skip_trace_write.md delete mode 100644 unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md delete mode 100644 unreleased_history/behavior_changes/table_prop_collector_api_call.md delete mode 100644 unreleased_history/bug_fixes/exp_autohcc_fix.md delete mode 100644 unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md delete mode 100644 unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md delete mode 100644 unreleased_history/new_features/attribute_group_support.md delete mode 100644 
unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md delete mode 100644 unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md delete mode 100644 unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md delete mode 100644 unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md diff --git a/HISTORY.md b/HISTORY.md index b32a3dd7f..47adf0040 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,29 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.9.0 (11/17/2023) +### New Features +* Add GetEntity() and PutEntity() API implementation for Attribute Group support. Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. + +### Public API Changes +* Added rocksdb_ratelimiter_create_auto_tuned API to create an auto-tuned GenericRateLimiter. +* Added clipColumnFamily() to the Java API to clip the entries in the CF according to the range [begin_key, end_key). +* Make the `EnableFileDeletion` API not default to force enabling. For users that rely on this default behavior and still +want to continue to use force enabling, they need to explicitly pass a `true` to `EnableFileDeletion`. +* Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() to return the configured capacity, and cache reservation charged to the secondary cache. + +### Behavior Changes +* During off-peak hours defined by `daily_offpeak_time_utc`, the compaction picker will select a larger number of files for periodic compaction. This selection will include files that are projected to expire by the next off-peak start time, ensuring that these files are not chosen for periodic compaction outside of off-peak hours. +* If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error occured previously in its status message. +* Deleting stale files upon recovery are delegated to SstFileManger if available so they can be rate limited. +* Make RocksDB only call `TablePropertiesCollector::Finish()` once. +* When `WAL_ttl_seconds > 0`, we now process archived WALs for deletion at least every `WAL_ttl_seconds / 2` seconds. Previously it could be less frequent in case of small `WAL_ttl_seconds` values when size-based expiration (`WAL_size_limit_MB > 0 `) was simultaneously enabled. + +### Bug Fixes +* Fixed a crash or assertion failure bug in experimental new HyperClockCache variant, especially when running with a SecondaryCache. +* Fix a race between flush error recovery and db destruction that can lead to db crashing. +* Fixed some bugs in the index builder/reader path for user-defined timestamps in Memtable only feature. + ## 8.8.0 (10/23/2023) ### New Features * Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. More APIs to support AttributeGroup will come soon, including GetEntity, PutEntity, and others. 
diff --git a/include/rocksdb/version.h b/include/rocksdb/version.h index 139058155..530d4fee7 100644 --- a/include/rocksdb/version.h +++ b/include/rocksdb/version.h @@ -12,7 +12,7 @@ // NOTE: in 'main' development branch, this should be the *next* // minor or major version number planned for release. #define ROCKSDB_MAJOR 8 -#define ROCKSDB_MINOR 9 +#define ROCKSDB_MINOR 10 #define ROCKSDB_PATCH 0 // Do not use these. We made the mistake of declaring macros starting with diff --git a/tools/check_format_compatible.sh b/tools/check_format_compatible.sh index e4771706f..93b51a9b9 100755 --- a/tools/check_format_compatible.sh +++ b/tools/check_format_compatible.sh @@ -125,7 +125,7 @@ EOF # To check for DB forward compatibility with loading options (old version # reading data from new), as well as backward compatibility -declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb" "8.7.fb" "8.8.fb") +declare -a db_forward_with_options_refs=("6.27.fb" "6.28.fb" "6.29.fb" "7.0.fb" "7.1.fb" "7.2.fb" "7.3.fb" "7.4.fb" "7.5.fb" "7.6.fb" "7.7.fb" "7.8.fb" "7.9.fb" "7.10.fb" "8.0.fb" "8.1.fb" "8.2.fb" "8.3.fb" "8.4.fb" "8.5.fb" "8.6.fb" "8.7.fb" "8.8.fb" "8.9.fb") # To check for DB forward compatibility without loading options (in addition # to the "with loading options" set), as well as backward compatibility declare -a db_forward_no_options_refs=() # N/A at the moment diff --git a/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md b/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md deleted file mode 100644 index 3d55f41c8..000000000 --- a/unreleased_history/behavior_changes/WAL_ttl_seconds_expiration.md +++ /dev/null @@ -1 +0,0 @@ -* When `WAL_ttl_seconds > 0`, we now process archived WALs for deletion at least every `WAL_ttl_seconds / 2` seconds. Previously it could be less frequent in case of small `WAL_ttl_seconds` values when size-based expiration (`WAL_size_limit_MB > 0 `) was simultaneously enabled. diff --git a/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md b/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md deleted file mode 100644 index 475adf7f7..000000000 --- a/unreleased_history/behavior_changes/periodic_compaction_during_offpeak.md +++ /dev/null @@ -1 +0,0 @@ -During off-peak hours defined by `daily_offpeak_time_utc`, the compaction picker will select a larger number of files for periodic compaction. This selection will include files that are projected to expire by the next off-peak start time, ensuring that these files are not chosen for periodic compaction outside of off-peak hours. diff --git a/unreleased_history/behavior_changes/skip_trace_write.md b/unreleased_history/behavior_changes/skip_trace_write.md deleted file mode 100644 index 55ee8edae..000000000 --- a/unreleased_history/behavior_changes/skip_trace_write.md +++ /dev/null @@ -1 +0,0 @@ -If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error occured previously in its status message. 
diff --git a/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md b/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md deleted file mode 100644 index e57647ea9..000000000 --- a/unreleased_history/behavior_changes/stale_file_deletion_on_recovery_rate_limited.md +++ /dev/null @@ -1 +0,0 @@ -Deleting stale files upon recovery are delegated to SstFileManger if available so they can be rate limited. \ No newline at end of file diff --git a/unreleased_history/behavior_changes/table_prop_collector_api_call.md b/unreleased_history/behavior_changes/table_prop_collector_api_call.md deleted file mode 100644 index ce69b3b2b..000000000 --- a/unreleased_history/behavior_changes/table_prop_collector_api_call.md +++ /dev/null @@ -1 +0,0 @@ -Make RocksDB only call `TablePropertiesCollector::Finish()` once. diff --git a/unreleased_history/bug_fixes/exp_autohcc_fix.md b/unreleased_history/bug_fixes/exp_autohcc_fix.md deleted file mode 100644 index 95a0de7b5..000000000 --- a/unreleased_history/bug_fixes/exp_autohcc_fix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed a crash or assertion failure bug in experimental new HyperClockCache variant, especially when running with a SecondaryCache. diff --git a/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md b/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md deleted file mode 100644 index 76cc3c721..000000000 --- a/unreleased_history/bug_fixes/flush_recovery_db_destructor_race.md +++ /dev/null @@ -1 +0,0 @@ -Fix a race between flush error recovery and db destruction that can lead to db crashing. diff --git a/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md b/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md deleted file mode 100644 index 53917e657..000000000 --- a/unreleased_history/bug_fixes/index_bug_fix_for_udt_in_memtable_only.md +++ /dev/null @@ -1 +0,0 @@ -Fixed some bugs in the index builder/reader path for user-defined timestamps in Memtable only feature. \ No newline at end of file diff --git a/unreleased_history/new_features/attribute_group_support.md b/unreleased_history/new_features/attribute_group_support.md deleted file mode 100644 index 4645a1a63..000000000 --- a/unreleased_history/new_features/attribute_group_support.md +++ /dev/null @@ -1 +0,0 @@ -Add GetEntity() and PutEntity() API implementation for Attribute Group support. Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. diff --git a/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md b/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md deleted file mode 100644 index 980ca7868..000000000 --- a/unreleased_history/public_api_changes/add_auto_tuned_rate_limiter_to_c_api.md +++ /dev/null @@ -1 +0,0 @@ -Added rocksdb_ratelimiter_create_auto_tuned API to create an auto-tuned GenericRateLimiter. diff --git a/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md b/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md deleted file mode 100644 index 3da9e99a5..000000000 --- a/unreleased_history/public_api_changes/add_clip_column_family_to_java_api.md +++ /dev/null @@ -1 +0,0 @@ -Added clipColumnFamily() to the Java API to clip the entries in the CF according to the range [begin_key, end_key). 
\ No newline at end of file diff --git a/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md b/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md deleted file mode 100644 index e4d032613..000000000 --- a/unreleased_history/public_api_changes/enable_file_deletion_not_default_force.md +++ /dev/null @@ -1,2 +0,0 @@ -Make the `EnableFileDeletion` API not default to force enabling. For users that rely on this default behavior and still -want to continue to use force enabling, they need to explicitly pass a `true` to `EnableFileDeletion`. \ No newline at end of file diff --git a/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md b/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md deleted file mode 100644 index e281d069d..000000000 --- a/unreleased_history/public_api_changes/tiered_cache_capacity_and_usage.md +++ /dev/null @@ -1 +0,0 @@ -Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() to return the configured capacity, and cache reservation charged to the secondary cache. From 324453e5790f52557d02abf64a2dcadbaa54c8fa Mon Sep 17 00:00:00 2001 From: cz2h <32441682+cz2h@users.noreply.github.com> Date: Tue, 21 Nov 2023 20:39:33 -0800 Subject: [PATCH 300/386] Fix rowcache get returning incorrect timestamp (#11952) Summary: Fixes https://github.com/facebook/rocksdb/issues/7930. When there is a timestamp associated with stored records, get from row cache will return the timestamp provided in query instead of the timestamp associated with the stored record. ## Cause of error: Currently a row_handle is fetched using row_cache_key(contains a timestamp provided by user query) and the row_handle itself does not persist timestamp associated with the object. Hence the [GetContext::SaveValue() ](https://github.com/facebook/rocksdb/blob/6e3429b8a6a53d5e477074057b5f27218063b5f2/table/get_context.cc#L257) function will fetch the timestamp in row_cache_key and may return the incorrect timestamp value. ## Proposed Solution If current cf enables ts, append a timestamp associated with stored records after the value in replay_log (equivalently the value of row cache entry). When read, `replayGetContextLog()` will update parsed_key with the correct timestamp. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/11952 Reviewed By: ajkr Differential Revision: D51501176 Pulled By: jowlyzhang fbshipit-source-id: 808fc943a8ae95de56ae0e82ec59a2573a031f28 --- db/db_with_timestamp_basic_test.cc | 188 +++++++++++++++++++++++------ table/get_context.cc | 112 ++++++++++------- table/get_context.h | 4 + 3 files changed, 224 insertions(+), 80 deletions(-) diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc index 4bd8eaa0b..3214faae9 100644 --- a/db/db_with_timestamp_basic_test.cc +++ b/db/db_with_timestamp_basic_test.cc @@ -1646,35 +1646,88 @@ TEST_F(DBBasicTestWithTimestamp, GetWithRowCache) { const Snapshot* snap_with_nothing = db_->GetSnapshot(); ASSERT_OK(db_->Put(write_opts, "foo", ts_early, "bar")); - const Snapshot* snap_with_foo = db_->GetSnapshot(); + ASSERT_OK(db_->Put(write_opts, "foo2", ts_early, "bar2")); + ASSERT_OK(db_->Put(write_opts, "foo3", ts_early, "bar3")); - // Ensure file has sequence number greater than snapshot_with_foo - for (int i = 0; i < 10; i++) { - std::string numStr = std::to_string(i); - ASSERT_OK(db_->Put(write_opts, numStr, ts_later, numStr)); - } + const Snapshot* snap_with_foo = db_->GetSnapshot(); ASSERT_OK(Flush()); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 0); ReadOptions read_opts; read_opts.timestamp = &ts_later_slice; std::string read_value; std::string read_ts; - Status s = db_->Get(read_opts, "foo", &read_value, &read_ts); - ASSERT_OK(s); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); - ASSERT_EQ(read_ts, ts_early); + Status s; - s = db_->Get(read_opts, "foo", &read_value, &read_ts); - ASSERT_OK(s); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 1); - // Row cache is not storing the ts when record is inserted/updated. - // To be fixed after enabling ROW_CACHE with timestamp. 
- // ASSERT_EQ(read_ts, ts_early); + int expected_hit_count = 0; + int expected_miss_count = 0; + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count); + + { + read_opts.timestamp = nullptr; + s = db_->Get(read_opts, "foo", &read_value); + ASSERT_NOK(s); + ASSERT_TRUE(s.IsInvalidArgument()); + } + + // Mix use of Get + { + read_opts.timestamp = &ts_later_slice; + + // Use Get without ts first, expect cache entry to store the correct ts + s = db_->Get(read_opts, "foo2", &read_value); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); + ASSERT_EQ(read_value, "bar2"); + + s = db_->Get(read_opts, "foo2", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar2"); + + // Use Get with ts first, expect the Get without ts can get correct record + s = db_->Get(read_opts, "foo3", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar3"); + + s = db_->Get(read_opts, "foo3", &read_value); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count); + ASSERT_EQ(read_value, "bar3"); + } + + { + // Test with consecutive calls of Get with ts. 
+ s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar"); + + // Test repeated get on cache entry + for (int i = 0; i < 3; i++) { + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), + ++expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar"); + } + } { std::string ts_nothing = Timestamp(0, 0); @@ -1682,41 +1735,43 @@ TEST_F(DBBasicTestWithTimestamp, GetWithRowCache) { read_opts.timestamp = &ts_nothing_slice; s = db_->Get(read_opts, "foo", &read_value, &read_ts); ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 1); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); - - read_opts.timestamp = &ts_later_slice; - s = db_->Get(read_opts, "foo", &read_value, &read_ts); - ASSERT_OK(s); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); } { read_opts.snapshot = snap_with_foo; - + read_opts.timestamp = &ts_later_slice; s = db_->Get(read_opts, "foo", &read_value, &read_ts); ASSERT_OK(s); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar"); s = db_->Get(read_opts, "foo", &read_value, &read_ts); ASSERT_OK(s); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 3); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), ++expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), expected_miss_count); + ASSERT_EQ(read_ts, ts_early); + ASSERT_EQ(read_value, "bar"); } { read_opts.snapshot = snap_with_nothing; s = db_->Get(read_opts, "foo", &read_value, &read_ts); ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 4); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); s = db_->Get(read_opts, "foo", &read_value, &read_ts); ASSERT_TRUE(s.IsNotFound()); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 3); - ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 5); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), expected_hit_count); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), + ++expected_miss_count); } db_->ReleaseSnapshot(snap_with_nothing); @@ -1724,6 +1779,65 @@ TEST_F(DBBasicTestWithTimestamp, GetWithRowCache) { Close(); } +TEST_F(DBBasicTestWithTimestamp, GetWithRowCacheMultiSST) { + BlockBasedTableOptions table_options; + table_options.block_size = 1; + Options options = CurrentOptions(); + options.env = env_; + options.create_if_missing = true; + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + 
LRUCacheOptions cache_options; + cache_options.capacity = 8192; + options.row_cache = cache_options.MakeSharedRowCache(); + + const size_t kTimestampSize = Timestamp(0, 0).size(); + TestComparator test_cmp(kTimestampSize); + options.comparator = &test_cmp; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.merge_operator = MergeOperators::CreateStringAppendTESTOperator(); + options.disable_auto_compactions = true; + + DestroyAndReopen(options); + + std::string ts_early = Timestamp(1, 0); + std::string ts_later = Timestamp(10, 0); + Slice ts_later_slice = ts_later; + + ASSERT_OK(db_->Put(WriteOptions(), "foo", ts_early, "v1")); + ASSERT_OK(Flush()); + + ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily(); + ASSERT_OK( + db_->Merge(WriteOptions(), default_cf, "foo", Timestamp(2, 0), "v2")); + ASSERT_OK( + db_->Merge(WriteOptions(), default_cf, "foo", Timestamp(3, 0), "v3")); + ASSERT_OK(Flush()); + + ReadOptions read_opts; + read_opts.timestamp = &ts_later_slice; + + std::string read_value; + std::string read_ts; + Status s; + + { + // Since there are two SST files, will trigger the table lookup twice. + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 0); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); + ASSERT_EQ(read_ts, Timestamp(3, 0)); + ASSERT_EQ(read_value, "v1,v2,v3"); + + s = db_->Get(read_opts, "foo", &read_value, &read_ts); + ASSERT_OK(s); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_HIT), 2); + ASSERT_EQ(TestGetTickerCount(options, ROW_CACHE_MISS), 2); + ASSERT_EQ(read_ts, Timestamp(3, 0)); + ASSERT_EQ(read_value, "v1,v2,v3"); + } +} + TEST_P(DBBasicTestWithTimestampTableOptions, MultiGetPrefixFilter) { Options options = CurrentOptions(); options.env = env_; diff --git a/table/get_context.cc b/table/get_context.cc index 660726cd3..23b1abd64 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -19,22 +19,6 @@ namespace ROCKSDB_NAMESPACE { -namespace { - -void appendToReplayLog(std::string* replay_log, ValueType type, Slice value) { - if (replay_log) { - if (replay_log->empty()) { - // Optimization: in the common case of only one operation in the - // log, we allocate the exact amount of space needed. - replay_log->reserve(1 + VarintLength(value.size()) + value.size()); - } - replay_log->push_back(type); - PutLengthPrefixedSlice(replay_log, value); - } -} - -} // namespace - GetContext::GetContext( const Comparator* ucmp, const MergeOperator* merge_operator, Logger* logger, Statistics* statistics, GetState init_state, const Slice& user_key, @@ -88,6 +72,24 @@ GetContext::GetContext(const Comparator* ucmp, seq, _pinned_iters_mgr, callback, is_blob_index, tracing_get_id, blob_fetcher) {} +void GetContext::appendToReplayLog(ValueType type, Slice value, Slice ts) { + if (replay_log_) { + if (replay_log_->empty()) { + // Optimization: in the common case of only one operation in the + // log, we allocate the exact amount of space needed. + replay_log_->reserve(1 + VarintLength(value.size()) + value.size()); + } + replay_log_->push_back(type); + PutLengthPrefixedSlice(replay_log_, value); + + // If cf enables ts, there should always be a ts following each value + if (ucmp_->timestamp_size() > 0) { + assert(ts.size() == ucmp_->timestamp_size()); + PutLengthPrefixedSlice(replay_log_, ts); + } + } +} + // Called from TableCache::Get and Table::Get when file/block in which // key may exist are not there in TableCache/BlockCache respectively. 
In this // case we can't guarantee that key does not exist and are not permitted to do @@ -102,7 +104,9 @@ void GetContext::MarkKeyMayExist() { void GetContext::SaveValue(const Slice& value, SequenceNumber /*seq*/) { assert(state_ == kNotFound); - appendToReplayLog(replay_log_, kTypeValue, value); + assert(ucmp_->timestamp_size() == 0); + + appendToReplayLog(kTypeValue, value, Slice()); state_ = kFound; if (LIKELY(pinnable_val_ != nullptr)) { @@ -228,7 +232,6 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, return true; // to continue to the next seq } - appendToReplayLog(replay_log_, parsed_key.type, value); if (seq_ != nullptr) { // Set the sequence number if it is uninitialized @@ -241,32 +244,37 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, } size_t ts_sz = ucmp_->timestamp_size(); - if (ts_sz > 0 && timestamp_ != nullptr) { - if (!timestamp_->empty()) { - assert(ts_sz == timestamp_->size()); - // `timestamp` can be set before `SaveValue` is ever called - // when max_covering_tombstone_seq_ was set. - // If this key has a higher sequence number than range tombstone, - // then timestamp should be updated. `ts_from_rangetombstone_` is - // set to false afterwards so that only the key with highest seqno - // updates the timestamp. - if (ts_from_rangetombstone_) { - assert(max_covering_tombstone_seq_); - if (parsed_key.sequence > *max_covering_tombstone_seq_) { - Slice ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); - timestamp_->assign(ts.data(), ts.size()); - ts_from_rangetombstone_ = false; + Slice ts; + + if (ts_sz > 0) { + // ensure always have ts if cf enables ts. + ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); + if (timestamp_ != nullptr) { + if (!timestamp_->empty()) { + assert(ts_sz == timestamp_->size()); + // `timestamp` can be set before `SaveValue` is ever called + // when max_covering_tombstone_seq_ was set. + // If this key has a higher sequence number than range tombstone, + // then timestamp should be updated. `ts_from_rangetombstone_` is + // set to false afterwards so that only the key with highest seqno + // updates the timestamp. + if (ts_from_rangetombstone_) { + assert(max_covering_tombstone_seq_); + if (parsed_key.sequence > *max_covering_tombstone_seq_) { + timestamp_->assign(ts.data(), ts.size()); + ts_from_rangetombstone_ = false; + } } } - } - // TODO optimize for small size ts - const std::string kMaxTs(ts_sz, '\xff'); - if (timestamp_->empty() || - ucmp_->CompareTimestamp(*timestamp_, kMaxTs) == 0) { - Slice ts = ExtractTimestampFromUserKey(parsed_key.user_key, ts_sz); - timestamp_->assign(ts.data(), ts.size()); + // TODO optimize for small size ts + const std::string kMaxTs(ts_sz, '\xff'); + if (timestamp_->empty() || + ucmp_->CompareTimestamp(*timestamp_, kMaxTs) == 0) { + timestamp_->assign(ts.data(), ts.size()); + } } } + appendToReplayLog(parsed_key.type, value, ts); auto type = parsed_key.type; // Key matches. 
Process it @@ -561,17 +569,35 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key, GetContext* get_context, Cleanable* value_pinner, SequenceNumber seq_no) { Slice s = replay_log; + Slice ts; + size_t ts_sz = get_context->TimestampSize(); + bool ret = false; + while (s.size()) { auto type = static_cast(*s.data()); s.remove_prefix(1); Slice value; - bool ret = GetLengthPrefixedSlice(&s, &value); + ret = GetLengthPrefixedSlice(&s, &value); assert(ret); - (void)ret; bool dont_care __attribute__((__unused__)); - ParsedInternalKey ikey = ParsedInternalKey(user_key, seq_no, type); + // Use a copy to prevent modifying user_key. Modification of user_key + // could result to potential cache miss. + std::string user_key_str = user_key.ToString(); + ParsedInternalKey ikey = ParsedInternalKey(user_key_str, seq_no, type); + + // If ts enabled for current cf, there will always be ts appended after each + // piece of value. + if (ts_sz > 0) { + ret = GetLengthPrefixedSlice(&s, &ts); + assert(ts_sz == ts.size()); + assert(ret); + ikey.SetTimestamp(ts); + } + + (void)ret; + get_context->SaveValue(ikey, value, &dont_care, value_pinner); } } diff --git a/table/get_context.h b/table/get_context.h index b43ff6e16..da41631fc 100644 --- a/table/get_context.h +++ b/table/get_context.h @@ -149,6 +149,8 @@ class GetContext { bool NeedTimestamp() { return timestamp_ != nullptr; } + inline size_t TimestampSize() { return ucmp_->timestamp_size(); } + void SetTimestampFromRangeTombstone(const Slice& timestamp) { assert(timestamp_); timestamp_->assign(timestamp.data(), timestamp.size()); @@ -204,6 +206,8 @@ class GetContext { bool GetBlobValue(const Slice& user_key, const Slice& blob_index, PinnableSlice* blob_value); + void appendToReplayLog(ValueType type, Slice value, Slice ts); + const Comparator* ucmp_; const MergeOperator* merge_operator_; // the merge operations encountered; From a140b519b10f005da93f5abea117723ed382cc47 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 22 Nov 2023 10:40:52 -0800 Subject: [PATCH 301/386] Convert all but one windows job to nightly (#12089) Summary: ... because they are expensive and rarely disagree with each other. Historical data indicates that the 2019 job is most sensitive to failure. 
https://fburl.com/scuba/opensource_ci_jobs/ntq3ue3p https://fburl.com/scuba/opensource_ci_jobs/0xo91j5f Pull Request resolved: https://github.com/facebook/rocksdb/pull/12089 Test Plan: CI Reviewed By: ajkr Differential Revision: D51530386 Pulled By: pdillinger fbshipit-source-id: 8b676d6e01096e359a0f465b59d81ac10f4f7969 --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f12de88b2..370ad301a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -914,8 +914,6 @@ workflows: - build-linux-mini-crashtest jobs-windows: jobs: - - build-windows-vs2022-avx2 - - build-windows-vs2022 - build-windows-vs2019 - build-cmake-mingw jobs-java: @@ -964,3 +962,5 @@ workflows: - build-linux-non-shm - build-linux-clang-13-asan-ubsan-with-folly - build-linux-valgrind + - build-windows-vs2022-avx2 + - build-windows-vs2022 From f6fd4b9dbd15dba36f7e5ad23de407b5c26b1460 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 22 Nov 2023 11:55:10 -0800 Subject: [PATCH 302/386] Print stack traces more reliably with concurrency (#12086) Summary: It's been relatively easy to break our stack trace printer: * If another thread reaches a signal condition such as a related SEGV or assertion failure while one is trying to print a stack trace from the signal handler, it seems to end the process abruptly without a stack trace. * If the process exits normally in one thread (such as main finishing) while another is trying to print a stack trace from the signal handler, it seems the process will often end normally without a stack trace. This change attempts to fix these issues, with * Keep the custom signal handler installed as long as possible, so that other threads will most likely re-enter our custom handler. (We only switch back to default for triggering core dump or whatever after stack trace.) * Use atomics and sleeps to implement a crude recursive mutex for ensuring all threads hitting the custom signal handler wait on the first that is trying to print a stack trace, while recursive signals in the same thread can still be handled cleanly. * Use an atexit handler to hook into normal exit to (a) wait on a pending printing of stack trace when detectable and applicable, and (b) detect and warn when printing a stack trace might be interrupted by a process exit in progress. (I don't know how to pause that *after* our atexit handler has been called; the best I know how to do is warn, "In a race with process already exiting...".) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12086 Test Plan: manual, including with TSAN. I added this code to the end of a unit test file: ``` for (size_t i = 0; i < 3; ++i) { std::thread t([]() { assert(false); }); t.detach(); } ``` Followed by either `sleep(100)` or `usleep(100)` or usual process exit. And for recursive signal testing, inject `abort()` at various places in the handler. 
Reviewed By: cbi42 Differential Revision: D51531882 Pulled By: pdillinger fbshipit-source-id: 3473b863a43e61b722dfb7a2ed12a8120949b09c --- port/stack_trace.cc | 90 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/port/stack_trace.cc b/port/stack_trace.cc index a5a6d2e77..292430752 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -25,6 +25,7 @@ void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) { #include #include +#include #include #include #include @@ -48,6 +49,9 @@ void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) { #endif // GLIBC version #endif // OS_LINUX +#include +#include + #include "port/lang.h" namespace ROCKSDB_NAMESPACE { @@ -311,28 +315,85 @@ void* SaveStack(int* num_frames, int first_frames_to_skip) { return callstack; } +static std::atomic g_thread_handling_stack_trace{0}; +static int g_recursion_count = 0; +static std::atomic g_at_exit_called{false}; + static void StackTraceHandler(int sig) { - // reset to default handler - signal(sig, SIG_DFL); fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig)); - // skip the top three signal handler related frames - PrintStack(3); + // Crude recursive mutex with no signal-unsafe system calls, to avoid + // re-entrance from multiple threads and avoid core dumping while trying + // to print the stack trace. + uint64_t tid = 0; + { + const auto ptid = pthread_self(); + // pthread_t is an opaque type + memcpy(&tid, &ptid, std::min(sizeof(tid), sizeof(ptid))); + // Essentially ensure non-zero + ++tid; + } + for (;;) { + uint64_t expected = 0; + if (g_thread_handling_stack_trace.compare_exchange_strong(expected, tid)) { + // Acquired mutex + g_recursion_count = 0; + break; + } + if (expected == tid) { + ++g_recursion_count; + fprintf(stderr, "Recursive call to stack trace handler (%d)\n", + g_recursion_count); + break; + } + // Sleep before trying again + usleep(1000); + } + + if (g_recursion_count > 2) { + // Give up after too many recursions + fprintf(stderr, "Too many recursive calls to stack trace handler (%d)\n", + g_recursion_count); + } else { + if (g_at_exit_called.load(std::memory_order_acquire)) { + fprintf(stderr, "In a race with process already exiting...\n"); + } - // Efforts to fix or suppress TSAN warnings "signal-unsafe call inside of - // a signal" have failed, so just warn the user about them. + // skip the top three signal handler related frames + PrintStack(3); + + // Efforts to fix or suppress TSAN warnings "signal-unsafe call inside of + // a signal" have failed, so just warn the user about them. #ifdef __SANITIZE_THREAD__ - fprintf(stderr, - "==> NOTE: any above warnings about \"signal-unsafe call\" are\n" - "==> ignorable, as they are expected when generating a stack\n" - "==> trace because of a signal under TSAN. Consider why the\n" - "==> signal was generated to begin with, and the stack trace\n" - "==> in the TSAN warning can be useful for that. (The stack\n" - "==> trace printed by the signal handler is likely obscured\n" - "==> by TSAN output.)\n"); + fprintf(stderr, + "==> NOTE: any above warnings about \"signal-unsafe call\" are\n" + "==> ignorable, as they are expected when generating a stack\n" + "==> trace because of a signal under TSAN. Consider why the\n" + "==> signal was generated to begin with, and the stack trace\n" + "==> in the TSAN warning can be useful for that. 
(The stack\n" + "==> trace printed by the signal handler is likely obscured\n" + "==> by TSAN output.)\n"); #endif + } + // reset to default handler + signal(sig, SIG_DFL); // re-signal to default handler (so we still get core dump if needed...) raise(sig); + + // release the mutex, in case this is somehow recoverable + if (g_recursion_count > 0) { + --g_recursion_count; + } else { + g_thread_handling_stack_trace.store(0, std::memory_order_release); + } +} + +static void AtExit() { + // wait for stack trace handler to finish, if needed + while (g_thread_handling_stack_trace.load(std::memory_order_acquire)) { + usleep(1000); + } + g_at_exit_called.store(true, std::memory_order_release); } void InstallStackTraceHandler() { @@ -342,6 +403,7 @@ void InstallStackTraceHandler() { signal(SIGSEGV, StackTraceHandler); signal(SIGBUS, StackTraceHandler); signal(SIGABRT, StackTraceHandler); + atexit(AtExit); // Allow ouside debugger to attach, even with Yama security restrictions. // This is needed even outside of PrintStack() so that external mechanisms // can dump stacks if they suspect that a test has hung. From 4dd2bb8f701b8f0ecc20feb47d8245f7fd27709e Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 27 Nov 2023 11:49:52 -0800 Subject: [PATCH 303/386] Fix stack trace trimming with LLDB (#12101) Summary: I must have chosen trimming before frame 8 based on assertion failures, but that trims too many frame for a general segfault. So this changes to start printing at frame 4, as in this example where I've seeded a null deref: ``` Received signal 11 (Segmentation fault) Invoking LLDB for stack trace... Process 873208 stopped * thread #1, name = 'db_stress', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8f1033 libc.so.6`__GI___wait4(pid=873478, stat_loc=0x00007fb1fb114030, options=0, usage=0x0000000000000000) at wait4.c:30:10 thread #2, name = 'rocksdb:low', stop reason = signal SIGSTOP frame #0: 0x00007fb1fe8972a1 libc.so.6`__GI___futex_abstimed_wait_cancelable64 at futex-internal.c:57:12 Executable module set to "/data/users/peterd/rocksdb/db_stress". Architecture set to: x86_64-unknown-linux-gnu. True frame #4: 0x00007fb1fe844540 libc.so.6`__restore_rt at libc_sigaction.c:13 frame #5: 0x0000000000608514 db_stress`rocksdb::StressTest::InitDb(rocksdb::SharedState*) at db_stress_test_base.cc:345:18 frame #6: 0x0000000000585d62 db_stress`rocksdb::RunStressTestImpl(rocksdb::SharedState*) at db_stress_driver.cc:84:17 frame #7: 0x000000000058dd69 db_stress`rocksdb::RunStressTest(shared=0x00006120000001c0) at db_stress_driver.cc:266:34 frame #8: 0x0000000000453b34 db_stress`rocksdb::db_stress_tool(int, char**) at db_stress_tool.cc:370:20 ... 
``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12101 Test Plan: manual (see above) Reviewed By: ajkr Differential Revision: D51593217 Pulled By: pdillinger fbshipit-source-id: 4a71eb8e516edbc32e682f9537bc77d073a7b4ed --- port/stack_trace.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/port/stack_trace.cc b/port/stack_trace.cc index 292430752..9ef51bebb 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -256,9 +256,9 @@ void PrintStack(int first_frames_to_skip) { if (lldb_stack_trace) { fprintf(stderr, "Invoking LLDB for stack trace...\n"); - // Skip top ~8 frames here in PrintStack + // Skip top ~4 frames here in PrintStack auto bt_in_lldb = - "script -l python -- for f in lldb.thread.frames[8:]: print(f)"; + "script -l python -- for f in lldb.thread.frames[4:]: print(f)"; execlp(/*cmd in PATH*/ "lldb", /*arg0*/ "lldb", "-p", attach_pid_str, "-b", "-Q", "-o", GetLldbScriptSelectThread(attach_tid), "-o", bt_in_lldb, (char*)nullptr); From 6e7701d49bd7d1615eb7c1295d798956cff010f2 Mon Sep 17 00:00:00 2001 From: Alexander Kiel Date: Mon, 27 Nov 2023 11:50:53 -0800 Subject: [PATCH 304/386] Fix JavaDoc of setCompactionReadaheadSize (#12090) Summary: Recently in https://github.com/facebook/rocksdb/issues/11762 the default of `compaction_readahead_size` changed from 0 to 2 MB. Closes: https://github.com/facebook/rocksdb/issues/12088 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12090 Reviewed By: jaykorean Differential Revision: D51531762 Pulled By: ajkr fbshipit-source-id: a0b7145a1dca95ee90ffa3553f6eeacce6424aee --- java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java b/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java index 8bf7b0d64..1521fb4d0 100644 --- a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java +++ b/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java @@ -418,7 +418,7 @@ public interface MutableDBOptionsInterface * That way RocksDB's compaction is doing sequential instead of random reads. *

    - * Default: 0 + * Default: 2MB * * @param compactionReadaheadSize The compaction read-ahead size * From 4d041385129b626be318610511f14344c500fc0c Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 27 Nov 2023 13:00:15 -0800 Subject: [PATCH 305/386] Add dynamic disabling of compressed cache to db_stress (#12102) Summary: We now support re-enabling the compressed portion of the `TieredCache` after dynamically disabling it. Add it to db_stress for testing purposes. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12102 Reviewed By: akankshamahajan15 Differential Revision: D51594259 Pulled By: anand1976 fbshipit-source-id: ea544e30a5ebd6290fc9ed46a241f09634764d2a --- db_stress_tool/db_stress_common.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/db_stress_tool/db_stress_common.cc b/db_stress_tool/db_stress_common.cc index c0087dc5c..dc600a49f 100644 --- a/db_stress_tool/db_stress_common.cc +++ b/db_stress_tool/db_stress_common.cc @@ -210,9 +210,6 @@ void CompressedCacheSetCapacityThread(void* v) { (double)thread->rand.Uniform( FLAGS_compressed_secondary_cache_ratio * 100) / 100; - if (new_comp_cache_ratio == 0.0) { - new_comp_cache_ratio = 0.05; - } fprintf(stderr, "New comp cache ratio = %f\n", new_comp_cache_ratio); s = UpdateTieredCache(block_cache, /*capacity*/ -1, From acc078f8784d87a4703ff5ecd04df349ef0f44b4 Mon Sep 17 00:00:00 2001 From: anand76 Date: Tue, 28 Nov 2023 14:54:08 -0800 Subject: [PATCH 306/386] Add tiered cache options to db_bench (#12104) Summary: Add the option to have a 3-tier block cache (uncompressed RAM, compressed RAM, and local flash) in db_bench, as well as specifying secondary cache admission policy. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12104 Reviewed By: ajkr Differential Revision: D51629092 Pulled By: anand1976 fbshipit-source-id: 6a208f853bc85d3d8b437d91cb1b0142d9a99e53 --- tools/db_bench_tool.cc | 104 +++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 24 deletions(-) diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 340a8a3a1..dc595781d 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -602,12 +602,17 @@ DEFINE_uint32( "compress_format_version == 2 -- decompressed size is included" " in the block header in varint32 format."); -DEFINE_bool(use_tiered_volatile_cache, false, +DEFINE_bool(use_tiered_cache, false, "If use_compressed_secondary_cache is true and " "use_tiered_volatile_cache is true, then allocate a tiered cache " "that distributes cache reservations proportionally over both " "the caches."); +DEFINE_string( + tiered_adm_policy, "auto", + "Admission policy to use for the secondary cache(s) in the tiered cache. " + "Allowed values are auto, placeholder, allow_cache_hits, and three_queue."); + DEFINE_int64(simcache_size, -1, "Number of bytes to use as a simcache of " "uncompressed data. 
Nagative value disables simcache."); @@ -1275,6 +1280,24 @@ static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType( } } +static enum ROCKSDB_NAMESPACE::TieredAdmissionPolicy StringToAdmissionPolicy( + const char* policy) { + assert(policy); + + if (!strcasecmp(policy, "auto")) + return ROCKSDB_NAMESPACE::kAdmPolicyAuto; + else if (!strcasecmp(policy, "placeholder")) + return ROCKSDB_NAMESPACE::kAdmPolicyPlaceholder; + else if (!strcasecmp(policy, "allow_cache_hits")) + return ROCKSDB_NAMESPACE::kAdmPolicyAllowCacheHits; + else if (!strcasecmp(policy, "three_queue")) + return ROCKSDB_NAMESPACE::kAdmPolicyThreeQueue; + else { + fprintf(stderr, "Cannot parse admission policy %s\n", policy); + exit(1); + } +} + static std::string ColumnFamilyName(size_t i) { if (i == 0) { return ROCKSDB_NAMESPACE::kDefaultColumnFamilyName; @@ -3022,6 +3045,7 @@ class Benchmark { static std::shared_ptr NewCache(int64_t capacity) { CompressedSecondaryCacheOptions secondary_cache_opts; + TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto; bool use_tiered_cache = false; if (capacity <= 0) { return nullptr; @@ -3038,10 +3062,30 @@ class Benchmark { FLAGS_compressed_secondary_cache_compression_type_e; secondary_cache_opts.compress_format_version = FLAGS_compressed_secondary_cache_compress_format_version; - if (FLAGS_use_tiered_volatile_cache) { + if (FLAGS_use_tiered_cache) { use_tiered_cache = true; + adm_policy = StringToAdmissionPolicy(FLAGS_tiered_adm_policy.c_str()); } } + if (!FLAGS_secondary_cache_uri.empty()) { + if (!use_tiered_cache && FLAGS_use_compressed_secondary_cache) { + fprintf( + stderr, + "Cannot specify both --secondary_cache_uri and " + "--use_compressed_secondary_cache when using a non-tiered cache\n"); + exit(1); + } + Status s = SecondaryCache::CreateFromString( + ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); + if (secondary_cache == nullptr) { + fprintf(stderr, + "No secondary cache registered matching string: %s status=%s\n", + FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); + exit(1); + } + } + + std::shared_ptr block_cache; if (FLAGS_cache_type == "clock_cache") { fprintf(stderr, "Old clock cache implementation has been removed.\n"); exit(1); @@ -3061,13 +3105,24 @@ class Benchmark { opts.hash_seed = GetCacheHashSeed(); if (use_tiered_cache) { TieredCacheOptions tiered_opts; - opts.capacity += secondary_cache_opts.capacity; tiered_opts.cache_type = PrimaryCacheType::kCacheTypeHCC; tiered_opts.cache_opts = &opts; + tiered_opts.total_capacity = + opts.capacity + secondary_cache_opts.capacity; + tiered_opts.compressed_secondary_ratio = + secondary_cache_opts.capacity * 1.0 / tiered_opts.total_capacity; tiered_opts.comp_cache_opts = secondary_cache_opts; - return NewTieredCache(tiered_opts); + tiered_opts.nvm_sec_cache = secondary_cache; + tiered_opts.adm_policy = adm_policy; + block_cache = NewTieredCache(tiered_opts); } else { - return opts.MakeSharedCache(); + if (!FLAGS_secondary_cache_uri.empty()) { + opts.secondary_cache = secondary_cache; + } else if (FLAGS_use_compressed_secondary_cache) { + opts.secondary_cache = + NewCompressedSecondaryCache(secondary_cache_opts); + } + block_cache = opts.MakeSharedCache(); } } else if (FLAGS_cache_type == "lru_cache") { LRUCacheOptions opts( @@ -3076,36 +3131,37 @@ class Benchmark { GetCacheAllocator(), kDefaultToAdaptiveMutex, kDefaultCacheMetadataChargePolicy, FLAGS_cache_low_pri_pool_ratio); opts.hash_seed = GetCacheHashSeed(); - if (!FLAGS_secondary_cache_uri.empty()) { - Status s 
= SecondaryCache::CreateFromString( - ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache); - if (secondary_cache == nullptr) { - fprintf( - stderr, - "No secondary cache registered matching string: %s status=%s\n", - FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str()); - exit(1); - } - opts.secondary_cache = secondary_cache; - } else if (FLAGS_use_compressed_secondary_cache && !use_tiered_cache) { - opts.secondary_cache = - NewCompressedSecondaryCache(secondary_cache_opts); - } - if (use_tiered_cache) { TieredCacheOptions tiered_opts; - opts.capacity += secondary_cache_opts.capacity; tiered_opts.cache_type = PrimaryCacheType::kCacheTypeLRU; tiered_opts.cache_opts = &opts; + tiered_opts.total_capacity = + opts.capacity + secondary_cache_opts.capacity; + tiered_opts.compressed_secondary_ratio = + secondary_cache_opts.capacity * 1.0 / tiered_opts.total_capacity; tiered_opts.comp_cache_opts = secondary_cache_opts; - return NewTieredCache(tiered_opts); + tiered_opts.nvm_sec_cache = secondary_cache; + tiered_opts.adm_policy = adm_policy; + block_cache = NewTieredCache(tiered_opts); } else { - return opts.MakeSharedCache(); + if (!FLAGS_secondary_cache_uri.empty()) { + opts.secondary_cache = secondary_cache; + } else if (FLAGS_use_compressed_secondary_cache) { + opts.secondary_cache = + NewCompressedSecondaryCache(secondary_cache_opts); + } + block_cache = opts.MakeSharedCache(); } } else { fprintf(stderr, "Cache type not supported."); exit(1); } + + if (!block_cache) { + fprintf(stderr, "Unable to allocate block cache\n"); + exit(1); + } + return block_cache; } public: From d68f45e777563018453c1506a94dc3a4f2cc7b82 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Wed, 29 Nov 2023 11:35:59 -0800 Subject: [PATCH 307/386] Flush buffered logs when FlushRequest is rescheduled (#12105) Summary: The optimization to not find and delete obsolete files when FlushRequest is re-scheduled also inadvertently skipped flushing the `LogBuffer`, resulting in missed logs. This PR fixes the issue. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12105 Test Plan: manually check this test has the correct info log after the fix `./column_family_test --gtest_filter=ColumnFamilyRetainUDTTest.NotAllKeysExpiredFlushRescheduled` Reviewed By: ajkr Differential Revision: D51671079 Pulled By: jowlyzhang fbshipit-source-id: da0640e07e35c69c08988772ed611ec9e67f2e92 --- db/db_impl/db_impl_compaction_flush.cc | 36 +++++++++++++------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index 08812a35b..71c23de95 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -3301,7 +3301,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0"); ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem); - // There is no need to do these clean up if the flush job is rescheduled + // There is no need to find obsolete files if the flush job is rescheduled // to retain user-defined timestamps because the job doesn't get to the // stage of actually flushing the MemTables. if (!flush_rescheduled_to_retain_udt) { @@ -3309,25 +3309,25 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) { // have created. 
Thus, we force full scan in FindObsoleteFiles() FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()); - // delete unnecessary files if any, this is done outside the mutex - if (job_context.HaveSomethingToClean() || - job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { - mutex_.Unlock(); - TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound"); - // Have to flush the info logs before bg_flush_scheduled_-- - // because if bg_flush_scheduled_ becomes 0 and the lock is - // released, the deconstructor of DB can kick in and destroy all the - // states of DB so info_log might not be available after that point. - // It also applies to access other states that DB owns. - log_buffer.FlushBufferToLog(); - if (job_context.HaveSomethingToDelete()) { - PurgeObsoleteFiles(job_context); - } - job_context.Clean(); - mutex_.Lock(); + } + // delete unnecessary files if any, this is done outside the mutex + if (job_context.HaveSomethingToClean() || + job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) { + mutex_.Unlock(); + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound"); + // Have to flush the info logs before bg_flush_scheduled_-- + // because if bg_flush_scheduled_ becomes 0 and the lock is + // released, the deconstructor of DB can kick in and destroy all the + // states of DB so info_log might not be available after that point. + // It also applies to access other states that DB owns. + log_buffer.FlushBufferToLog(); + if (job_context.HaveSomethingToDelete()) { + PurgeObsoleteFiles(job_context); } - TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp"); + job_context.Clean(); + mutex_.Lock(); } + TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp"); assert(num_running_flushes_ > 0); num_running_flushes_--; From a7779458bdf1f55a70f28f2fa557f42d9cdd6a60 Mon Sep 17 00:00:00 2001 From: raffertyyu Date: Thu, 30 Nov 2023 08:06:37 -0800 Subject: [PATCH 308/386] sst_dump support cuckoo table (#12098) Summary: https://github.com/facebook/rocksdb/issues/11446 Support Cuckoo Table format in sst_dump. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12098 Reviewed By: jowlyzhang Differential Revision: D51594094 Pulled By: ajkr fbshipit-source-id: ba9092818bc3cc0f207b000391aa21d564570df2 --- table/sst_file_dumper.cc | 16 +++++++++++++++- table/sst_file_dumper.h | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 150776de1..d44056def 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -71,6 +71,7 @@ extern const uint64_t kBlockBasedTableMagicNumber; extern const uint64_t kLegacyBlockBasedTableMagicNumber; extern const uint64_t kPlainTableMagicNumber; extern const uint64_t kLegacyPlainTableMagicNumber; +extern const uint64_t kCuckooTableMagicNumber; const char* testFileName = "test_file_name"; @@ -123,9 +124,15 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) { if (s.ok()) { if (magic_number == kPlainTableMagicNumber || - magic_number == kLegacyPlainTableMagicNumber) { + magic_number == kLegacyPlainTableMagicNumber || + magic_number == kCuckooTableMagicNumber) { soptions_.use_mmap_reads = true; + if (magic_number == kCuckooTableMagicNumber) { + fopts = soptions_; + fopts.temperature = file_temp_; + } + fs->NewRandomAccessFile(file_path, fopts, &file, nullptr); file_.reset(new RandomAccessFileReader(std::move(file), file_path)); } @@ -426,6 +433,13 @@ Status SstFileDumper::SetTableOptionsByMagicNumber( if (!silent_) { fprintf(stdout, "Sst file format: plain table\n"); } + } else if (table_magic_number == kCuckooTableMagicNumber) { + ioptions_.allow_mmap_reads = true; + + options_.table_factory.reset(NewCuckooTableFactory()); + if (!silent_) { + fprintf(stdout, "Sst file format: cuckoo table\n"); + } } else { char error_msg_buffer[80]; snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1, diff --git a/table/sst_file_dumper.h b/table/sst_file_dumper.h index 1e78959d1..bd97d817d 100644 --- a/table/sst_file_dumper.h +++ b/table/sst_file_dumper.h @@ -88,7 +88,7 @@ class SstFileDumper { std::unique_ptr table_reader_; std::unique_ptr file_; - const ImmutableOptions ioptions_; + ImmutableOptions ioptions_; const MutableCFOptions moptions_; ReadOptions read_options_; InternalKeyComparator internal_comparator_; From b760af321fedd73163c337f184be1c626c294e19 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Thu, 30 Nov 2023 14:10:13 -0800 Subject: [PATCH 309/386] Initial support for wide columns in WriteBatchWithIndex (#11982) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/11982 The patch constitutes the first phase of adding wide-column support to `WriteBatchWithIndex`. Namely, it implements the `PutEntity` API in `WriteBatchWithIndex` on the write path, and the `Iterator::columns()` API in `BaseDeltaIterator` on the read path. In addition, it updates all existing read APIs (`GetFromBatch`, `GetFromBatchAndDB`, `MultiGetFromBatchAndDB`, and `BaseDeltaIterator`) so that they handle wide-column entities correctly. This includes returning the value of the default column of entities as appropriate and correctly applying merges to wide-column base values. I plan to add the wide-column specific point lookup APIs (`GetEntityFromBatch`, `GetEntityFromBatchAndDB`, and `MultiGetEntityFromBatchAndDB`) in subsequent patches. 
Reviewed By: jaykorean Differential Revision: D50439231 fbshipit-source-id: 59fd0f12c45249fecde8af249c5d3f509ba58bbe --- .../utilities/write_batch_with_index.h | 47 +-- .../write_batch_with_index.cc | 198 ++++++++----- .../write_batch_with_index_internal.cc | 184 ++++++++---- .../write_batch_with_index_internal.h | 73 ++++- .../write_batch_with_index_test.cc | 276 +++++++++++++++++- 5 files changed, 606 insertions(+), 172 deletions(-) diff --git a/include/rocksdb/utilities/write_batch_with_index.h b/include/rocksdb/utilities/write_batch_with_index.h index e0536712c..090a4a444 100644 --- a/include/rocksdb/utilities/write_batch_with_index.h +++ b/include/rocksdb/utilities/write_batch_with_index.h @@ -10,7 +10,6 @@ // inserted. #pragma once - #include #include #include @@ -30,6 +29,7 @@ class DB; class ReadCallback; struct ReadOptions; struct DBOptions; +class MergeContext; enum WriteType { kPutRecord, @@ -39,11 +39,12 @@ enum WriteType { kDeleteRangeRecord, kLogDataRecord, kXIDRecord, + kPutEntityRecord, kUnknownRecord, }; -// an entry for Put, Merge, Delete, or SingleDelete entry for write batches. -// Used in WBWIIterator. +// An entry for Put, PutEntity, Merge, Delete, or SingleDelete for write +// batches. Used in WBWIIterator. struct WriteEntry { WriteType type = kUnknownRecord; Slice key; @@ -77,12 +78,11 @@ class WBWIIterator { }; // A WriteBatchWithIndex with a binary searchable index built for all the keys -// inserted. -// In Put(), Merge() Delete(), or SingleDelete(), the same function of the -// wrapped will be called. At the same time, indexes will be built. -// By calling GetWriteBatch(), a user will get the WriteBatch for the data -// they inserted, which can be used for DB::Write(). -// A user can call NewIterator() to create an iterator. +// inserted. In Put(), PutEntity(), Merge(), Delete(), or SingleDelete(), the +// corresponding function of the wrapped WriteBatch will be called. At the same +// time, indexes will be built. By calling GetWriteBatch(), a user will get the +// WriteBatch for the data they inserted, which can be used for DB::Write(). A +// user can call NewIterator() to create an iterator. class WriteBatchWithIndex : public WriteBatchBase { public: // backup_index_comparator: the backup comparator used to compare keys @@ -121,14 +121,7 @@ class WriteBatchWithIndex : public WriteBatchBase { } Status PutEntity(ColumnFamilyHandle* column_family, const Slice& /* key */, - const WideColumns& /* columns */) override { - if (!column_family) { - return Status::InvalidArgument( - "Cannot call this method without a column family handle"); - } - return Status::NotSupported( - "PutEntity not supported by WriteBatchWithIndex"); - } + const WideColumns& /* columns */) override; Status PutEntity(const Slice& /* key */, const AttributeGroups& attribute_groups) override { @@ -236,6 +229,8 @@ class WriteBatchWithIndex : public WriteBatchBase { return GetFromBatch(nullptr, options, key, value); } + // TODO: implement GetEntityFromBatch + // Similar to DB::Get() but will also read writes from this batch. 
// // This function will query both this batch and the DB and then merge @@ -262,21 +257,24 @@ class WriteBatchWithIndex : public WriteBatchBase { ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value); + // TODO: implement GetEntityFromBatchAndDB + void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const size_t num_keys, const Slice* keys, PinnableSlice* values, Status* statuses, bool sorted_input); + // TODO: implement MultiGetEntityFromBatchAndDB + // Records the state of the batch for future calls to RollbackToSavePoint(). // May be called multiple times to set multiple save points. void SetSavePoint() override; - // Remove all entries in this batch (Put, Merge, Delete, SingleDelete, - // PutLogData) since the most recent call to SetSavePoint() and removes the - // most recent save point. - // If there is no previous call to SetSavePoint(), behaves the same as - // Clear(). + // Remove all entries in this batch (Put, PutEntity, Merge, Delete, + // SingleDelete, PutLogData) since the most recent call to SetSavePoint() and + // removes the most recent save point. If there is no previous call to + // SetSavePoint(), behaves the same as Clear(). // // Calling RollbackToSavePoint invalidates any open iterators on this batch. // @@ -305,6 +303,11 @@ class WriteBatchWithIndex : public WriteBatchBase { // last sub-batch. size_t SubBatchCnt(); + void MergeAcrossBatchAndDB(ColumnFamilyHandle* column_family, + const Slice& key, + const PinnableWideColumns& existing, + const MergeContext& merge_context, + PinnableSlice* value, Status* status); Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* value, ReadCallback* callback); diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index bbfc60f9b..f969f6067 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -10,8 +10,10 @@ #include "db/column_family.h" #include "db/db_impl/db_impl.h" +#include "db/dbformat.h" #include "db/merge_context.h" #include "db/merge_helper.h" +#include "db/wide/wide_columns_helper.h" #include "memory/arena.h" #include "memtable/skiplist.h" #include "options/db_options.h" @@ -253,6 +255,14 @@ Status WriteBatchWithIndex::Rep::ReBuildIndex() { case kTypeRollbackXID: case kTypeNoop: break; + case kTypeColumnFamilyWideColumnEntity: + case kTypeWideColumnEntity: + found++; + if (!UpdateExistingEntryWithCfId(column_family_id, key, + kPutEntityRecord)) { + AddNewEntry(column_family_id); + } + break; default: return Status::Corruption( "unknown WriteBatch tag in ReBuildIndex", @@ -352,6 +362,22 @@ Status WriteBatchWithIndex::Put(ColumnFamilyHandle* column_family, return Status::NotSupported(); } +Status WriteBatchWithIndex::PutEntity(ColumnFamilyHandle* column_family, + const Slice& key, + const WideColumns& columns) { + assert(rep); + + rep->SetLastEntryOffset(); + + const Status s = rep->write_batch.PutEntity(column_family, key, columns); + + if (s.ok()) { + rep->AddOrUpdateIndex(column_family, key, kPutEntityRecord); + } + + return s; +} + Status WriteBatchWithIndex::Delete(ColumnFamilyHandle* column_family, const Slice& key) { rep->SetLastEntryOffset(); @@ -509,6 +535,43 @@ Status WriteBatchWithIndex::GetFromBatchAndDB(DB* db, nullptr); } +void WriteBatchWithIndex::MergeAcrossBatchAndDB( + ColumnFamilyHandle* 
column_family, const Slice& key, + const PinnableWideColumns& existing, const MergeContext& merge_context, + PinnableSlice* value, Status* status) { + assert(value); + assert(status); + assert(status->ok() || status->IsNotFound()); + + std::string result_value; + + if (status->ok()) { + if (WideColumnsHelper::HasDefaultColumnOnly(existing.columns())) { + *status = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family, key, MergeHelper::kPlainBaseValue, + WideColumnsHelper::GetDefaultColumn(existing.columns()), + merge_context, &result_value, + static_cast(nullptr)); + } else { + *status = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family, key, MergeHelper::kWideBaseValue, existing.columns(), + merge_context, &result_value, + static_cast(nullptr)); + } + } else { + assert(status->IsNotFound()); + *status = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( + column_family, key, merge_context, &result_value, + static_cast(nullptr)); + } + + if (status->ok()) { + value->Reset(); + *value->GetSelf() = std::move(result_value); + value->PinSelf(); + } +} + Status WriteBatchWithIndex::GetFromBatchAndDB( DB* db, const ReadOptions& read_options, ColumnFamilyHandle* column_family, const Slice& key, PinnableSlice* pinnable_val, ReadCallback* callback) { @@ -537,45 +600,39 @@ Status WriteBatchWithIndex::GetFromBatchAndDB( if (result == WBWIIteratorImpl::kFound) { pinnable_val->PinSelf(); return s; - } else if (!s.ok() || result == WBWIIteratorImpl::kError) { + } + + if (!s.ok() || result == WBWIIteratorImpl::kError) { return s; - } else if (result == WBWIIteratorImpl::kDeleted) { + } + + if (result == WBWIIteratorImpl::kDeleted) { return Status::NotFound(); } - assert(result == WBWIIteratorImpl::kMergeInProgress || - result == WBWIIteratorImpl::kNotFound); // Did not find key in batch OR could not resolve Merges. Try DB. - if (!callback) { - s = static_cast_with_check(db->GetRootDB()) - ->GetImpl(read_options, column_family, key, pinnable_val); + DBImpl::GetImplOptions get_impl_options; + get_impl_options.column_family = column_family; + + // Note: we have to retrieve all columns if we have to merge KVs from the + // batch and the DB; otherwise, the default column is sufficient. 
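  // [Editorial note, not part of the original patch] A minimal sketch of the
  // two read paths below, assuming a batch holding Merge("k", "m") over a DB
  // entity written as PutEntity("k", {{kDefaultWideColumnName, "v"}, {"attr", "x"}}):
  //   - kMergeInProgress: get_impl_options.columns points at `existing`, so the
  //     DB lookup returns the full wide-column entity and MergeAcrossBatchAndDB()
  //     can merge "m" against either the default column or the whole entity.
  //   - kNotFound: only the plain value is needed, so get_impl_options.value
  //     writes straight into `pinnable_val` and no post-processing happens.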
+ PinnableWideColumns existing; + + if (result == WBWIIteratorImpl::kMergeInProgress) { + get_impl_options.columns = &existing; } else { - DBImpl::GetImplOptions get_impl_options; - get_impl_options.column_family = column_family; + assert(result == WBWIIteratorImpl::kNotFound); get_impl_options.value = pinnable_val; - get_impl_options.callback = callback; - s = static_cast_with_check(db->GetRootDB()) - ->GetImpl(read_options, key, get_impl_options); } - if (s.ok() || s.IsNotFound()) { // DB Get Succeeded - if (result == WBWIIteratorImpl::kMergeInProgress) { - // Merge result from DB with merges in Batch - std::string merge_result; - - if (s.ok()) { - s = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family, key, *pinnable_val, merge_context, &merge_result); - } else { - assert(s.IsNotFound()); - s = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family, key, merge_context, &merge_result); - } - if (s.ok()) { - pinnable_val->Reset(); - *pinnable_val->GetSelf() = std::move(merge_result); - pinnable_val->PinSelf(); - } + get_impl_options.callback = callback; + s = static_cast_with_check(db->GetRootDB()) + ->GetImpl(read_options, key, get_impl_options); + + if (result == WBWIIteratorImpl::kMergeInProgress) { + if (s.ok() || s.IsNotFound()) { // DB lookup succeeded + MergeAcrossBatchAndDB(column_family, key, existing, merge_context, + pinnable_val, &s); } } @@ -612,12 +669,16 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( return; } - autovector key_context; - autovector sorted_keys; - // To hold merges from the write batch - autovector, - MultiGetContext::MAX_BATCH_SIZE> - merges; + autovector existing; + existing.reserve(num_keys); + + autovector key_contexts; + key_contexts.reserve(num_keys); + + using MergeTuple = std::tuple; + autovector merges; + merges.reserve(num_keys); + // Since the lifetime of the WriteBatch is the same as that of the transaction // we cannot pin it as otherwise the returned value will not be available // after the transaction finishes. @@ -635,57 +696,56 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( pinnable_val->PinSelf(); continue; } + if (result == WBWIIteratorImpl::kDeleted) { *s = Status::NotFound(); continue; } + if (result == WBWIIteratorImpl::kError) { continue; } - assert(result == WBWIIteratorImpl::kMergeInProgress || - result == WBWIIteratorImpl::kNotFound); - key_context.emplace_back(column_family, keys[i], &values[i], - /* columns */ nullptr, /* timestamp */ nullptr, - &statuses[i]); - merges.emplace_back(result, std::move(merge_context)); + + // Note: we have to retrieve all columns if we have to merge KVs from the + // batch and the DB; otherwise, the default column is sufficient. + if (result == WBWIIteratorImpl::kMergeInProgress) { + existing.emplace_back(); + key_contexts.emplace_back(column_family, keys[i], /* value */ nullptr, + &existing.back(), /* timestamp */ nullptr, + &statuses[i]); + merges.emplace_back(&key_contexts.back(), std::move(merge_context), + pinnable_val); + continue; + } + + assert(result == WBWIIteratorImpl::kNotFound); + key_contexts.emplace_back(column_family, keys[i], pinnable_val, + /* columns */ nullptr, + /* timestamp */ nullptr, &statuses[i]); } - for (KeyContext& key : key_context) { + autovector sorted_keys; + sorted_keys.reserve(key_contexts.size()); + + for (KeyContext& key : key_contexts) { sorted_keys.emplace_back(&key); } // Did not find key in batch OR could not resolve Merges. Try DB. 
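  // [Editorial note, not part of the original patch] Sketch of the batched flow
  // implemented below: every key that still needs a DB lookup sits in
  // `key_contexts`; keys with pending merges additionally carry a
  // PinnableWideColumns target (in `existing`) plus a MergeTuple entry in
  // `merges`. After MultiGetWithCallback() fills in the DB results, the loop
  // over `merges` combines each base value with its batch merge operands via
  // MergeAcrossBatchAndDB(), writing the final result into the caller's
  // PinnableSlice.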
static_cast_with_check(db->GetRootDB()) - ->PrepareMultiGetKeys(key_context.size(), sorted_input, &sorted_keys); + ->PrepareMultiGetKeys(sorted_keys.size(), sorted_input, &sorted_keys); static_cast_with_check(db->GetRootDB()) ->MultiGetWithCallback(read_options, column_family, callback, &sorted_keys); - for (auto iter = key_context.begin(); iter != key_context.end(); ++iter) { - KeyContext& key = *iter; - if (key.s->ok() || key.s->IsNotFound()) { // DB Get Succeeded - size_t index = iter - key_context.begin(); - std::pair& merge_result = - merges[index]; - if (merge_result.first == WBWIIteratorImpl::kMergeInProgress) { - // Merge result from DB with merges in Batch - std::string merged_value; - - if (key.s->ok()) { - *key.s = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family, *key.key, *key.value, merge_result.second, - &merged_value); - } else { - assert(key.s->IsNotFound()); - *key.s = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family, *key.key, merge_result.second, &merged_value); - } - if (key.s->ok()) { - key.value->Reset(); - *key.value->GetSelf() = std::move(merged_value); - key.value->PinSelf(); - } - } + for (auto iter = merges.begin(); iter != merges.end(); ++iter) { + auto& [key_context, merge_context, value] = *iter; + + if (key_context->s->ok() || + key_context->s->IsNotFound()) { // DB lookup succeeded + MergeAcrossBatchAndDB(column_family, *key_context->key, + *key_context->columns, merge_context, value, + key_context->s); } } } diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.cc b/utilities/write_batch_with_index/write_batch_with_index_internal.cc index bedd5934d..2ae9fa86f 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.cc @@ -8,6 +8,8 @@ #include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/merge_helper.h" +#include "db/wide/wide_column_serialization.h" +#include "db/wide/wide_columns_helper.h" #include "options/cf_options.h" #include "rocksdb/comparator.h" #include "rocksdb/db.h" @@ -247,45 +249,83 @@ void BaseDeltaIterator::AdvanceBase() { bool BaseDeltaIterator::BaseValid() const { return base_iterator_->Valid(); } bool BaseDeltaIterator::DeltaValid() const { return delta_iterator_->Valid(); } -void BaseDeltaIterator::ResetValue() { value_.clear(); } +void BaseDeltaIterator::ResetValueAndColumns() { + value_.clear(); + columns_.clear(); +} -void BaseDeltaIterator::SetValueFromBase() { +void BaseDeltaIterator::SetValueAndColumnsFromBase() { assert(current_at_base_); assert(BaseValid()); assert(value_.empty()); + assert(columns_.empty()); value_ = base_iterator_->value(); + columns_ = base_iterator_->columns(); } -void BaseDeltaIterator::SetValueFromDelta() { +void BaseDeltaIterator::SetValueAndColumnsFromDelta() { assert(!current_at_base_); assert(DeltaValid()); assert(value_.empty()); + assert(columns_.empty()); WriteEntry delta_entry = delta_iterator_->Entry(); if (merge_context_.GetNumOperands() == 0) { - value_ = delta_entry.value; + if (delta_entry.type == kPutRecord) { + value_ = delta_entry.value; + columns_.emplace_back(kDefaultWideColumnName, value_); + } else if (delta_entry.type == kPutEntityRecord) { + Slice value_copy(delta_entry.value); + + status_ = WideColumnSerialization::Deserialize(value_copy, columns_); + if (!status_.ok()) { + return; + } + + if (WideColumnsHelper::HasDefaultColumn(columns_)) { + value_ = WideColumnsHelper::GetDefaultColumn(columns_); + } 
+ } return; } + ValueType result_type = kTypeValue; + if (delta_entry.type == kDeleteRecord || delta_entry.type == kSingleDeleteRecord) { status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family_, delta_entry.key, merge_context_, &merge_result_); + column_family_, delta_entry.key, merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); } else if (delta_entry.type == kPutRecord) { - status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family_, delta_entry.key, delta_entry.value, merge_context_, - &merge_result_); + status_ = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family_, delta_entry.key, MergeHelper::kPlainBaseValue, + delta_entry.value, merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); + } else if (delta_entry.type == kPutEntityRecord) { + status_ = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family_, delta_entry.key, MergeHelper::kWideBaseValue, + delta_entry.value, merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); } else if (delta_entry.type == kMergeRecord) { if (equal_keys_) { - status_ = WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - column_family_, delta_entry.key, base_iterator_->value(), - merge_context_, &merge_result_); + if (WideColumnsHelper::HasDefaultColumnOnly(base_iterator_->columns())) { + status_ = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family_, delta_entry.key, MergeHelper::kPlainBaseValue, + base_iterator_->value(), merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); + } else { + status_ = WriteBatchWithIndexInternal::MergeKeyWithBaseValue( + column_family_, delta_entry.key, MergeHelper::kWideBaseValue, + base_iterator_->columns(), merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); + } } else { status_ = WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - column_family_, delta_entry.key, merge_context_, &merge_result_); + column_family_, delta_entry.key, merge_context_, &merge_result_, + /* result_operand */ nullptr, &result_type); } } else { status_ = Status::NotSupported("Unsupported entry type for merge"); @@ -295,14 +335,32 @@ void BaseDeltaIterator::SetValueFromDelta() { return; } + if (result_type == kTypeWideColumnEntity) { + Slice entity(merge_result_); + + status_ = WideColumnSerialization::Deserialize(entity, columns_); + if (!status_.ok()) { + return; + } + + if (WideColumnsHelper::HasDefaultColumn(columns_)) { + value_ = WideColumnsHelper::GetDefaultColumn(columns_); + } + + return; + } + + assert(result_type == kTypeValue); + value_ = merge_result_; + columns_.emplace_back(kDefaultWideColumnName, value_); } void BaseDeltaIterator::UpdateCurrent() { // Suppress false positive clang analyzer warnings. #ifndef __clang_analyzer__ status_ = Status::OK(); - ResetValue(); + ResetValueAndColumns(); while (true) { auto delta_result = WBWIIteratorImpl::kNotFound; @@ -334,13 +392,13 @@ void BaseDeltaIterator::UpdateCurrent() { AdvanceDelta(); } else { current_at_base_ = false; - SetValueFromDelta(); + SetValueAndColumnsFromDelta(); return; } } else if (!DeltaValid()) { // Delta has finished. 
current_at_base_ = true; - SetValueFromBase(); + SetValueAndColumnsFromBase(); return; } else { int compare = @@ -354,7 +412,7 @@ void BaseDeltaIterator::UpdateCurrent() { if (delta_result != WBWIIteratorImpl::kDeleted || merge_context_.GetNumOperands() > 0) { current_at_base_ = false; - SetValueFromDelta(); + SetValueAndColumnsFromDelta(); return; } // Delta is less advanced and is delete. @@ -364,7 +422,7 @@ void BaseDeltaIterator::UpdateCurrent() { } } else { current_at_base_ = true; - SetValueFromBase(); + SetValueAndColumnsFromBase(); return; } } @@ -457,6 +515,8 @@ WBWIIteratorImpl::Result WBWIIteratorImpl::FindLatestUpdate( break; // ignore case kXIDRecord: break; // ignore + case kPutEntityRecord: + return WBWIIteratorImpl::kFound; default: return WBWIIteratorImpl::kError; } // end switch statement @@ -533,6 +593,11 @@ Status ReadableWriteBatch::GetEntryFromDataOffset(size_t data_offset, case kTypeRollbackXID: *type = kXIDRecord; break; + case kTypeColumnFamilyWideColumnEntity: + case kTypeWideColumnEntity: { + *type = kPutEntityRecord; + break; + } default: return Status::Corruption("unknown WriteBatch tag ", std::to_string(static_cast(tag))); @@ -632,9 +697,9 @@ WriteEntry WBWIIteratorImpl::Entry() const { auto s = write_batch_->GetEntryFromDataOffset( iter_entry->offset, &ret.type, &ret.key, &ret.value, &blob, &xid); assert(s.ok()); - assert(ret.type == kPutRecord || ret.type == kDeleteRecord || - ret.type == kSingleDeleteRecord || ret.type == kDeleteRangeRecord || - ret.type == kMergeRecord); + assert(ret.type == kPutRecord || ret.type == kPutEntityRecord || + ret.type == kDeleteRecord || ret.type == kSingleDeleteRecord || + ret.type == kDeleteRangeRecord || ret.type == kMergeRecord); // Make sure entry.key does not include user-defined timestamp. const Comparator* const ucmp = comparator_->GetComparator(column_family_id_); size_t ts_sz = ucmp->timestamp_size(); @@ -652,56 +717,26 @@ bool WBWIIteratorImpl::MatchesKey(uint32_t cf_id, const Slice& key) { } } -Status WriteBatchWithIndexInternal::MergeKeyWithNoBaseValue( - ColumnFamilyHandle* column_family, const Slice& key, - const MergeContext& context, std::string* result) { - // TODO: support wide columns in WBWI +Status WriteBatchWithIndexInternal::CheckAndGetImmutableOptions( + ColumnFamilyHandle* column_family, const ImmutableOptions** ioptions) { + assert(ioptions); + assert(!*ioptions); if (!column_family) { return Status::InvalidArgument("Must provide a column family"); } - const auto& ioptions = GetImmutableOptions(column_family); + const auto& iopts = GetImmutableOptions(column_family); - const auto* merge_operator = ioptions.merge_operator.get(); + const auto* merge_operator = iopts.merge_operator.get(); if (!merge_operator) { return Status::InvalidArgument( "Merge operator must be set for column family"); } - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. 
- return MergeHelper::TimedFullMerge( - merge_operator, key, MergeHelper::kNoBaseValue, context.GetOperands(), - ioptions.logger, ioptions.stats, ioptions.clock, - /* update_num_ops_stats */ false, result, - /* columns */ nullptr, /* op_failure_scope */ nullptr); -} - -Status WriteBatchWithIndexInternal::MergeKeyWithPlainBaseValue( - ColumnFamilyHandle* column_family, const Slice& key, const Slice& value, - const MergeContext& context, std::string* result) { - // TODO: support wide columns in WBWI - - if (!column_family) { - return Status::InvalidArgument("Must provide a column family"); - } - - const auto& ioptions = GetImmutableOptions(column_family); - - const auto* merge_operator = ioptions.merge_operator.get(); - if (!merge_operator) { - return Status::InvalidArgument( - "Merge operator must be set for column family"); - } + *ioptions = &iopts; - // `op_failure_scope` (an output parameter) is not provided (set to - // nullptr) since a failure must be propagated regardless of its value. - return MergeHelper::TimedFullMerge( - merge_operator, key, MergeHelper::kPlainBaseValue, value, - context.GetOperands(), ioptions.logger, ioptions.stats, ioptions.clock, - /* update_num_ops_stats */ false, result, - /* columns */ nullptr, /* op_failure_scope */ nullptr); + return Status::OK(); } WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( @@ -722,20 +757,41 @@ WBWIIteratorImpl::Result WriteBatchWithIndexInternal::GetFromBatch( return result; } else if (result == WBWIIteratorImpl::kNotFound) { return result; - } else if (result == WBWIIteratorImpl::Result::kFound) { // PUT - Slice entry_value = iter->Entry().value; + } else if (result == WBWIIteratorImpl::Result::kFound) { // Put/PutEntity + WriteEntry entry = iter->Entry(); + Slice entry_value = entry.value; if (context->GetNumOperands() > 0) { - *s = MergeKeyWithPlainBaseValue(column_family, key, entry_value, *context, - value); + if (entry.type == kPutRecord) { + *s = MergeKeyWithBaseValue( + column_family, key, MergeHelper::kPlainBaseValue, entry_value, + *context, value, static_cast(nullptr)); + } else { + assert(entry.type == kPutEntityRecord); + + *s = MergeKeyWithBaseValue( + column_family, key, MergeHelper::kWideBaseValue, entry_value, + *context, value, static_cast(nullptr)); + } if (!s->ok()) { result = WBWIIteratorImpl::Result::kError; } } else { - value->assign(entry_value.data(), entry_value.size()); + if (entry.type == kPutRecord) { + value->assign(entry_value.data(), entry_value.size()); + } else { + assert(entry.type == kPutEntityRecord); + Slice value_of_default; + *s = WideColumnSerialization::GetValueOfDefaultColumn(entry_value, + value_of_default); + if (s->ok()) { + value->assign(value_of_default.data(), value_of_default.size()); + } + } } } else if (result == WBWIIteratorImpl::kDeleted) { if (context->GetNumOperands() > 0) { - *s = MergeKeyWithNoBaseValue(column_family, key, *context, value); + *s = MergeKeyWithNoBaseValue(column_family, key, *context, value, + static_cast(nullptr)); if (s->ok()) { result = WBWIIteratorImpl::Result::kFound; } else { diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index c4135ad32..38904593f 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -8,7 +8,9 @@ #include #include +#include "db/dbformat.h" #include "db/merge_context.h" +#include "db/merge_helper.h" #include 
"memtable/skiplist.h" #include "options/db_options.h" #include "port/port.h" @@ -47,6 +49,7 @@ class BaseDeltaIterator : public Iterator { void Prev() override; Slice key() const override; Slice value() const override { return value_; } + const WideColumns& columns() const override { return columns_; } Slice timestamp() const override; Status status() const override; void Invalidate(Status s); @@ -58,9 +61,9 @@ class BaseDeltaIterator : public Iterator { void AdvanceBase(); bool BaseValid() const; bool DeltaValid() const; - void ResetValue(); - void SetValueFromBase(); - void SetValueFromDelta(); + void ResetValueAndColumns(); + void SetValueAndColumnsFromBase(); + void SetValueAndColumnsFromDelta(); void UpdateCurrent(); bool forward_; @@ -74,6 +77,7 @@ class BaseDeltaIterator : public Iterator { MergeContext merge_context_; std::string merge_result_; Slice value_; + WideColumns columns_; }; // Key used by skip list, as the binary searchable index of WriteBatchWithIndex. @@ -145,7 +149,7 @@ class ReadableWriteBatch : public WriteBatch { default_cf_ts_sz) {} // Retrieve some information from a write entry in the write batch, given // the start offset of the write entry. - Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* Key, + Status GetEntryFromDataOffset(size_t data_offset, WriteType* type, Slice* key, Slice* value, Slice* blob, Slice* xid) const; }; @@ -319,12 +323,12 @@ class WBWIIteratorImpl : public WBWIIterator { // Moves the iterator to first entry of the next key. void NextKey(); - // Moves the iterator to the Update (Put or Delete) for the current key - // If there are no Put/Delete, the Iterator will point to the first entry for - // this key - // @return kFound if a Put was found for the key + // Moves the iterator to the Update (Put, PutEntity or Delete) for the current + // key. If there is no Put/PutEntity/Delete, the Iterator will point to the + // first entry for this key. + // @return kFound if a Put/PutEntity was found for the key // @return kDeleted if a delete was found for the key - // @return kMergeInProgress if only merges were fouund for the key + // @return kMergeInProgress if only merges were found for the key // @return kError if an unsupported operation was found for the key // @return kNotFound if no operations were found for this key // @@ -385,15 +389,52 @@ class WriteBatchWithIndexInternal { static const Comparator* GetUserComparator(const WriteBatchWithIndex& wbwi, uint32_t cf_id); + template static Status MergeKeyWithNoBaseValue(ColumnFamilyHandle* column_family, const Slice& key, const MergeContext& context, - std::string* result); + ResultTs... results) { + const ImmutableOptions* ioptions = nullptr; - static Status MergeKeyWithPlainBaseValue(ColumnFamilyHandle* column_family, - const Slice& key, const Slice& value, - const MergeContext& context, - std::string* result); + const Status s = CheckAndGetImmutableOptions(column_family, &ioptions); + if (!s.ok()) { + return s; + } + + assert(ioptions); + + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. 
+ return MergeHelper::TimedFullMerge( + ioptions->merge_operator.get(), key, MergeHelper::kNoBaseValue, + context.GetOperands(), ioptions->logger, ioptions->stats, + ioptions->clock, /* update_num_ops_stats */ false, results..., + /* op_failure_scope */ nullptr); + } + + template + static Status MergeKeyWithBaseValue(ColumnFamilyHandle* column_family, + const Slice& key, const BaseTag& base_tag, + const BaseT& value, + const MergeContext& context, + ResultTs... results) { + const ImmutableOptions* ioptions = nullptr; + + const Status s = CheckAndGetImmutableOptions(column_family, &ioptions); + if (!s.ok()) { + return s; + } + + assert(ioptions); + + // `op_failure_scope` (an output parameter) is not provided (set to + // nullptr) since a failure must be propagated regardless of its value. + return MergeHelper::TimedFullMerge( + ioptions->merge_operator.get(), key, base_tag, value, + context.GetOperands(), ioptions->logger, ioptions->stats, + ioptions->clock, /* update_num_ops_stats */ false, results..., + /* op_failure_scope */ nullptr); + } // If batch contains a value for key, store it in *value and return kFound. // If batch contains a deletion for key, return Deleted. @@ -407,6 +448,10 @@ class WriteBatchWithIndexInternal { WriteBatchWithIndex* batch, ColumnFamilyHandle* column_family, const Slice& key, MergeContext* merge_context, std::string* value, Status* s); + + private: + static Status CheckAndGetImmutableOptions(ColumnFamilyHandle* column_family, + const ImmutableOptions** ioptions); }; } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/write_batch_with_index/write_batch_with_index_test.cc b/utilities/write_batch_with_index/write_batch_with_index_test.cc index 95333d8f4..90438ff2e 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_test.cc +++ b/utilities/write_batch_with_index/write_batch_with_index_test.cc @@ -81,37 +81,83 @@ using KVMap = std::map; class KVIter : public Iterator { public: explicit KVIter(const KVMap* map) : map_(map), iter_(map_->end()) {} + bool Valid() const override { return iter_ != map_->end(); } - void SeekToFirst() override { iter_ = map_->begin(); } + + void SeekToFirst() override { + iter_ = map_->begin(); + + if (Valid()) { + Update(); + } + } + void SeekToLast() override { if (map_->empty()) { iter_ = map_->end(); } else { iter_ = map_->find(map_->rbegin()->first); } + + if (Valid()) { + Update(); + } } + void Seek(const Slice& k) override { iter_ = map_->lower_bound(k.ToString()); + + if (Valid()) { + Update(); + } } + void SeekForPrev(const Slice& k) override { iter_ = map_->upper_bound(k.ToString()); Prev(); + + if (Valid()) { + Update(); + } } - void Next() override { ++iter_; } + + void Next() override { + ++iter_; + + if (Valid()) { + Update(); + } + } + void Prev() override { if (iter_ == map_->begin()) { iter_ = map_->end(); return; } --iter_; + + if (Valid()) { + Update(); + } } + Slice key() const override { return iter_->first; } - Slice value() const override { return iter_->second; } + Slice value() const override { return value_; } + const WideColumns& columns() const override { return columns_; } Status status() const override { return Status::OK(); } private: + void Update() { + assert(Valid()); + + value_ = iter_->second; + columns_ = WideColumns{{kDefaultWideColumnName, value_}}; + } + const KVMap* const map_; KVMap::const_iterator iter_; + Slice value_; + WideColumns columns_; }; static std::string PrintContents(WriteBatchWithIndex* batch, @@ -265,6 +311,12 @@ class WBWIBaseTest : public testing::Test { } 
else if (key[i] == 'p') { result = key + std::to_string(i); EXPECT_OK(batch_->Put(cf, key, result)); + } else if (key[i] == 'e') { + const std::string suffix = std::to_string(i); + result = key + suffix; + const WideColumns columns{{kDefaultWideColumnName, result}, + {key, suffix}}; + EXPECT_OK(batch_->PutEntity(cf, key, columns)); } else if (key[i] == 'm') { std::string value = key + std::to_string(i); EXPECT_OK(batch_->Merge(cf, key, value)); @@ -2496,6 +2548,224 @@ TEST_P(WriteBatchWithIndexTest, IndexNoTs) { } } +TEST_P(WriteBatchWithIndexTest, WideColumnsBatchOnly) { + // Tests for the case when there's no need to consult the underlying DB during + // queries, i.e. when all queries can be answered using the write batch only. + + ASSERT_OK(OpenDB()); + + constexpr size_t num_keys = 6; + + constexpr char delete_key[] = "d"; + constexpr char delete_merge_key[] = "dm"; + constexpr char put_entity_key[] = "e"; + constexpr char put_entity_merge_key[] = "em"; + constexpr char put_key[] = "p"; + constexpr char put_merge_key[] = "pm"; + + AddToBatch(db_->DefaultColumnFamily(), delete_key); + AddToBatch(db_->DefaultColumnFamily(), delete_merge_key); + AddToBatch(db_->DefaultColumnFamily(), put_entity_key); + AddToBatch(db_->DefaultColumnFamily(), put_entity_merge_key); + AddToBatch(db_->DefaultColumnFamily(), put_key); + AddToBatch(db_->DefaultColumnFamily(), put_merge_key); + + std::array keys{{delete_key, delete_merge_key, + put_entity_key, put_entity_merge_key, + put_key, put_merge_key}}; + + std::array expected{ + {{}, + {{kDefaultWideColumnName, "dm1"}}, + {{kDefaultWideColumnName, "e0"}, {"e", "0"}}, + {{kDefaultWideColumnName, "em0,em1"}, {"em", "0"}}, + {{kDefaultWideColumnName, "p0"}}, + {{kDefaultWideColumnName, "pm0,pm1"}}}}; + + // GetFromBatchAndDB + { + PinnableSlice value; + ASSERT_TRUE(batch_->GetFromBatchAndDB(db_, read_opts_, delete_key, &value) + .IsNotFound()); + } + + for (size_t i = 1; i < num_keys; ++i) { + PinnableSlice value; + ASSERT_OK(batch_->GetFromBatchAndDB(db_, read_opts_, keys[i], &value)); + ASSERT_EQ(value, expected[i].front().value()); + } + + // MultiGetFromBatchAndDB + { + std::array values; + std::array statuses; + constexpr bool sorted_input = false; + + batch_->MultiGetFromBatchAndDB(db_, read_opts_, db_->DefaultColumnFamily(), + num_keys, keys.data(), values.data(), + statuses.data(), sorted_input); + + ASSERT_TRUE(statuses[0].IsNotFound()); + + for (size_t i = 1; i < num_keys; ++i) { + ASSERT_OK(statuses[i]); + ASSERT_EQ(values[i], expected[i].front().value()); + } + } + + // TODO: add tests for GetEntityFromBatchAndDB and + // MultiGetEntityFromBatchAndDB once they are implemented + + // Iterator + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), db_->NewIterator(read_opts_), &read_opts_)); + + iter->SeekToFirst(); + + for (size_t i = 1; i < num_keys; ++i) { + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), keys[i]); + ASSERT_EQ(iter->value(), expected[i].front().value()); + ASSERT_EQ(iter->columns(), expected[i]); + iter->Next(); + } + + ASSERT_FALSE(iter->Valid()); + + iter->SeekToLast(); + + for (size_t i = num_keys - 1; i > 0; --i) { + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), keys[i]); + ASSERT_EQ(iter->value(), expected[i].front().value()); + ASSERT_EQ(iter->columns(), expected[i]); + iter->Prev(); + } + + ASSERT_FALSE(iter->Valid()); +} + +TEST_P(WriteBatchWithIndexTest, WideColumnsBatchAndDB) { + // Tests for the case when queries require consulting both the write batch and + // the underlying DB, 
either because of merges or because the write batch + // doesn't contain the key. + + ASSERT_OK(OpenDB()); + + constexpr size_t num_keys = 6; + + // Note: for the "merge" keys, we'll have a merge operation in the write batch + // and the base value (Put/PutEntity/Delete) in the DB. For the "no-merge" + // keys, we'll have nothing in the write batch and a standalone + // Put/PutEntity/Delete in the DB. + constexpr char merge_a_key[] = "ma"; + constexpr char merge_b_key[] = "mb"; + constexpr char merge_c_key[] = "mc"; + constexpr char no_merge_a_key[] = "na"; + constexpr char no_merge_b_key[] = "nb"; + constexpr char no_merge_c_key[] = "nc"; + + constexpr char merge_a_value[] = "mao"; + const WideColumns merge_b_columns{{kDefaultWideColumnName, "mbo"}, + {"mb", "o"}}; + constexpr char no_merge_a_value[] = "nao"; + const WideColumns no_merge_b_columns{{kDefaultWideColumnName, "nbo"}, + {"nb", "o"}}; + + ASSERT_OK(db_->Put(write_opts_, db_->DefaultColumnFamily(), merge_a_key, + merge_a_value)); + ASSERT_OK(db_->PutEntity(write_opts_, db_->DefaultColumnFamily(), merge_b_key, + merge_b_columns)); + ASSERT_OK(db_->Delete(write_opts_, db_->DefaultColumnFamily(), merge_c_key)); + ASSERT_OK(db_->Put(write_opts_, db_->DefaultColumnFamily(), no_merge_a_key, + no_merge_a_value)); + ASSERT_OK(db_->PutEntity(write_opts_, db_->DefaultColumnFamily(), + no_merge_b_key, no_merge_b_columns)); + ASSERT_OK( + db_->Delete(write_opts_, db_->DefaultColumnFamily(), no_merge_c_key)); + + AddToBatch(db_->DefaultColumnFamily(), merge_a_key); + AddToBatch(db_->DefaultColumnFamily(), merge_b_key); + AddToBatch(db_->DefaultColumnFamily(), merge_c_key); + + std::array keys{{merge_a_key, merge_b_key, merge_c_key, + no_merge_a_key, no_merge_b_key, + no_merge_c_key}}; + + std::array expected{ + {{{kDefaultWideColumnName, "mao,ma0"}}, + {{kDefaultWideColumnName, "mbo,mb0"}, {"mb", "o"}}, + {{kDefaultWideColumnName, "mc0"}}, + {{kDefaultWideColumnName, "nao"}}, + {{kDefaultWideColumnName, "nbo"}, {"nb", "o"}}, + {}}}; + + // GetFromBatchAndDB + for (size_t i = 0; i < num_keys - 1; ++i) { + PinnableSlice value; + ASSERT_OK(batch_->GetFromBatchAndDB(db_, read_opts_, keys[i], &value)); + ASSERT_EQ(value, expected[i].front().value()); + } + + { + PinnableSlice value; + ASSERT_TRUE( + batch_->GetFromBatchAndDB(db_, read_opts_, no_merge_c_key, &value) + .IsNotFound()); + } + + // MultiGetFromBatchAndDB + { + std::array values; + std::array statuses; + constexpr bool sorted_input = false; + + batch_->MultiGetFromBatchAndDB(db_, read_opts_, db_->DefaultColumnFamily(), + num_keys, keys.data(), values.data(), + statuses.data(), sorted_input); + + for (size_t i = 0; i < num_keys - 1; ++i) { + ASSERT_OK(statuses[i]); + ASSERT_EQ(values[i], expected[i].front().value()); + } + + ASSERT_TRUE(statuses[num_keys - 1].IsNotFound()); + } + + // TODO: add tests for GetEntityFromBatchAndDB and + // MultiGetEntityFromBatchAndDB once they are implemented + + // Iterator + std::unique_ptr iter(batch_->NewIteratorWithBase( + db_->DefaultColumnFamily(), db_->NewIterator(read_opts_), &read_opts_)); + + iter->SeekToFirst(); + + for (size_t i = 0; i < num_keys - 1; ++i) { + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), keys[i]); + ASSERT_EQ(iter->value(), expected[i].front().value()); + ASSERT_EQ(iter->columns(), expected[i]); + iter->Next(); + } + + ASSERT_FALSE(iter->Valid()); + + iter->SeekToLast(); + + for (size_t i = 0; i < num_keys - 1; ++i) { + const size_t idx = num_keys - 2 - i; + + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), 
keys[idx]); + ASSERT_EQ(iter->value(), expected[idx].front().value()); + ASSERT_EQ(iter->columns(), expected[idx]); + iter->Prev(); + } + + ASSERT_FALSE(iter->Valid()); +} + INSTANTIATE_TEST_CASE_P(WBWI, WriteBatchWithIndexTest, testing::Bool()); } // namespace ROCKSDB_NAMESPACE From 7eca51dfc36e40d47bff5ff44f905ebd8fb7082b Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Fri, 1 Dec 2023 11:01:29 -0800 Subject: [PATCH 310/386] Refactor crash test stderr parsing logic into a function (#12109) Summary: This is a simple refactor for the crash test script to put shared logic for parsing stderr into a function. There is no functional change. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12109 Test Plan: manually tested the script Reviewed By: ajkr Differential Revision: D51692172 Pulled By: jowlyzhang fbshipit-source-id: d346d64e981d9c489c380ff6ce33296a224b5877 --- tools/db_crashtest.py | 62 +++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 01c3ae329..303131602 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -814,6 +814,24 @@ def execute_cmd(cmd, timeout=None): return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8") +def exit_if_stderr_has_errors(stderr, print_stderr=True): + if print_stderr: + for line in stderr.split("\n"): + if line != "" and not line.startswith("WARNING"): + print("stderr has error message:") + print("***" + line + "***") + + stderrdata = stderr.lower() + errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") + print("#times error occurred in output is " + str(errorcount) + "\n") + + if errorcount > 0: + print("TEST FAILED. Output has 'error'!!!\n") + sys.exit(2) + if stderrdata.find("fail") >= 0: + print("TEST FAILED. Output has 'fail'!!!\n") + sys.exit(2) + # This script runs and kills db_stress multiple times. It checks consistency # in case of unsafe crashes in RocksDB. def blackbox_crash_main(args, unknown_args): @@ -846,21 +864,7 @@ def blackbox_crash_main(args, unknown_args): print(errs) sys.exit(2) - for line in errs.split("\n"): - if line != "" and not line.startswith("WARNING"): - print("stderr has error message:") - print("***" + line + "***") - - stderrdata = errs.lower() - errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") - print("#times error occurred in output is " + str(errorcount) + "\n") - - if errorcount > 0: - print("TEST FAILED. Output has 'error'!!!\n") - sys.exit(2) - if stderrdata.find("fail") >= 0: - print("TEST FAILED. Output has 'fail'!!!\n") - sys.exit(2) + exit_if_stderr_has_errors(errs); time.sleep(1) # time to stabilize before the next run @@ -880,21 +884,7 @@ def blackbox_crash_main(args, unknown_args): # Print stats of the final run print("stdout:", outs) - for line in errs.split("\n"): - if line != "" and not line.startswith("WARNING"): - print("stderr has error message:") - print("***" + line + "***") - - stderrdata = errs.lower() - errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") - print("#times error occurred in output is " + str(errorcount) + "\n") - - if errorcount > 0: - print("TEST FAILED. Output has 'error'!!!\n") - sys.exit(2) - if stderrdata.find("fail") >= 0: - print("TEST FAILED. 
Output has 'fail'!!!\n") - sys.exit(2) + exit_if_stderr_has_errors(errs) # we need to clean up after ourselves -- only do this on test success shutil.rmtree(dbname, True) @@ -1056,16 +1046,8 @@ def whitebox_crash_main(args, unknown_args): print("TEST FAILED. See kill option and exit code above!!!\n") sys.exit(1) - stderrdata = stderrdata.lower() - errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times") - print("#times error occurred in output is " + str(errorcount) + "\n") - - if errorcount > 0: - print("TEST FAILED. Output has 'error'!!!\n") - sys.exit(2) - if stderrdata.find("fail") >= 0: - print("TEST FAILED. Output has 'fail'!!!\n") - sys.exit(2) + #stderr already printed above + exit_if_stderr_has_errors(stderrdata, False) # First half of the duration, keep doing kill test. For the next half, # try different modes. From be3bc368116e1be3580407b9fa2205d785e27673 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 1 Dec 2023 11:10:30 -0800 Subject: [PATCH 311/386] internal_repo_rocksdb (-8794174668376270091) (#12114) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12114 Reviewed By: jowlyzhang Differential Revision: D51745613 Pulled By: ajkr fbshipit-source-id: 27ca4bda275cab057d3a3ec99f0f92cdb9be5177 --- logging/auto_roll_logger_test.cc | 4 +- logging/env_logger_test.cc | 2 +- memory/arena_test.cc | 2 +- memtable/alloc_tracker.cc | 2 +- memtable/hash_linklist_rep.cc | 10 ++--- memtable/hash_skiplist_rep.cc | 4 +- memtable/skiplistrep.cc | 6 +-- memtable/vectorrep.cc | 4 +- microbench/db_basic_bench.cc | 4 +- monitoring/histogram.cc | 28 ++++++++---- monitoring/histogram_windowing.cc | 10 +++-- monitoring/in_memory_stats_history.cc | 2 +- monitoring/perf_level.cc | 2 +- monitoring/persistent_stats_history.cc | 4 +- monitoring/statistics.cc | 6 ++- monitoring/thread_status_impl.cc | 2 +- options/cf_options.cc | 10 ++--- options/configurable.cc | 32 +++++++++----- options/configurable_test.cc | 4 +- options/configurable_test.h | 4 +- options/customizable_test.cc | 2 +- options/db_options.cc | 3 +- options/options.cc | 2 +- options/options_helper.cc | 10 +++-- options/options_parser.cc | 6 +-- options/options_test.cc | 22 +++++----- port/port_posix.cc | 10 ++--- port/stack_trace.cc | 9 ++-- table/block_based/block.cc | 44 ++++++++++++++----- .../block_based/block_based_table_builder.cc | 18 +++++--- .../block_based/block_based_table_factory.cc | 3 +- table/block_based/block_based_table_reader.cc | 4 +- table/block_based/block_builder.cc | 3 +- table/block_based/block_test.cc | 3 +- table/block_based/filter_policy.cc | 18 ++++---- table/block_based/full_filter_block.cc | 2 +- table/block_based/full_filter_block_test.cc | 4 +- table/block_based/index_builder.cc | 3 +- .../partitioned_filter_block_test.cc | 12 ++--- table/block_fetcher_test.cc | 4 +- table/cuckoo/cuckoo_table_builder.cc | 7 ++- 41 files changed, 191 insertions(+), 140 deletions(-) diff --git a/logging/auto_roll_logger_test.cc b/logging/auto_roll_logger_test.cc index 3d0ec1763..344fea96e 100644 --- a/logging/auto_roll_logger_test.cc +++ b/logging/auto_roll_logger_test.cc @@ -574,8 +574,8 @@ TEST_F(AutoRollLoggerTest, Close) { static std::vector GetOldFileNames(const std::string& path) { std::vector ret; - const std::string dirname = path.substr(/*start=*/0, path.find_last_of("/")); - const std::string fname = path.substr(path.find_last_of("/") + 1); + const std::string dirname = path.substr(/*start=*/0, path.find_last_of('/')); + const std::string fname = 
path.substr(path.find_last_of('/') + 1); std::vector children; EXPECT_OK(Env::Default()->GetChildren(dirname, &children)); diff --git a/logging/env_logger_test.cc b/logging/env_logger_test.cc index 467ab064f..21db8b658 100644 --- a/logging/env_logger_test.cc +++ b/logging/env_logger_test.cc @@ -138,7 +138,7 @@ TEST_F(EnvLoggerTest, ConcurrentLogging) { const int kNumThreads = 5; // Create threads. for (int ii = 0; ii < kNumThreads; ++ii) { - threads.push_back(port::Thread(cb)); + threads.emplace_back(cb); } // Wait for them to complete. diff --git a/memory/arena_test.cc b/memory/arena_test.cc index 592bbd723..8db761f68 100644 --- a/memory/arena_test.cc +++ b/memory/arena_test.cc @@ -170,7 +170,7 @@ static void SimpleTest(size_t huge_page_size) { r[b] = i % 256; } bytes += s; - allocated.push_back(std::make_pair(s, r)); + allocated.emplace_back(s, r); ASSERT_GE(arena.ApproximateMemoryUsage(), bytes); if (i > N / 10) { ASSERT_LE(arena.ApproximateMemoryUsage(), bytes * 1.10); diff --git a/memtable/alloc_tracker.cc b/memtable/alloc_tracker.cc index 4c6d35431..d780df0bf 100644 --- a/memtable/alloc_tracker.cc +++ b/memtable/alloc_tracker.cc @@ -7,7 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include +#include #include "memory/allocator.h" #include "memory/arena.h" diff --git a/memtable/hash_linklist_rep.cc b/memtable/hash_linklist_rep.cc index 9e60f9be3..a75587917 100644 --- a/memtable/hash_linklist_rep.cc +++ b/memtable/hash_linklist_rep.cc @@ -81,7 +81,7 @@ struct Node { void NoBarrier_SetNext(Node* x) { next_.store(x, std::memory_order_relaxed); } // Needed for placement new below which is fine - Node() {} + Node() = default; private: std::atomic next_; @@ -265,7 +265,7 @@ class HashLinkListRep : public MemTableRep { explicit FullListIterator(MemtableSkipList* list, Allocator* allocator) : iter_(list), full_list_(list), allocator_(allocator) {} - ~FullListIterator() override {} + ~FullListIterator() override = default; // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return iter_.Valid(); } @@ -332,7 +332,7 @@ class HashLinkListRep : public MemTableRep { head_(head), node_(nullptr) {} - ~LinkListIterator() override {} + ~LinkListIterator() override = default; // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return node_ != nullptr; } @@ -482,7 +482,7 @@ class HashLinkListRep : public MemTableRep { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. public: - EmptyIterator() {} + EmptyIterator() = default; bool Valid() const override { return false; } const char* key() const override { assert(false); @@ -526,7 +526,7 @@ HashLinkListRep::HashLinkListRep( } } -HashLinkListRep::~HashLinkListRep() {} +HashLinkListRep::~HashLinkListRep() = default; KeyHandle HashLinkListRep::Allocate(const size_t len, char** buf) { char* mem = allocator_->AllocateAligned(sizeof(Node) + len); diff --git a/memtable/hash_skiplist_rep.cc b/memtable/hash_skiplist_rep.cc index 15ff4f071..aed1580c4 100644 --- a/memtable/hash_skiplist_rep.cc +++ b/memtable/hash_skiplist_rep.cc @@ -208,7 +208,7 @@ class HashSkipListRep : public MemTableRep { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. 
public: - EmptyIterator() {} + EmptyIterator() = default; bool Valid() const override { return false; } const char* key() const override { assert(false); @@ -248,7 +248,7 @@ HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare, } } -HashSkipListRep::~HashSkipListRep() {} +HashSkipListRep::~HashSkipListRep() = default; HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( const Slice& transformed) { diff --git a/memtable/skiplistrep.cc b/memtable/skiplistrep.cc index c3b4c785d..e615ef9f6 100644 --- a/memtable/skiplistrep.cc +++ b/memtable/skiplistrep.cc @@ -161,7 +161,7 @@ class SkipListRep : public MemTableRep { } } - ~SkipListRep() override {} + ~SkipListRep() override = default; // Iteration over the contents of a skip list class Iterator : public MemTableRep::Iterator { @@ -174,7 +174,7 @@ class SkipListRep : public MemTableRep { const InlineSkipList* list) : iter_(list) {} - ~Iterator() override {} + ~Iterator() override = default; // Returns true iff the iterator is positioned at a valid node. bool Valid() const override { return iter_.Valid(); } @@ -232,7 +232,7 @@ class SkipListRep : public MemTableRep { explicit LookaheadIterator(const SkipListRep& rep) : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {} - ~LookaheadIterator() override {} + ~LookaheadIterator() override = default; bool Valid() const override { return iter_.Valid(); } diff --git a/memtable/vectorrep.cc b/memtable/vectorrep.cc index e42ae4439..9b0192cb8 100644 --- a/memtable/vectorrep.cc +++ b/memtable/vectorrep.cc @@ -40,7 +40,7 @@ class VectorRep : public MemTableRep { void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; - ~VectorRep() override {} + ~VectorRep() override = default; class Iterator : public MemTableRep::Iterator { class VectorRep* vrep_; @@ -59,7 +59,7 @@ class VectorRep : public MemTableRep { // Initialize an iterator over the specified collection. // The returned iterator is not valid. // explicit Iterator(const MemTableRep* collection); - ~Iterator() override{}; + ~Iterator() override = default; // Returns true iff the iterator is positioned at a valid node. 
bool Valid() const override; diff --git a/microbench/db_basic_bench.cc b/microbench/db_basic_bench.cc index c2e547f60..2eca31f10 100644 --- a/microbench/db_basic_bench.cc +++ b/microbench/db_basic_bench.cc @@ -543,7 +543,9 @@ BENCHMARK(ManualFlush)->Iterations(1)->Apply(ManualFlushArguments); static Slice CompressibleString(Random* rnd, double compressed_fraction, int len, std::string* dst) { int raw = static_cast(len * compressed_fraction); - if (raw < 1) raw = 1; + if (raw < 1) { + raw = 1; + } std::string raw_data = rnd->RandomBinaryString(raw); // Duplicate the random data until we have filled "len" bytes diff --git a/monitoring/histogram.cc b/monitoring/histogram.cc index 61bc6c140..bc70f3902 100644 --- a/monitoring/histogram.cc +++ b/monitoring/histogram.cc @@ -9,12 +9,11 @@ #include "monitoring/histogram.h" -#include - #include #include #include #include +#include #include "port/port.h" #include "util/cast_util.h" @@ -45,10 +44,11 @@ HistogramBucketMapper::HistogramBucketMapper() { size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { auto beg = bucketValues_.begin(); auto end = bucketValues_.end(); - if (value >= maxBucketValue_) + if (value >= maxBucketValue_) { return end - beg - 1; // bucketValues_.size() - 1 - else + } else { return std::lower_bound(beg, end, value) - beg; + } } namespace { @@ -147,8 +147,12 @@ double HistogramStat::Percentile(double p) const { double r = left_point + (right_point - left_point) * pos; uint64_t cur_min = min(); uint64_t cur_max = max(); - if (r < cur_min) r = static_cast(cur_min); - if (r > cur_max) r = static_cast(cur_max); + if (r < cur_min) { + r = static_cast(cur_min); + } + if (r > cur_max) { + r = static_cast(cur_max); + } return r; } } @@ -158,7 +162,9 @@ double HistogramStat::Percentile(double p) const { double HistogramStat::Average() const { uint64_t cur_num = num(); uint64_t cur_sum = sum(); - if (cur_num == 0) return 0; + if (cur_num == 0) { + return 0; + } return static_cast(cur_sum) / static_cast(cur_num); } @@ -193,12 +199,16 @@ std::string HistogramStat::ToString() const { Percentile(99.99)); r.append(buf); r.append("------------------------------------------------------\n"); - if (cur_num == 0) return r; // all buckets are empty + if (cur_num == 0) { + return r; // all buckets are empty + } const double mult = 100.0 / cur_num; uint64_t cumulative_sum = 0; for (unsigned int b = 0; b < num_buckets_; b++) { uint64_t bucket_value = bucket_at(b); - if (bucket_value <= 0.0) continue; + if (bucket_value <= 0.0) { + continue; + } cumulative_sum += bucket_value; snprintf(buf, sizeof(buf), "%c %7" PRIu64 ", %7" PRIu64 " ] %8" PRIu64 " %7.3f%% %7.3f%% ", diff --git a/monitoring/histogram_windowing.cc b/monitoring/histogram_windowing.cc index c41ae8a03..726231a71 100644 --- a/monitoring/histogram_windowing.cc +++ b/monitoring/histogram_windowing.cc @@ -34,7 +34,7 @@ HistogramWindowingImpl::HistogramWindowingImpl(uint64_t num_windows, Clear(); } -HistogramWindowingImpl::~HistogramWindowingImpl() {} +HistogramWindowingImpl::~HistogramWindowingImpl() = default; void HistogramWindowingImpl::Clear() { std::lock_guard lock(mutex_); @@ -159,7 +159,9 @@ void HistogramWindowingImpl::SwapHistoryBucket() { for (unsigned int i = 0; i < num_windows_; i++) { if (i != next_window) { uint64_t m = window_stats_[i].min(); - if (m < new_min) new_min = m; + if (m < new_min) { + new_min = m; + } } } stats_.min_.store(new_min, std::memory_order_relaxed); @@ -170,7 +172,9 @@ void HistogramWindowingImpl::SwapHistoryBucket() { for (unsigned int 
i = 0; i < num_windows_; i++) { if (i != next_window) { uint64_t m = window_stats_[i].max(); - if (m > new_max) new_max = m; + if (m > new_max) { + new_max = m; + } } } stats_.max_.store(new_max, std::memory_order_relaxed); diff --git a/monitoring/in_memory_stats_history.cc b/monitoring/in_memory_stats_history.cc index 568d8ec13..7b7c8db83 100644 --- a/monitoring/in_memory_stats_history.cc +++ b/monitoring/in_memory_stats_history.cc @@ -12,7 +12,7 @@ namespace ROCKSDB_NAMESPACE { -InMemoryStatsHistoryIterator::~InMemoryStatsHistoryIterator() {} +InMemoryStatsHistoryIterator::~InMemoryStatsHistoryIterator() = default; bool InMemoryStatsHistoryIterator::Valid() const { return valid_; } diff --git a/monitoring/perf_level.cc b/monitoring/perf_level.cc index e3507624b..9ba4e0163 100644 --- a/monitoring/perf_level.cc +++ b/monitoring/perf_level.cc @@ -4,7 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). // -#include +#include #include "monitoring/perf_level_imp.h" diff --git a/monitoring/persistent_stats_history.cc b/monitoring/persistent_stats_history.cc index f4c022148..964fe536f 100644 --- a/monitoring/persistent_stats_history.cc +++ b/monitoring/persistent_stats_history.cc @@ -74,7 +74,7 @@ void OptimizeForPersistentStats(ColumnFamilyOptions* cfo) { cfo->compression = kNoCompression; } -PersistentStatsHistoryIterator::~PersistentStatsHistoryIterator() {} +PersistentStatsHistoryIterator::~PersistentStatsHistoryIterator() = default; bool PersistentStatsHistoryIterator::Valid() const { return valid_; } @@ -96,7 +96,7 @@ std::pair parseKey(const Slice& key, uint64_t start_time) { std::pair result; std::string key_str = key.ToString(); - std::string::size_type pos = key_str.find("#"); + std::string::size_type pos = key_str.find('#'); // TODO(Zhongyi): add counters to track parse failures? if (pos == std::string::npos) { result.first = std::numeric_limits::max(); diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index ebfd44300..d0a2a71a8 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -379,7 +379,7 @@ StatisticsImpl::StatisticsImpl(std::shared_ptr stats) RegisterOptions("StatisticsOptions", &stats_, &stats_type_info); } -StatisticsImpl::~StatisticsImpl() {} +StatisticsImpl::~StatisticsImpl() = default; uint64_t StatisticsImpl::getTickerCount(uint32_t tickerType) const { MutexLock lock(&aggregate_lock_); @@ -538,7 +538,9 @@ std::string StatisticsImpl::ToString() const { bool StatisticsImpl::getTickerMap( std::map* stats_map) const { assert(stats_map); - if (!stats_map) return false; + if (!stats_map) { + return false; + } stats_map->clear(); MutexLock lock(&aggregate_lock_); for (const auto& t : TickersNameMap) { diff --git a/monitoring/thread_status_impl.cc b/monitoring/thread_status_impl.cc index 9619dfd81..153753682 100644 --- a/monitoring/thread_status_impl.cc +++ b/monitoring/thread_status_impl.cc @@ -67,7 +67,7 @@ const std::string ThreadStatus::MicrosToString(uint64_t micros) { const std::string& ThreadStatus::GetOperationPropertyName( ThreadStatus::OperationType op_type, int i) { - static const std::string empty_str = ""; + static const std::string empty_str; switch (op_type) { case ThreadStatus::OP_COMPACTION: if (i >= NUM_COMPACTION_PROPERTIES) { diff --git a/options/cf_options.cc b/options/cf_options.cc index f026b0bc9..2ca826c98 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -417,7 +417,7 @@ static std::unordered_map // value, say, like "23", which would be assigned to // max_table_files_size. 
if (name == "compaction_options_fifo" && - value.find("=") == std::string::npos) { + value.find('=') == std::string::npos) { // Old format. Parse just a single uint64_t value. auto options = static_cast(addr); options->max_table_files_size = ParseUint64(value); @@ -529,7 +529,7 @@ static std::unordered_map // This is to handle backward compatibility, where // compression_options was a ":" separated list. if (name == kOptNameCompOpts && - value.find("=") == std::string::npos) { + value.find('=') == std::string::npos) { auto* compression = static_cast(addr); return ParseCompressionOptions(value, name, *compression); } else { @@ -549,7 +549,7 @@ static std::unordered_map // This is to handle backward compatibility, where // compression_options was a ":" separated list. if (name == kOptNameBMCompOpts && - value.find("=") == std::string::npos) { + value.find('=') == std::string::npos) { auto* compression = static_cast(addr); return ParseCompressionOptions(value, name, *compression); } else { @@ -627,7 +627,7 @@ static std::unordered_map {offsetof(struct ImmutableCFOptions, max_write_buffer_number_to_maintain), OptionType::kInt, OptionVerificationType::kNormal, - OptionTypeFlags::kNone, 0}}, + OptionTypeFlags::kNone, nullptr}}, {"max_write_buffer_size_to_maintain", {offsetof(struct ImmutableCFOptions, max_write_buffer_size_to_maintain), @@ -636,7 +636,7 @@ static std::unordered_map {"min_write_buffer_number_to_merge", {offsetof(struct ImmutableCFOptions, min_write_buffer_number_to_merge), OptionType::kInt, OptionVerificationType::kNormal, - OptionTypeFlags::kNone, 0}}, + OptionTypeFlags::kNone, nullptr}}, {"num_levels", {offsetof(struct ImmutableCFOptions, num_levels), OptionType::kInt, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, diff --git a/options/configurable.cc b/options/configurable.cc index 5491336e0..134de99a2 100644 --- a/options/configurable.cc +++ b/options/configurable.cc @@ -37,9 +37,9 @@ Status Configurable::PrepareOptions(const ConfigOptions& opts) { // We ignore the invoke_prepare_options here intentionally, // as if you are here, you must have called PrepareOptions explicitly. 
Status status = Status::OK(); - for (auto opt_iter : options_) { + for (const auto& opt_iter : options_) { if (opt_iter.type_map != nullptr) { - for (auto map_iter : *(opt_iter.type_map)) { + for (const auto& map_iter : *(opt_iter.type_map)) { auto& opt_info = map_iter.second; if (opt_info.ShouldPrepare()) { status = opt_info.Prepare(opts, map_iter.first, opt_iter.opt_ptr); @@ -56,9 +56,9 @@ Status Configurable::PrepareOptions(const ConfigOptions& opts) { Status Configurable::ValidateOptions(const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const { Status status; - for (auto opt_iter : options_) { + for (const auto& opt_iter : options_) { if (opt_iter.type_map != nullptr) { - for (auto map_iter : *(opt_iter.type_map)) { + for (const auto& map_iter : *(opt_iter.type_map)) { auto& opt_info = map_iter.second; if (opt_info.ShouldValidate()) { status = opt_info.Validate(db_opts, cf_opts, map_iter.first, @@ -80,7 +80,7 @@ Status Configurable::ValidateOptions(const DBOptions& db_opts, /*********************************************************************************/ const void* Configurable::GetOptionsPtr(const std::string& name) const { - for (auto o : options_) { + for (const auto& o : options_) { if (o.name == name) { return o.opt_ptr; } @@ -95,7 +95,7 @@ std::string Configurable::GetOptionName(const std::string& opt_name) const { const OptionTypeInfo* ConfigurableHelper::FindOption( const std::vector& options, const std::string& short_name, std::string* opt_name, void** opt_ptr) { - for (auto iter : options) { + for (const auto& iter : options) { if (iter.type_map != nullptr) { const auto opt_info = OptionTypeInfo::Find(short_name, *(iter.type_map), opt_name); @@ -318,21 +318,29 @@ Status ConfigurableHelper::ConfigureSomeOptions( } // End while found one or options remain // Now that we have been through the list, remove any unsupported - for (auto u : unsupported) { + for (const auto& u : unsupported) { auto it = options->find(u); if (it != options->end()) { options->erase(it); } } if (config_options.ignore_unknown_options) { - if (!result.ok()) result.PermitUncheckedError(); - if (!notsup.ok()) notsup.PermitUncheckedError(); + if (!result.ok()) { + result.PermitUncheckedError(); + } + if (!notsup.ok()) { + notsup.PermitUncheckedError(); + } return Status::OK(); } else if (!result.ok()) { - if (!notsup.ok()) notsup.PermitUncheckedError(); + if (!notsup.ok()) { + notsup.PermitUncheckedError(); + } return result; } else if (config_options.ignore_unsupported_options) { - if (!notsup.ok()) notsup.PermitUncheckedError(); + if (!notsup.ok()) { + notsup.PermitUncheckedError(); + } return Status::OK(); } else { return notsup; @@ -374,7 +382,7 @@ Status ConfigurableHelper::ConfigureCustomizableOption( return Status::OK(); } else if (custom == nullptr || !StartsWith(name, custom->GetId() + ".")) { return configurable.ParseOption(copy, opt_info, name, value, opt_ptr); - } else if (value.find("=") != std::string::npos) { + } else if (value.find('=') != std::string::npos) { return custom->ConfigureFromString(copy, value); } else { return custom->ConfigureOption(copy, name, value); diff --git a/options/configurable_test.cc b/options/configurable_test.cc index a03d8f0a5..9284e8622 100644 --- a/options/configurable_test.cc +++ b/options/configurable_test.cc @@ -436,7 +436,7 @@ TEST_F(ConfigurableTest, AliasOptionsTest) { OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, {"alias", {offsetof(struct TestOptions, b), OptionType::kBoolean, - OptionVerificationType::kAlias, 
OptionTypeFlags::kNone, 0}}}; + OptionVerificationType::kAlias, OptionTypeFlags::kNone, nullptr}}}; std::unique_ptr orig; orig.reset(SimpleConfigurable::Create("simple", TestConfigMode::kDefaultMode, &alias_option_info)); @@ -758,7 +758,7 @@ void ConfigurableParamTest::TestConfigureOptions( ASSERT_OK(base->GetOptionNames(config_options, &names)); std::unordered_map unused; bool found_one = false; - for (auto name : names) { + for (const auto& name : names) { std::string value; Status s = base->GetOption(config_options, name, &value); if (s.ok()) { diff --git a/options/configurable_test.h b/options/configurable_test.h index 3d6fe8410..7acac776e 100644 --- a/options/configurable_test.h +++ b/options/configurable_test.h @@ -33,8 +33,8 @@ struct TestOptions { bool b = false; bool d = true; TestEnum e = TestEnum::kTestA; - std::string s = ""; - std::string u = ""; + std::string s; + std::string u; }; static std::unordered_map simple_option_info = { diff --git a/options/customizable_test.cc b/options/customizable_test.cc index 0e614ed16..f59724656 100644 --- a/options/customizable_test.cc +++ b/options/customizable_test.cc @@ -1265,7 +1265,7 @@ class TestStatistics : public StatisticsImpl { class TestFlushBlockPolicyFactory : public FlushBlockPolicyFactory { public: - TestFlushBlockPolicyFactory() {} + TestFlushBlockPolicyFactory() = default; static const char* kClassName() { return "TestFlushBlockPolicyFactory"; } const char* Name() const override { return kClassName(); } diff --git a/options/db_options.cc b/options/db_options.cc index ca72404dd..2d213f13f 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -995,8 +995,7 @@ MutableDBOptions::MutableDBOptions() wal_bytes_per_sync(0), strict_bytes_per_sync(false), compaction_readahead_size(0), - max_background_flushes(-1), - daily_offpeak_time_utc("") {} + max_background_flushes(-1) {} MutableDBOptions::MutableDBOptions(const DBOptions& options) : max_background_jobs(options.max_background_jobs), diff --git a/options/options.cc b/options/options.cc index 03289e5b6..d96cf4072 100644 --- a/options/options.cc +++ b/options/options.cc @@ -127,7 +127,7 @@ ColumnFamilyOptions::ColumnFamilyOptions() ColumnFamilyOptions::ColumnFamilyOptions(const Options& options) : ColumnFamilyOptions(*static_cast(&options)) {} -DBOptions::DBOptions() {} +DBOptions::DBOptions() = default; DBOptions::DBOptions(const Options& options) : DBOptions(*static_cast(&options)) {} diff --git a/options/options_helper.cc b/options/options_helper.cc index 0c76d0315..fa5d549c1 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -46,7 +46,9 @@ Status ValidateOptions(const DBOptions& db_opts, auto db_cfg = DBOptionsAsConfigurable(db_opts); auto cf_cfg = CFOptionsAsConfigurable(cf_opts); s = db_cfg->ValidateOptions(db_opts, cf_opts); - if (s.ok()) s = cf_cfg->ValidateOptions(db_opts, cf_opts); + if (s.ok()) { + s = cf_cfg->ValidateOptions(db_opts, cf_opts); + } return s; } @@ -912,7 +914,7 @@ Status OptionTypeInfo::Parse(const ConfigOptions& config_options, ConfigOptions copy = config_options; copy.ignore_unknown_options = false; copy.invoke_prepare_options = false; - if (opt_value.find("=") != std::string::npos) { + if (opt_value.find('=') != std::string::npos) { return config->ConfigureFromString(copy, opt_value); } else { return config->ConfigureOption(copy, opt_name, opt_value); @@ -1047,7 +1049,7 @@ Status OptionTypeInfo::Serialize(const ConfigOptions& config_options, } std::string value = custom->ToString(embedded); if 
(!embedded.mutable_options_only || - value.find("=") != std::string::npos) { + value.find('=') != std::string::npos) { *opt_value = value; } else { *opt_value = ""; @@ -1423,7 +1425,7 @@ const OptionTypeInfo* OptionTypeInfo::Find( *elem_name = opt_name; // Return the name return &(iter->second); // Return the contents of the iterator } else { - auto idx = opt_name.find("."); // Look for a separator + auto idx = opt_name.find('.'); // Look for a separator if (idx > 0 && idx != std::string::npos) { // We found a separator auto siter = opt_map.find(opt_name.substr(0, idx)); // Look for the short name diff --git a/options/options_parser.cc b/options/options_parser.cc index a8c855d6e..e2431016d 100644 --- a/options/options_parser.cc +++ b/options/options_parser.cc @@ -179,8 +179,8 @@ Status RocksDBOptionsParser::ParseSection(OptionSection* section, *section = kOptionSectionUnknown; // A section is of the form [ ""], where // "" is optional. - size_t arg_start_pos = line.find("\""); - size_t arg_end_pos = line.rfind("\""); + size_t arg_start_pos = line.find('\"'); + size_t arg_end_pos = line.rfind('\"'); // The following if-then check tries to identify whether the input // section has the optional section argument. if (arg_start_pos != std::string::npos && arg_start_pos != arg_end_pos) { @@ -224,7 +224,7 @@ Status RocksDBOptionsParser::ParseStatement(std::string* name, std::string* value, const std::string& line, const int line_num) { - size_t eq_pos = line.find("="); + size_t eq_pos = line.find('='); if (eq_pos == std::string::npos) { return InvalidArgument(line_num, "A valid statement must have a '='."); } diff --git a/options/options_test.cc b/options/options_test.cc index a70d479dd..6420ebf46 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -49,7 +49,7 @@ class OptionsTest : public testing::Test {}; class UnregisteredTableFactory : public TableFactory { public: - UnregisteredTableFactory() {} + UnregisteredTableFactory() = default; const char* Name() const override { return "Unregistered"; } using TableFactory::NewTableReader; Status NewTableReader(const ReadOptions&, const TableReaderOptions&, @@ -1888,7 +1888,7 @@ TEST_F(OptionsTest, StringToMapRandomTest) { "a={aa={};tt={xxx={}}};c=defff;d={{}yxx{}3{xx}}", "abc={{}{}{}{{{}}}{{}{}{}{}{}{}{}"}; - for (std::string base : bases) { + for (const std::string& base : bases) { for (int rand_seed = 301; rand_seed < 401; rand_seed++) { Random rnd(rand_seed); for (int attempt = 0; attempt < 10; attempt++) { @@ -1909,7 +1909,7 @@ TEST_F(OptionsTest, StringToMapRandomTest) { for (int rand_seed = 301; rand_seed < 1301; rand_seed++) { Random rnd(rand_seed); int len = rnd.Uniform(30); - std::string str = ""; + std::string str; for (int attempt = 0; attempt < len; attempt++) { // Add a random character size_t pos = static_cast( @@ -3554,7 +3554,7 @@ TEST_F(OptionsParserTest, ParseVersion) { "3..2", ".", ".1.2", // must have at least one digit before each dot "1.2.", "1.", "2.34."}; // must have at least one digit after each dot - for (auto iv : invalid_versions) { + for (const auto& iv : invalid_versions) { snprintf(buffer, kLength - 1, file_template.c_str(), iv.c_str()); parser.Reset(); @@ -3564,7 +3564,7 @@ TEST_F(OptionsParserTest, ParseVersion) { const std::vector valid_versions = { "1.232", "100", "3.12", "1", "12.3 ", " 1.25 "}; - for (auto vv : valid_versions) { + for (const auto& vv : valid_versions) { snprintf(buffer, kLength - 1, file_template.c_str(), vv.c_str()); parser.Reset(); ASSERT_OK(fs_->WriteToNewFile(vv, buffer)); 
@@ -4643,42 +4643,42 @@ TEST_F(OptionTypeInfoTest, TestCustomEnum) { TEST_F(OptionTypeInfoTest, TestBuiltinEnum) { ConfigOptions config_options; - for (auto iter : OptionsHelper::compaction_style_string_map) { + for (const auto& iter : OptionsHelper::compaction_style_string_map) { CompactionStyle e1, e2; TestParseAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompactionStyle), "CompactionStyle", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } - for (auto iter : OptionsHelper::compaction_pri_string_map) { + for (const auto& iter : OptionsHelper::compaction_pri_string_map) { CompactionPri e1, e2; TestParseAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompactionPri), "CompactionPri", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } - for (auto iter : OptionsHelper::compression_type_string_map) { + for (const auto& iter : OptionsHelper::compression_type_string_map) { CompressionType e1, e2; TestParseAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kCompressionType), "CompressionType", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } - for (auto iter : OptionsHelper::compaction_stop_style_string_map) { + for (const auto& iter : OptionsHelper::compaction_stop_style_string_map) { CompactionStopStyle e1, e2; TestParseAndCompareOption( config_options, OptionTypeInfo(0, OptionType::kCompactionStopStyle), "CompactionStopStyle", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } - for (auto iter : OptionsHelper::checksum_type_string_map) { + for (const auto& iter : OptionsHelper::checksum_type_string_map) { ChecksumType e1, e2; TestParseAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kChecksumType), "CheckSumType", iter.first, &e1, &e2); ASSERT_EQ(e1, iter.second); } - for (auto iter : OptionsHelper::encoding_type_string_map) { + for (const auto& iter : OptionsHelper::encoding_type_string_map) { EncodingType e1, e2; TestParseAndCompareOption(config_options, OptionTypeInfo(0, OptionType::kEncodingType), diff --git a/port/port_posix.cc b/port/port_posix.cc index 749ad5d60..713fecb6a 100644 --- a/port/port_posix.cc +++ b/port/port_posix.cc @@ -11,20 +11,20 @@ #include "port/port_posix.h" -#include +#include #if defined(__i386__) || defined(__x86_64__) #include #endif -#include #include -#include -#include -#include #include #include #include +#include +#include +#include #include +#include #include #include diff --git a/port/stack_trace.cc b/port/stack_trace.cc index 9ef51bebb..5ce459ba8 100644 --- a/port/stack_trace.cc +++ b/port/stack_trace.cc @@ -26,12 +26,13 @@ void* SaveStack(int* /*num_frames*/, int /*first_frames_to_skip*/) { #include #include #include -#include -#include -#include -#include #include +#include +#include +#include +#include + #ifdef OS_OPENBSD #include #include diff --git a/table/block_based/block.cc b/table/block_based/block.cc index 9bebdfbdc..bc18dd926 100644 --- a/table/block_based/block.cc +++ b/table/block_based/block.cc @@ -49,8 +49,12 @@ struct DecodeEntry { // Fast path: all three values are encoded in one byte each p += 3; } else { - if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; - if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; + if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) { + return nullptr; + } + if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) { + return nullptr; + } if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) { return nullptr; } @@ -82,8 +86,12 @@ struct CheckAndDecodeEntry { // Fast path: all three 
values are encoded in one byte each p += 3; } else { - if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; - if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; + if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) { + return nullptr; + } + if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) { + return nullptr; + } if ((p = GetVarint32Ptr(p, limit, value_length)) == nullptr) { return nullptr; } @@ -113,15 +121,21 @@ struct DecodeKeyV4 { // We need 2 bytes for shared and non_shared size. We also need one more // byte either for value size or the actual value in case of value delta // encoding. - if (limit - p < 3) return nullptr; + if (limit - p < 3) { + return nullptr; + } *shared = reinterpret_cast(p)[0]; *non_shared = reinterpret_cast(p)[1]; if ((*shared | *non_shared) < 128) { // Fast path: all three values are encoded in one byte each p += 2; } else { - if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) return nullptr; - if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) return nullptr; + if ((p = GetVarint32Ptr(p, limit, shared)) == nullptr) { + return nullptr; + } + if ((p = GetVarint32Ptr(p, limit, non_shared)) == nullptr) { + return nullptr; + } } return p; } @@ -140,7 +154,9 @@ struct DecodeEntryV4 { void DataBlockIter::NextImpl() { #ifndef NDEBUG - if (TEST_Corrupt_Callback("DataBlockIter::NextImpl")) return; + if (TEST_Corrupt_Callback("DataBlockIter::NextImpl")) { + return; + } #endif bool is_shared = false; ParseNextDataKey(&is_shared); @@ -446,7 +462,9 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { void IndexBlockIter::SeekImpl(const Slice& target) { #ifndef NDEBUG - if (TEST_Corrupt_Callback("IndexBlockIter::SeekImpl")) return; + if (TEST_Corrupt_Callback("IndexBlockIter::SeekImpl")) { + return; + } #endif TEST_SYNC_POINT("IndexBlockIter::Seek:0"); PERF_TIMER_GUARD(block_seek_nanos); @@ -560,7 +578,9 @@ void MetaBlockIter::SeekToFirstImpl() { void IndexBlockIter::SeekToFirstImpl() { #ifndef NDEBUG - if (TEST_Corrupt_Callback("IndexBlockIter::SeekToFirstImpl")) return; + if (TEST_Corrupt_Callback("IndexBlockIter::SeekToFirstImpl")) { + return; + } #endif if (data_ == nullptr) { // Not init yet return; @@ -910,7 +930,9 @@ bool IndexBlockIter::BinaryBlockIndexSeek(const Slice& target, // Key at "target" is <= "mid". Therefore all blocks // after "mid" are uninteresting. // If there is only one block left, we found it. - if (left == right) break; + if (left == right) { + break; + } right = mid; } } diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index e66c4939a..5712354ac 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -9,10 +9,9 @@ #include "table/block_based/block_based_table_builder.h" -#include -#include - #include +#include +#include #include #include #include @@ -231,7 +230,6 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector uint64_t /* block_compressed_bytes_slow */) override { // Intentionally left blank. No interest in collecting stats for // blocks. 
- return; } Status Finish(UserCollectedProperties* properties) override { @@ -985,7 +983,9 @@ BlockBasedTableBuilder::~BlockBasedTableBuilder() { void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { Rep* r = rep_; assert(rep_->state != Rep::State::kClosed); - if (!ok()) return; + if (!ok()) { + return; + } ValueType value_type = ExtractValueType(key); if (IsValueType(value_type)) { #ifndef NDEBUG @@ -1097,8 +1097,12 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) { void BlockBasedTableBuilder::Flush() { Rep* r = rep_; assert(rep_->state != Rep::State::kClosed); - if (!ok()) return; - if (r->data_block.empty()) return; + if (!ok()) { + return; + } + if (r->data_block.empty()) { + return; + } if (r->IsParallelCompressionEnabled() && r->state == Rep::State::kUnbuffered) { r->data_block.Finish(); diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 25299ecab..67b8f704b 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -9,9 +9,8 @@ #include "table/block_based/block_based_table_factory.h" -#include - #include +#include #include #include diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 69a499d32..7e605726d 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -3053,7 +3053,7 @@ Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) { << " size " << blockhandles_iter->value().handle.size() << "\n"; std::string str_key = user_key.ToString(); - std::string res_key(""); + std::string res_key; char cspace = ' '; for (size_t i = 0; i < str_key.size(); i++) { res_key.append(&str_key[i], 1); @@ -3154,7 +3154,7 @@ void BlockBasedTable::DumpKeyValue(const Slice& key, const Slice& value, std::string str_key = ikey.user_key().ToString(); std::string str_value = value.ToString(); - std::string res_key(""), res_value(""); + std::string res_key, res_value; char cspace = ' '; for (size_t i = 0; i < str_key.size(); i++) { if (str_key[i] == '\0') { diff --git a/table/block_based/block_builder.cc b/table/block_based/block_builder.cc index 877df81c1..e4950e435 100644 --- a/table/block_based/block_builder.cc +++ b/table/block_based/block_builder.cc @@ -33,9 +33,8 @@ #include "table/block_based/block_builder.h" -#include - #include +#include #include "db/dbformat.h" #include "rocksdb/comparator.h" diff --git a/table/block_based/block_test.cc b/table/block_based/block_test.cc index 9082a08e9..b1a855263 100644 --- a/table/block_based/block_test.cc +++ b/table/block_based/block_test.cc @@ -6,9 +6,8 @@ #include "table/block_based/block.h" -#include - #include +#include #include #include #include diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 19b880a90..d8e5cd93f 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -72,7 +72,7 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder { detect_filter_construct_corruption_( detect_filter_construct_corruption) {} - ~XXPH3FilterBitsBuilder() override {} + ~XXPH3FilterBitsBuilder() override = default; virtual void AddKey(const Slice& key) override { uint64_t hash = GetSliceHash64(key); @@ -321,7 +321,7 @@ class FastLocalBloomBitsBuilder : public XXPH3FilterBitsBuilder { FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete; void operator=(const FastLocalBloomBitsBuilder&) 
= delete; - ~FastLocalBloomBitsBuilder() override {} + ~FastLocalBloomBitsBuilder() override = default; using FilterBitsBuilder::Finish; @@ -525,7 +525,7 @@ class FastLocalBloomBitsReader : public BuiltinFilterBitsReader { FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete; void operator=(const FastLocalBloomBitsReader&) = delete; - ~FastLocalBloomBitsReader() override {} + ~FastLocalBloomBitsReader() override = default; bool MayMatch(const Slice& key) override { uint64_t h = GetSliceHash64(key); @@ -606,7 +606,7 @@ class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder { Standard128RibbonBitsBuilder(const Standard128RibbonBitsBuilder&) = delete; void operator=(const Standard128RibbonBitsBuilder&) = delete; - ~Standard128RibbonBitsBuilder() override {} + ~Standard128RibbonBitsBuilder() override = default; using FilterBitsBuilder::Finish; @@ -967,7 +967,7 @@ class Standard128RibbonBitsReader : public BuiltinFilterBitsReader { Standard128RibbonBitsReader(const Standard128RibbonBitsReader&) = delete; void operator=(const Standard128RibbonBitsReader&) = delete; - ~Standard128RibbonBitsReader() override {} + ~Standard128RibbonBitsReader() override = default; bool MayMatch(const Slice& key) override { uint64_t h = GetSliceHash64(key); @@ -1070,7 +1070,7 @@ LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key, assert(bits_per_key_); } -LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {} +LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() = default; void LegacyBloomBitsBuilder::AddKey(const Slice& key) { uint32_t hash = BloomHash(key); @@ -1220,7 +1220,7 @@ class LegacyBloomBitsReader : public BuiltinFilterBitsReader { LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete; void operator=(const LegacyBloomBitsReader&) = delete; - ~LegacyBloomBitsReader() override {} + ~LegacyBloomBitsReader() override = default; // "contents" contains the data built by a preceding call to // FilterBitsBuilder::Finish. 
MayMatch must return true if the key was @@ -1359,7 +1359,7 @@ BloomLikeFilterPolicy::BloomLikeFilterPolicy(double bits_per_key) whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000; } -BloomLikeFilterPolicy::~BloomLikeFilterPolicy() {} +BloomLikeFilterPolicy::~BloomLikeFilterPolicy() = default; const char* BloomLikeFilterPolicy::kClassName() { return "rocksdb.internal.BloomLikeFilter"; } @@ -1805,7 +1805,7 @@ FilterBuildingContext::FilterBuildingContext( const BlockBasedTableOptions& _table_options) : table_options(_table_options) {} -FilterPolicy::~FilterPolicy() {} +FilterPolicy::~FilterPolicy() = default; std::shared_ptr BloomLikeFilterPolicy::Create( const std::string& name, double bits_per_key) { diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 60ff7c44f..0d7d9a599 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -259,7 +259,7 @@ void FullFilterBlockReader::MayMatch(MultiGetRange* range, bool no_io, } } - filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]); + filter_bits_reader->MayMatch(num_keys, keys.data(), may_match.data()); int i = 0; for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) { diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index 0268b7b27..154c8c090 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -23,7 +23,7 @@ namespace ROCKSDB_NAMESPACE { class TestFilterBitsBuilder : public FilterBitsBuilder { public: - explicit TestFilterBitsBuilder() {} + explicit TestFilterBitsBuilder() = default; // Add Key to filter void AddKey(const Slice& key) override { @@ -197,7 +197,7 @@ class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder { public: explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {} - ~CountUniqueFilterBitsBuilderWrapper() override {} + ~CountUniqueFilterBitsBuilderWrapper() override = default; void AddKey(const Slice& key) override { b_->AddKey(key); diff --git a/table/block_based/index_builder.cc b/table/block_based/index_builder.cc index a9e02a287..98d084b34 100644 --- a/table/block_based/index_builder.cc +++ b/table/block_based/index_builder.cc @@ -9,8 +9,7 @@ #include "table/block_based/index_builder.h" -#include - +#include #include #include #include diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 1d6e2fced..50bb77975 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -87,7 +87,7 @@ class PartitionedFilterBlockTest table_options_.index_block_restart_interval = 3; } - ~PartitionedFilterBlockTest() override {} + ~PartitionedFilterBlockTest() override = default; static constexpr int kKeyNum = 4; static constexpr int kMissingKeyNum = 2; @@ -200,7 +200,7 @@ class PartitionedFilterBlockTest // Querying added keys const bool no_io = true; std::vector keys = PrepareKeys(keys_without_ts, kKeyNum); - for (auto key : keys) { + for (const auto& key : keys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->KeyMayMatch( @@ -220,7 +220,7 @@ class PartitionedFilterBlockTest // querying missing keys std::vector missing_keys = PrepareKeys(missing_keys_without_ts, kMissingKeyNum); - for (auto key : missing_keys) { + for (const auto& key : missing_keys) { auto ikey = 
InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); if (empty) { @@ -386,7 +386,7 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { CutABlock(pib.get(), pkeys[2]); std::unique_ptr reader( NewReader(builder.get(), pib.get())); - for (auto key : pkeys) { + for (const auto& key : pkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), @@ -400,7 +400,7 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { "p-key31"}; std::vector pnonkeys = PrepareKeys(pnonkeys_without_ts, 4 /* number_of_keys */); - for (auto key : pnonkeys) { + for (const auto& key : pnonkeys) { auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key), @@ -440,7 +440,7 @@ TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) { CutABlock(pib.get(), pkeys[4]); std::unique_ptr reader( NewReader(builder.get(), pib.get())); - for (auto key : pkeys) { + for (const auto& key : pkeys) { auto prefix = prefix_extractor->Transform(key); auto ikey = InternalKey(key, 0, ValueType::kTypeValue); const Slice ikey_slice = Slice(*ikey.rep()); diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index d738fa3df..61e444e92 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -134,7 +134,9 @@ class BlockFetcherTest : public testing::Test { std::array expected_stats_by_mode) { for (CompressionType compression_type : GetSupportedCompressions()) { bool do_compress = compression_type != kNoCompression; - if (compressed != do_compress) continue; + if (compressed != do_compress) { + continue; + } std::string compression_type_str = CompressionTypeToString(compression_type); diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 0cf6834af..b0596edac 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -5,9 +5,8 @@ #include "table/cuckoo/cuckoo_table_builder.h" -#include - #include +#include #include #include #include @@ -481,7 +480,7 @@ bool CuckooTableBuilder::MakeSpaceForKey( uint64_t bid = hash_vals[hash_cnt]; (*buckets)[static_cast(bid)].make_space_for_key_call_id = make_space_for_key_call_id; - tree.push_back(CuckooNode(bid, 0, 0)); + tree.emplace_back(bid, 0, 0); } bool null_found = false; uint32_t curr_pos = 0; @@ -507,7 +506,7 @@ bool CuckooTableBuilder::MakeSpaceForKey( } (*buckets)[static_cast(child_bucket_id)] .make_space_for_key_call_id = make_space_for_key_call_id; - tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, curr_pos)); + tree.emplace_back(child_bucket_id, curr_depth + 1, curr_pos); if ((*buckets)[static_cast(child_bucket_id)].vector_idx == kMaxVectorIdx) { null_found = true; From 06dc32ef2567188d088f56c5588cc51913c544ac Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 1 Dec 2023 11:15:17 -0800 Subject: [PATCH 312/386] internal_repo_rocksdb (435146444452818992) (#12115) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12115 Reviewed By: jowlyzhang Differential Revision: D51745742 Pulled By: ajkr fbshipit-source-id: 67000d07783b413924798dd9c1751da27e119d53 --- table/cuckoo/cuckoo_table_reader.cc | 8 +- table/cuckoo/cuckoo_table_reader_test.cc | 8 +- table/format.cc | 8 +- table/merger_test.cc | 2 +- table/mock_table.cc | 4 +- table/plain/plain_table_builder.cc | 3 
+- table/plain/plain_table_factory.cc | 11 +- table/plain/plain_table_reader.cc | 10 +- table/sst_file_dumper.cc | 4 +- table/sst_file_reader.cc | 2 +- table/table_test.cc | 66 ++++--- table/two_level_iterator.cc | 4 +- table/unique_id.cc | 4 +- test_util/testutil.cc | 16 +- .../block_cache_trace_analyzer.cc | 2 +- .../block_cache_trace_analyzer_test.cc | 6 +- tools/db_bench_tool.cc | 163 +++++++++++------- tools/dump/db_dump_tool.cc | 14 +- tools/ldb_cmd.cc | 50 +++--- tools/ldb_cmd_test.cc | 18 +- tools/ldb_tool.cc | 2 +- tools/sst_dump_test.cc | 2 +- tools/sst_dump_tool.cc | 4 +- tools/trace_analyzer_test.cc | 14 +- tools/trace_analyzer_tool.cc | 14 +- trace_replay/trace_record.cc | 2 +- trace_replay/trace_replay.cc | 4 +- util/comparator.cc | 11 +- util/compression_context_cache.cc | 2 +- util/crc32c.cc | 3 +- util/dynamic_bloom_test.cc | 5 +- util/file_checksum_helper.cc | 2 +- util/file_reader_writer_test.cc | 8 +- util/filelock_test.cc | 5 +- util/random.cc | 7 +- 35 files changed, 281 insertions(+), 207 deletions(-) diff --git a/table/cuckoo/cuckoo_table_reader.cc b/table/cuckoo/cuckoo_table_reader.cc index a4479ab60..54ae6266e 100644 --- a/table/cuckoo/cuckoo_table_reader.cc +++ b/table/cuckoo/cuckoo_table_reader.cc @@ -43,7 +43,7 @@ CuckooTableReader::CuckooTableReader( identity_as_first_hash_(false), use_module_hash_(false), num_hash_func_(0), - unused_key_(""), + key_length_(0), user_key_length_(0), value_length_(0), @@ -182,7 +182,9 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/, ParsedInternalKey found_ikey; Status s = ParseInternalKey(full_key, &found_ikey, false /* log_err_key */); // TODO - if (!s.ok()) return s; + if (!s.ok()) { + return s; + } bool dont_care __attribute__((__unused__)); get_context->SaveValue(found_ikey, value, &dont_care); } @@ -213,7 +215,7 @@ class CuckooTableIterator : public InternalIterator { // No copying allowed CuckooTableIterator(const CuckooTableIterator&) = delete; void operator=(const Iterator&) = delete; - ~CuckooTableIterator() override {} + ~CuckooTableIterator() override = default; bool Valid() const override; void SeekToFirst() override; void SeekToLast() override; diff --git a/table/cuckoo/cuckoo_table_reader_test.cc b/table/cuckoo/cuckoo_table_reader_test.cc index e83baa107..d829b3630 100644 --- a/table/cuckoo/cuckoo_table_reader_test.cc +++ b/table/cuckoo/cuckoo_table_reader_test.cc @@ -249,7 +249,7 @@ TEST_F(CuckooReaderTest, WhenKeyExistsWithUint64Comparator) { fname = test::PerThreadDBPath("CuckooReaderUint64_WhenKeyExists"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i].resize(8); - memcpy(&user_keys[i][0], static_cast(&i), 8); + memcpy(user_keys[i].data(), static_cast(&i), 8); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); @@ -296,7 +296,7 @@ TEST_F(CuckooReaderTest, CheckIteratorUint64) { fname = test::PerThreadDBPath("CuckooReader_CheckIterator"); for (uint64_t i = 0; i < num_items; i++) { user_keys[i].resize(8); - memcpy(&user_keys[i][0], static_cast(&i), 8); + memcpy(user_keys[i].data(), static_cast(&i), 8); ParsedInternalKey ikey(user_keys[i], 1000, kTypeValue); AppendInternalKey(&keys[i], ikey); values[i] = "value" + NumToStr(i); @@ -425,7 +425,7 @@ void WriteFile(const std::vector& keys, const uint64_t num, ASSERT_OK(builder.status()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key. 
- builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], 4)); + builder.Add(Slice(keys[key_idx]), Slice(keys[key_idx].data(), 4)); ASSERT_EQ(builder.NumEntries(), key_idx + 1); ASSERT_OK(builder.status()); } @@ -454,7 +454,7 @@ void WriteFile(const std::vector& keys, const uint64_t num, value.Reset(); value.clear(); ASSERT_OK(reader.Get(r_options, Slice(keys[i]), &get_context, nullptr)); - ASSERT_TRUE(Slice(keys[i]) == Slice(&keys[i][0], 4)); + ASSERT_TRUE(Slice(keys[i]) == Slice(keys[i].data(), 4)); } } diff --git a/table/format.cc b/table/format.cc index 27ecce547..939b7dcd6 100644 --- a/table/format.cc +++ b/table/format.cc @@ -390,7 +390,7 @@ Status Footer::DecodeFrom(Slice input, uint64_t input_offset, if (checksum_type_ != kNoChecksum && format_version_ >= 6) { std::array copy_without_checksum; std::copy_n(input.data(), kNewVersionsEncodedLength, - ©_without_checksum[0]); + copy_without_checksum.data()); EncodeFixed32(©_without_checksum[5], 0); // Clear embedded checksum computed_checksum = ComputeBuiltinChecksum(checksum_type(), copy_without_checksum.data(), @@ -518,9 +518,11 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, } else { footer_buf.reserve(Footer::kMaxEncodedLength); s = file->Read(opts, read_offset, Footer::kMaxEncodedLength, - &footer_input, &footer_buf[0], nullptr); + &footer_input, footer_buf.data(), nullptr); + } + if (!s.ok()) { + return s; } - if (!s.ok()) return s; } // Check that we actually read the whole footer from the file. It may be diff --git a/table/merger_test.cc b/table/merger_test.cc index 71dc798e5..29e433c28 100644 --- a/table/merger_test.cc +++ b/table/merger_test.cc @@ -107,7 +107,7 @@ class MergerTest : public testing::Test { } merging_iterator_.reset( - NewMergingIterator(&icomp_, &small_iterators[0], + NewMergingIterator(&icomp_, small_iterators.data(), static_cast(small_iterators.size()))); single_iterator_.reset(new VectorIterator(all_keys_, all_keys_, &icomp_)); } diff --git a/table/mock_table.cc b/table/mock_table.cc index 1823758e4..1971c00fc 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -59,7 +59,7 @@ class MockTableReader : public TableReader { std::shared_ptr GetTableProperties() const override; - ~MockTableReader() {} + ~MockTableReader() = default; private: const KVVector& table_; @@ -134,7 +134,7 @@ class MockTableBuilder : public TableBuilder { } // REQUIRES: Either Finish() or Abandon() has been called. - ~MockTableBuilder() {} + ~MockTableBuilder() = default; // Add key,value to the table being constructed. // REQUIRES: key is after any previously added key according to comparator. 
diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 24dd0f97a..784ef1471 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -5,8 +5,7 @@ #include "table/plain/plain_table_builder.h" -#include - +#include #include #include #include diff --git a/table/plain/plain_table_factory.cc b/table/plain/plain_table_factory.cc index 80aa9cb8e..730aec028 100644 --- a/table/plain/plain_table_factory.cc +++ b/table/plain/plain_table_factory.cc @@ -5,8 +5,7 @@ #include "table/plain/plain_table_factory.h" -#include - +#include #include #include "db/dbformat.h" @@ -157,7 +156,7 @@ static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library, AsPattern(VectorRepFactory::kClassName(), VectorRepFactory::kNickName()), [](const std::string& uri, std::unique_ptr* guard, std::string* /*errmsg*/) { - auto colon = uri.find(":"); + auto colon = uri.find(':'); if (colon != std::string::npos) { size_t count = ParseSizeT(uri.substr(colon + 1)); guard->reset(new VectorRepFactory(count)); @@ -170,7 +169,7 @@ static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library, AsPattern(SkipListFactory::kClassName(), SkipListFactory::kNickName()), [](const std::string& uri, std::unique_ptr* guard, std::string* /*errmsg*/) { - auto colon = uri.find(":"); + auto colon = uri.find(':'); if (colon != std::string::npos) { size_t lookahead = ParseSizeT(uri.substr(colon + 1)); guard->reset(new SkipListFactory(lookahead)); @@ -184,7 +183,7 @@ static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library, [](const std::string& uri, std::unique_ptr* guard, std::string* /*errmsg*/) { // Expecting format: hash_linkedlist: - auto colon = uri.find(":"); + auto colon = uri.find(':'); if (colon != std::string::npos) { size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1)); guard->reset(NewHashLinkListRepFactory(hash_bucket_count)); @@ -198,7 +197,7 @@ static int RegisterBuiltinMemTableRepFactory(ObjectLibrary& library, [](const std::string& uri, std::unique_ptr* guard, std::string* /*errmsg*/) { // Expecting format: prefix_hash: - auto colon = uri.find(":"); + auto colon = uri.find(':'); if (colon != std::string::npos) { size_t hash_bucket_count = ParseSizeT(uri.substr(colon + 1)); guard->reset(NewHashSkipListRepFactory(hash_bucket_count)); diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc index a74da1f89..b917fce34 100644 --- a/table/plain/plain_table_reader.cc +++ b/table/plain/plain_table_reader.cc @@ -454,7 +454,9 @@ Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder, ParsedInternalKey parsed_target; Status s = ParseInternalKey(target, &parsed_target, false /* log_err_key */); // TODO - if (!s.ok()) return s; + if (!s.ok()) { + return s; + } // The key is between [low, high). Do a binary search between it. 
while (high - low > 1) { @@ -591,7 +593,9 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target, ParsedInternalKey parsed_target; s = ParseInternalKey(target, &parsed_target, false /* log_err_key */); // TODO - if (!s.ok()) return s; + if (!s.ok()) { + return s; + } Slice found_value; while (offset < file_info_.data_end_offset) { @@ -642,7 +646,7 @@ PlainTableIterator::PlainTableIterator(PlainTableReader* table, next_offset_ = offset_ = table_->file_info_.data_end_offset; } -PlainTableIterator::~PlainTableIterator() {} +PlainTableIterator::~PlainTableIterator() = default; bool PlainTableIterator::Valid() const { return offset_ < table_->file_info_.data_end_offset && diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index d44056def..821fff5b3 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -495,7 +495,9 @@ Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num, Slice key = iter->key(); Slice value = iter->value(); ++i; - if (read_num > 0 && i > read_num) break; + if (read_num > 0 && i > read_num) { + break; + } ParsedInternalKey ikey; Status pik_status = ParseInternalKey(key, &ikey, true /* log_err_key */); diff --git a/table/sst_file_reader.cc b/table/sst_file_reader.cc index 533b7cd6a..d23c58deb 100644 --- a/table/sst_file_reader.cc +++ b/table/sst_file_reader.cc @@ -36,7 +36,7 @@ struct SstFileReader::Rep { SstFileReader::SstFileReader(const Options& options) : rep_(new Rep(options)) {} -SstFileReader::~SstFileReader() {} +SstFileReader::~SstFileReader() = default; Status SstFileReader::Open(const std::string& file_path) { auto r = rep_.get(); diff --git a/table/table_test.cc b/table/table_test.cc index e6f95243e..c3981289e 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -10,10 +10,10 @@ #include "rocksdb/table.h" #include -#include -#include #include +#include +#include #include #include #include @@ -186,7 +186,7 @@ class Constructor { public: explicit Constructor(const Comparator* cmp) : data_(stl_wrappers::LessOfComparator(cmp)) {} - virtual ~Constructor() {} + virtual ~Constructor() = default; void Add(const std::string& key, const Slice& value) { data_[key] = value.ToString(); @@ -295,8 +295,8 @@ class KeyConvertingIterator : public InternalIterator { bool arena_mode_; // No copying allowed - KeyConvertingIterator(const KeyConvertingIterator&); - void operator=(const KeyConvertingIterator&); + KeyConvertingIterator(const KeyConvertingIterator&) = delete; + void operator=(const KeyConvertingIterator&) = delete; }; // `BlockConstructor` APIs always accept/return user keys. @@ -345,7 +345,7 @@ class BlockConstructor : public Constructor { std::string data_; Block* block_; - BlockConstructor(); + BlockConstructor() = delete; }; class TableConstructor : public Constructor { @@ -487,7 +487,7 @@ class TableConstructor : public Constructor { bool convert_to_internal_key_; int level_; - TableConstructor(); + TableConstructor() = delete; static uint64_t cur_file_num_; EnvOptions soptions; @@ -930,13 +930,17 @@ class HarnessTest : public testing::Test { InternalIterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); stl_wrappers::KVMap::const_iterator model_iter = data.begin(); - if (kVerbose) fprintf(stderr, "---\n"); + if (kVerbose) { + fprintf(stderr, "---\n"); + } for (int i = 0; i < 200; i++) { const int toss = rnd->Uniform(support_prev_ ? 
5 : 3); switch (toss) { case 0: { if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Next\n"); + if (kVerbose) { + fprintf(stderr, "Next\n"); + } iter->Next(); ASSERT_OK(iter->status()); ++model_iter; @@ -946,7 +950,9 @@ class HarnessTest : public testing::Test { } case 1: { - if (kVerbose) fprintf(stderr, "SeekToFirst\n"); + if (kVerbose) { + fprintf(stderr, "SeekToFirst\n"); + } iter->SeekToFirst(); ASSERT_OK(iter->status()); model_iter = data.begin(); @@ -957,8 +963,9 @@ class HarnessTest : public testing::Test { case 2: { std::string key = PickRandomKey(rnd, keys); model_iter = data.lower_bound(key); - if (kVerbose) + if (kVerbose) { fprintf(stderr, "Seek '%s'\n", EscapeString(key).c_str()); + } iter->Seek(Slice(key)); ASSERT_OK(iter->status()); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); @@ -967,7 +974,9 @@ class HarnessTest : public testing::Test { case 3: { if (iter->Valid()) { - if (kVerbose) fprintf(stderr, "Prev\n"); + if (kVerbose) { + fprintf(stderr, "Prev\n"); + } iter->Prev(); ASSERT_OK(iter->status()); if (model_iter == data.begin()) { @@ -981,7 +990,9 @@ class HarnessTest : public testing::Test { } case 4: { - if (kVerbose) fprintf(stderr, "SeekToLast\n"); + if (kVerbose) { + fprintf(stderr, "SeekToLast\n"); + } iter->SeekToLast(); ASSERT_OK(iter->status()); if (keys.empty()) { @@ -1253,7 +1264,7 @@ class FileChecksumTestHelper { public: FileChecksumTestHelper(bool convert_to_internal_key = false) : convert_to_internal_key_(convert_to_internal_key) {} - ~FileChecksumTestHelper() {} + ~FileChecksumTestHelper() = default; void CreateWritableFile() { sink_ = new test::StringSink(); @@ -1437,7 +1448,7 @@ TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, std::string euid; EXPECT_OK(GetExtendedUniqueIdFromTableProperties(*tp, &euid)); EXPECT_EQ(euid.size(), 24U); - t.external_id[0] = DecodeFixed64(&euid[0]); + t.external_id[0] = DecodeFixed64(euid.data()); t.external_id[1] = DecodeFixed64(&euid[8]); t.external_id[2] = DecodeFixed64(&euid[16]); @@ -1445,7 +1456,7 @@ TestIds GetUniqueId(TableProperties* tp, std::unordered_set* seen, EXPECT_OK(GetUniqueIdFromTableProperties(*tp, &uid)); EXPECT_EQ(uid.size(), 16U); EXPECT_EQ(uid, euid.substr(0, 16)); - EXPECT_EQ(t.external_id[0], DecodeFixed64(&uid[0])); + EXPECT_EQ(t.external_id[0], DecodeFixed64(uid.data())); EXPECT_EQ(t.external_id[1], DecodeFixed64(&uid[8])); } // All these should be effectively random @@ -1930,19 +1941,19 @@ void AssertKeysInCache(BlockBasedTable* table_reader, const std::vector& keys_not_in_cache, bool convert = false) { if (convert) { - for (auto key : keys_in_cache) { + for (const auto& key : keys_in_cache) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } - for (auto key : keys_not_in_cache) { + for (const auto& key : keys_not_in_cache) { InternalKey ikey(key, kMaxSequenceNumber, kTypeValue); ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode())); } } else { - for (auto key : keys_in_cache) { + for (const auto& key : keys_in_cache) { ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key)); } - for (auto key : keys_not_in_cache) { + for (const auto& key : keys_not_in_cache) { ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key)); } } @@ -3246,8 +3257,8 @@ TEST_P(BlockBasedTableTest, TracingMultiGetTest) { std::vector get_contexts; get_contexts.emplace_back( options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, - ukeys[0], &values[0], nullptr, 
nullptr, nullptr, true, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, get_id_offset); + ukeys[0], values.data(), nullptr, nullptr, nullptr, true, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, get_id_offset); get_contexts.emplace_back( options.comparator, nullptr, nullptr, nullptr, GetContext::kNotFound, ukeys[1], &values[1], nullptr, nullptr, nullptr, true, nullptr, nullptr, @@ -3258,12 +3269,12 @@ TEST_P(BlockBasedTableTest, TracingMultiGetTest) { std::array statuses; autovector key_context; key_context.emplace_back(/*ColumnFamilyHandle omitted*/ nullptr, ukeys[0], - &values[0], + values.data(), /*PinnableWideColumns omitted*/ nullptr, - /*timestamp omitted*/ nullptr, &statuses[0]); + /*timestamp omitted*/ nullptr, statuses.data()); key_context[0].ukey_without_ts = ukeys[0]; key_context[0].ikey = encoded_keys[0]; - key_context[0].get_context = &get_contexts[0]; + key_context[0].get_context = get_contexts.data(); key_context.emplace_back(/*ColumnFamilyHandle omitted*/ nullptr, ukeys[1], &values[1], /*PinnableWideColumns omitted*/ nullptr, @@ -4660,14 +4671,15 @@ TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) { class PrefixTest : public testing::Test { public: PrefixTest() : testing::Test() {} - ~PrefixTest() override {} + ~PrefixTest() override = default; }; namespace { // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform { public: - ~TestPrefixExtractor() override{}; + ~TestPrefixExtractor() override = default; + ; const char* Name() const override { return "TestPrefixExtractor"; } ROCKSDB_NAMESPACE::Slice Transform( diff --git a/table/two_level_iterator.cc b/table/two_level_iterator.cc index 4b6634e5c..c66a94fb5 100644 --- a/table/two_level_iterator.cc +++ b/table/two_level_iterator.cc @@ -70,7 +70,9 @@ class TwoLevelIndexIterator : public InternalIteratorBase { private: void SaveError(const Status& s) { - if (status_.ok() && !s.ok()) status_ = s; + if (status_.ok() && !s.ok()) { + status_ = s; + } } void SkipEmptyDataBlocksForward(); void SkipEmptyDataBlocksBackward(); diff --git a/table/unique_id.cc b/table/unique_id.cc index fcdd75650..8bfa8bcfd 100644 --- a/table/unique_id.cc +++ b/table/unique_id.cc @@ -14,7 +14,7 @@ namespace ROCKSDB_NAMESPACE { std::string EncodeSessionId(uint64_t upper, uint64_t lower) { std::string db_session_id(20U, '\0'); - char *buf = &db_session_id[0]; + char *buf = db_session_id.data(); // Preserving `lower` is slightly tricky. 36^12 is slightly more than // 62 bits, so we use 12 chars plus the bottom two bits of one more. // (A tiny fraction of 20 digit strings go unused.) @@ -152,7 +152,7 @@ void ExternalUniqueIdToInternal(UniqueIdPtr in_out) { std::string EncodeUniqueIdBytes(UniqueIdPtr in) { std::string ret(in.extended ? 
24U : 16U, '\0'); - EncodeFixed64(&ret[0], in.ptr[0]); + EncodeFixed64(ret.data(), in.ptr[0]); EncodeFixed64(&ret[8], in.ptr[1]); if (in.extended) { EncodeFixed64(&ret[16], in.ptr[2]); diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 1e771f4fd..ce221e79b 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -94,7 +94,9 @@ bool ShouldPersistUDT(const UserDefinedTimestampTestMode& test_mode) { extern Slice CompressibleString(Random* rnd, double compressed_fraction, int len, std::string* dst) { int raw = static_cast(len * compressed_fraction); - if (raw < 1) raw = 1; + if (raw < 1) { + raw = 1; + } std::string raw_data = rnd->RandomBinaryString(raw); // Duplicate the random data until we have filled "len" bytes @@ -109,7 +111,7 @@ extern Slice CompressibleString(Random* rnd, double compressed_fraction, namespace { class Uint64ComparatorImpl : public Comparator { public: - Uint64ComparatorImpl() {} + Uint64ComparatorImpl() = default; const char* Name() const override { return "rocksdb.Uint64Comparator"; } @@ -131,11 +133,9 @@ class Uint64ComparatorImpl : public Comparator { } void FindShortestSeparator(std::string* /*start*/, - const Slice& /*limit*/) const override { - return; - } + const Slice& /*limit*/) const override {} - void FindShortSuccessor(std::string* /*key*/) const override { return; } + void FindShortSuccessor(std::string* /*key*/) const override {} }; } // namespace @@ -632,7 +632,7 @@ class SpecialMemTableRep : public MemTableRep { return memtable_->GetIterator(arena); } - virtual ~SpecialMemTableRep() override {} + virtual ~SpecialMemTableRep() override = default; private: std::unique_ptr memtable_; @@ -647,7 +647,7 @@ class SpecialSkipListFactory : public MemTableRepFactory { .AddNumber(":"), [](const std::string& uri, std::unique_ptr* guard, std::string* /* errmsg */) { - auto colon = uri.find(":"); + auto colon = uri.find(':'); if (colon != std::string::npos) { auto count = ParseInt(uri.substr(colon + 1)); guard->reset(new SpecialSkipListFactory(count)); diff --git a/tools/block_cache_analyzer/block_cache_trace_analyzer.cc b/tools/block_cache_analyzer/block_cache_trace_analyzer.cc index f2d4f05be..e6473191d 100644 --- a/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +++ b/tools/block_cache_analyzer/block_cache_trace_analyzer.cc @@ -577,7 +577,7 @@ void BlockCacheTraceAnalyzer::WriteSkewness( std::map> label_bucket_naccesses; std::vector> pairs; for (auto const& itr : label_naccesses) { - pairs.push_back(itr); + pairs.emplace_back(itr); } // Sort in descending order. 
sort(pairs.begin(), pairs.end(), diff --git a/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc b/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc index 174565641..77a6d1b2b 100644 --- a/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +++ b/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc @@ -492,7 +492,7 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) { ASSERT_EQ(20, ParseDouble(percent)); } ASSERT_EQ(expected_callers.size(), callers.size()); - for (auto caller : callers) { + for (const auto& caller : callers) { ASSERT_TRUE(expected_callers.find(caller) != expected_callers.end()); } ASSERT_OK(env_->DeleteFile(percent_access_summary_file)); @@ -504,7 +504,7 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) { std::string caller; ASSERT_TRUE(getline(analyzing_callers, caller, ',')); std::vector breakdowns{"level", "bt"}; - for (auto breakdown : breakdowns) { + for (const auto& breakdown : breakdowns) { const std::string file_name = test_path_ + "/" + caller + "_" + breakdown + "_percentage_of_accesses_summary"; @@ -554,7 +554,7 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) { } for (auto const& access_type : access_types) { std::vector block_types{"Index", "Data", "Filter"}; - for (auto block_type : block_types) { + for (const auto& block_type : block_types) { // Validate reuse block timeline. const std::string reuse_blocks_timeline = test_path_ + "/" + block_type + "_" + access_type + diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index dc595781d..e177934b0 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -15,9 +15,10 @@ #include #endif #include -#include -#include #include + +#include +#include #ifdef __APPLE__ #include #include @@ -1258,23 +1259,23 @@ static enum ROCKSDB_NAMESPACE::CompressionType StringToCompressionType( const char* ctype) { assert(ctype); - if (!strcasecmp(ctype, "none")) + if (!strcasecmp(ctype, "none")) { return ROCKSDB_NAMESPACE::kNoCompression; - else if (!strcasecmp(ctype, "snappy")) + } else if (!strcasecmp(ctype, "snappy")) { return ROCKSDB_NAMESPACE::kSnappyCompression; - else if (!strcasecmp(ctype, "zlib")) + } else if (!strcasecmp(ctype, "zlib")) { return ROCKSDB_NAMESPACE::kZlibCompression; - else if (!strcasecmp(ctype, "bzip2")) + } else if (!strcasecmp(ctype, "bzip2")) { return ROCKSDB_NAMESPACE::kBZip2Compression; - else if (!strcasecmp(ctype, "lz4")) + } else if (!strcasecmp(ctype, "lz4")) { return ROCKSDB_NAMESPACE::kLZ4Compression; - else if (!strcasecmp(ctype, "lz4hc")) + } else if (!strcasecmp(ctype, "lz4hc")) { return ROCKSDB_NAMESPACE::kLZ4HCCompression; - else if (!strcasecmp(ctype, "xpress")) + } else if (!strcasecmp(ctype, "xpress")) { return ROCKSDB_NAMESPACE::kXpressCompression; - else if (!strcasecmp(ctype, "zstd")) + } else if (!strcasecmp(ctype, "zstd")) { return ROCKSDB_NAMESPACE::kZSTD; - else { + } else { fprintf(stderr, "Cannot parse compression type '%s'\n", ctype); exit(1); } @@ -1284,15 +1285,15 @@ static enum ROCKSDB_NAMESPACE::TieredAdmissionPolicy StringToAdmissionPolicy( const char* policy) { assert(policy); - if (!strcasecmp(policy, "auto")) + if (!strcasecmp(policy, "auto")) { return ROCKSDB_NAMESPACE::kAdmPolicyAuto; - else if (!strcasecmp(policy, "placeholder")) + } else if (!strcasecmp(policy, "placeholder")) { return ROCKSDB_NAMESPACE::kAdmPolicyPlaceholder; - else if (!strcasecmp(policy, "allow_cache_hits")) + } else if (!strcasecmp(policy, "allow_cache_hits")) { return ROCKSDB_NAMESPACE::kAdmPolicyAllowCacheHits; - else if 
(!strcasecmp(policy, "three_queue")) + } else if (!strcasecmp(policy, "three_queue")) { return ROCKSDB_NAMESPACE::kAdmPolicyThreeQueue; - else { + } else { fprintf(stderr, "Cannot parse admission policy %s\n", policy); exit(1); } @@ -1806,12 +1807,13 @@ static enum DistributionType FLAGS_value_size_distribution_type_e = kFixed; static enum DistributionType StringToDistributionType(const char* ctype) { assert(ctype); - if (!strcasecmp(ctype, "fixed")) + if (!strcasecmp(ctype, "fixed")) { return kFixed; - else if (!strcasecmp(ctype, "uniform")) + } else if (!strcasecmp(ctype, "uniform")) { return kUniform; - else if (!strcasecmp(ctype, "normal")) + } else if (!strcasecmp(ctype, "normal")) { return kNormal; + } fprintf(stdout, "Cannot parse distribution type '%s'\n", ctype); exit(1); @@ -1821,7 +1823,7 @@ class BaseDistribution { public: BaseDistribution(unsigned int _min, unsigned int _max) : min_value_size_(_min), max_value_size_(_max) {} - virtual ~BaseDistribution() {} + virtual ~BaseDistribution() = default; unsigned int Generate() { auto val = Get(); @@ -1938,7 +1940,9 @@ class RandomGenerator { }; static void AppendWithSpace(std::string* str, Slice msg) { - if (msg.empty()) return; + if (msg.empty()) { + return; + } if (!str->empty()) { str->push_back(' '); } @@ -2192,7 +2196,9 @@ class Stats { } void Merge(const Stats& other) { - if (other.exclude_from_merge_) return; + if (other.exclude_from_merge_) { + return; + } for (auto it = other.hist_.begin(); it != other.hist_.end(); ++it) { auto this_it = hist_.find(it->first); @@ -2206,11 +2212,17 @@ class Stats { done_ += other.done_; bytes_ += other.bytes_; seconds_ += other.seconds_; - if (other.start_ < start_) start_ = other.start_; - if (other.finish_ > finish_) finish_ = other.finish_; + if (other.start_ < start_) { + start_ = other.start_; + } + if (other.finish_ > finish_) { + finish_ = other.finish_; + } // Just keep the messages from one thread. - if (message_.empty()) message_ = other.message_; + if (message_.empty()) { + message_ = other.message_; + } } void Stop() { @@ -2289,20 +2301,21 @@ class Stats { done_ += num_ops; if (done_ >= next_report_ && FLAGS_progress_reports) { if (!FLAGS_stats_interval) { - if (next_report_ < 1000) + if (next_report_ < 1000) { next_report_ += 100; - else if (next_report_ < 5000) + } else if (next_report_ < 5000) { next_report_ += 500; - else if (next_report_ < 10000) + } else if (next_report_ < 10000) { next_report_ += 1000; - else if (next_report_ < 50000) + } else if (next_report_ < 50000) { next_report_ += 5000; - else if (next_report_ < 100000) + } else if (next_report_ < 100000) { next_report_ += 10000; - else if (next_report_ < 500000) + } else if (next_report_ < 500000) { next_report_ += 50000; - else + } else { next_report_ += 100000; + } fprintf(stderr, "... finished %" PRIu64 " ops%30s\r", done_, ""); } else { uint64_t now = clock_->NowMicros(); @@ -2334,8 +2347,9 @@ class Stats { if (db_with_cfh && db_with_cfh->num_created.load()) { for (size_t i = 0; i < db_with_cfh->num_created.load(); ++i) { if (db->GetProperty(db_with_cfh->cfh[i], "rocksdb.cfstats", - &stats)) + &stats)) { fprintf(stderr, "%s\n", stats.c_str()); + } if (FLAGS_show_table_properties) { for (int level = 0; level < FLAGS_num_levels; ++level) { if (db->GetProperty( @@ -2393,7 +2407,9 @@ class Stats { void Report(const Slice& name) { // Pretend at least one op was done in case we are running a benchmark // that does not call FinishedOps(). 
- if (done_ < 1) done_ = 1; + if (done_ < 1) { + done_ = 1; + } std::string extra; double elapsed = (finish_ - start_) * 1e-6; @@ -2658,7 +2674,9 @@ class Duration { int64_t GetStage() { return std::min(ops_, max_ops_ - 1) / ops_per_stage_; } bool Done(int64_t increment) { - if (increment <= 0) increment = 1; // avoid Done(0) and infinite loops + if (increment <= 0) { + increment = 1; // avoid Done(0) and infinite loops + } ops_ += increment; if (max_seconds_) { @@ -2725,7 +2743,7 @@ class Benchmark { no_auto_recovery_(false), recovery_complete_(false) {} - ~ErrorHandlerListener() override {} + ~ErrorHandlerListener() override = default; const char* Name() const override { return kClassName(); } static const char* kClassName() { return "ErrorHandlerListener"; } @@ -4056,7 +4074,9 @@ class Benchmark { count++; thread->stats.FinishedOps(nullptr, nullptr, 1, kOthers); } - if (ptr == nullptr) exit(1); // Disable unused variable warning. + if (ptr == nullptr) { + exit(1); // Disable unused variable warning. + } } void Compress(ThreadState* thread) { @@ -4836,8 +4856,8 @@ class Benchmark { } std::vector column_families; for (size_t i = 0; i < num_hot; i++) { - column_families.push_back(ColumnFamilyDescriptor( - ColumnFamilyName(i), ColumnFamilyOptions(options))); + column_families.emplace_back(ColumnFamilyName(i), + ColumnFamilyOptions(options)); } std::vector cfh_idx_to_prob; if (!FLAGS_column_family_distribution.empty()) { @@ -5660,7 +5680,7 @@ class Benchmark { auto total_size = meta.levels[0].size; if (total_size >= db->GetOptions().compaction_options_fifo.max_table_files_size) { - for (auto file_meta : meta.levels[0].files) { + for (const auto& file_meta : meta.levels[0].files) { file_names.emplace_back(file_meta.name); } break; @@ -5711,7 +5731,7 @@ class Benchmark { SequenceNumber sorted_run_largest_seqno = 0; std::string sorted_run_smallest_key, sorted_run_largest_key; bool first_key = true; - for (auto fileMeta : sorted_runs[k][i]) { + for (const auto& fileMeta : sorted_runs[k][i]) { sorted_run_smallest_seqno = std::min(sorted_run_smallest_seqno, fileMeta.smallest_seqno); sorted_run_largest_seqno = @@ -5732,7 +5752,7 @@ class Benchmark { (compaction_style == kCompactionStyleUniversal && level > 0)) { SequenceNumber level_smallest_seqno = kMaxSequenceNumber; SequenceNumber level_largest_seqno = 0; - for (auto fileMeta : meta.levels[level].files) { + for (const auto& fileMeta : meta.levels[level].files) { level_smallest_seqno = std::min(level_smallest_seqno, fileMeta.smallest_seqno); level_largest_seqno = @@ -6254,8 +6274,8 @@ class Benchmark { GenerateKeyFromInt(lkey, FLAGS_num, &lkeys[i]); GenerateKeyFromInt(rkey, FLAGS_num, &rkeys[i]); } - db->GetApproximateSizes(&ranges[0], static_cast(entries_per_batch_), - &sizes[0]); + db->GetApproximateSizes( + ranges.data(), static_cast(entries_per_batch_), sizes.data()); num_sizes += entries_per_batch_; for (int64_t size : sizes) { size_sum += size; @@ -6308,8 +6328,8 @@ class Benchmark { std::vector ratio_; int range_; - QueryDecider() {} - ~QueryDecider() {} + QueryDecider() = default; + ~QueryDecider() = default; Status Initiate(std::vector ratio_input) { int range_max = 1000; @@ -7652,7 +7672,9 @@ class Benchmark { thread->stats.FinishedOps(nullptr, db, 1, kMerge); } else { Status s = db->Get(read_options_, key, &value); - if (value.length() > max_length) max_length = value.length(); + if (value.length() > max_length) { + max_length = value.length(); + } if (!s.ok() && !s.IsNotFound()) { fprintf(stderr, "get error: %s\n", 
s.ToString().c_str()); @@ -7717,10 +7739,16 @@ class Benchmark { } bool binary_search(std::vector& data, int start, int end, int key) { - if (data.empty()) return false; - if (start > end) return false; + if (data.empty()) { + return false; + } + if (start > end) { + return false; + } int mid = start + (end - start) / 2; - if (mid > static_cast(data.size()) - 1) return false; + if (mid > static_cast(data.size()) - 1) { + return false; + } if (data[mid] == key) { return true; } else if (data[mid] > key) { @@ -7793,7 +7821,9 @@ class Benchmark { found = binary_search(data, 0, static_cast(data.size() - 1), lookup_key); data.clear(); - if (found) break; + if (found) { + break; + } } std::cout << "Found key? " << std::to_string(found) << "\n"; sp = FLAGS_env->NowNanos(); @@ -7803,7 +7833,9 @@ class Benchmark { std::cout << "Sample data from GetMergeOperands API call: "; for (PinnableSlice& psl : a_slice) { std::cout << "List: " << to_print << " : " << *psl.GetSelf() << "\n"; - if (to_print++ > 2) break; + if (to_print++ > 2) { + break; + } } } @@ -8217,7 +8249,9 @@ class Benchmark { real_from_level = std::numeric_limits::max(); for (auto& f : files) { - if (f.level > 0 && f.level < real_from_level) real_from_level = f.level; + if (f.level > 0 && f.level < real_from_level) { + real_from_level = f.level; + } } if (real_from_level == std::numeric_limits::max()) { @@ -8233,10 +8267,11 @@ class Benchmark { std::vector files_to_compact; for (auto& f : files) { - if (f.level == real_from_level) + if (f.level == real_from_level) { files_to_compact.push_back(f.name); - else if (f.level > real_from_level && f.level < next_level) + } else if (f.level > real_from_level && f.level < next_level) { next_level = f.level; + } } if (files_to_compact.empty()) { @@ -8277,10 +8312,14 @@ class Benchmark { void CompactLevel(int from_level) { if (db_.db != nullptr) { - while (!CompactLevelHelper(db_, from_level)) WaitForCompaction(); + while (!CompactLevelHelper(db_, from_level)) { + WaitForCompaction(); + } } for (auto& db_with_cfh : multi_dbs_) { - while (!CompactLevelHelper(db_with_cfh, from_level)) WaitForCompaction(); + while (!CompactLevelHelper(db_with_cfh, from_level)) { + WaitForCompaction(); + } } } @@ -8614,15 +8653,15 @@ int db_bench_tool(int argc, char** argv) { exit(1); } - if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NONE")) + if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NONE")) { FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::NONE; - else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NORMAL")) + } else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "NORMAL")) { FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::NORMAL; - else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "SEQUENTIAL")) + } else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "SEQUENTIAL")) { FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::SEQUENTIAL; - else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "WILLNEED")) + } else if (!strcasecmp(FLAGS_compaction_fadvice.c_str(), "WILLNEED")) { FLAGS_compaction_fadvice_e = ROCKSDB_NAMESPACE::Options::WILLNEED; - else { + } else { fprintf(stdout, "Unknown compaction fadvice:%s\n", FLAGS_compaction_fadvice.c_str()); exit(1); diff --git a/tools/dump/db_dump_tool.cc b/tools/dump/db_dump_tool.cc index 535e70c43..e234c9a73 100644 --- a/tools/dump/db_dump_tool.cc +++ b/tools/dump/db_dump_tool.cc @@ -195,16 +195,20 @@ bool DbUndumpTool::Run(const UndumpOptions& undump_options, std::unique_ptr keyscratch(new char[last_keysize]); std::unique_ptr 
valscratch(new char[last_valsize]); - while (1) { + while (true) { uint32_t keysize, valsize; ROCKSDB_NAMESPACE::Slice keyslice; ROCKSDB_NAMESPACE::Slice valslice; status = dumpfile->Read(4, &slice, scratch8); - if (!status.ok() || slice.size() != 4) break; + if (!status.ok() || slice.size() != 4) { + break; + } keysize = ROCKSDB_NAMESPACE::DecodeFixed32(slice.data()); if (keysize > last_keysize) { - while (keysize > last_keysize) last_keysize *= 2; + while (keysize > last_keysize) { + last_keysize *= 2; + } keyscratch = std::unique_ptr(new char[last_keysize]); } @@ -225,7 +229,9 @@ bool DbUndumpTool::Run(const UndumpOptions& undump_options, } valsize = ROCKSDB_NAMESPACE::DecodeFixed32(slice.data()); if (valsize > last_valsize) { - while (valsize > last_valsize) last_valsize *= 2; + while (valsize > last_valsize) { + last_valsize *= 2; + } valscratch = std::unique_ptr(new char[last_valsize]); } diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 0e983bd8a..1e7feb712 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -131,7 +131,7 @@ LDBCommand* LDBCommand::InitFromCmdLineArgs( const std::vector* column_families) { std::vector args; for (int i = 1; i < argc; i++) { - args.push_back(argv[i]); + args.emplace_back(argv[i]); } return InitFromCmdLineArgs(args, options, ldb_options, column_families, SelectCommand); @@ -984,7 +984,7 @@ void LDBCommand::PrepareOptions() { // existing DB. if (st.ok() && cf_list.size() > 1) { // Ignore single column family DB. - for (auto cf_name : cf_list) { + for (const auto& cf_name : cf_list) { column_families_.emplace_back(cf_name, options_); } } @@ -1397,8 +1397,7 @@ ManifestDumpCommand::ManifestDumpCommand( options, flags, false, BuildCmdLineOptions({ARG_VERBOSE, ARG_PATH, ARG_HEX, ARG_JSON})), verbose_(false), - json_(false), - path_("") { + json_(false) { verbose_ = IsFlagPresent(flags, ARG_VERBOSE); json_ = IsFlagPresent(flags, ARG_JSON); @@ -1544,8 +1543,7 @@ FileChecksumDumpCommand::FileChecksumDumpCommand( const std::map& options, const std::vector& flags) : LDBCommand(options, flags, false, - BuildCmdLineOptions({ARG_PATH, ARG_HEX})), - path_("") { + BuildCmdLineOptions({ARG_PATH, ARG_HEX})) { auto itr = options.find(ARG_PATH); if (itr != options.end()) { path_ = itr->second; @@ -1671,7 +1669,7 @@ void ListColumnFamiliesCommand::DoCommand() { } else { fprintf(stdout, "Column families in %s: \n{", db_path_.c_str()); bool first = true; - for (auto cf : column_families) { + for (const auto& cf : column_families) { if (!first) { fprintf(stdout, ", "); } @@ -1904,11 +1902,16 @@ void InternalDumpCommand::DoCommand() { s1 = 0; row = ikey.Encode().ToString(); val = key_version.value; - for (k = 0; row[k] != '\x01' && row[k] != '\0'; k++) s1++; - for (k = 0; val[k] != '\x01' && val[k] != '\0'; k++) s1++; + for (k = 0; row[k] != '\x01' && row[k] != '\0'; k++) { + s1++; + } + for (k = 0; val[k] != '\x01' && val[k] != '\0'; k++) { + s1++; + } for (int j = 0; row[j] != delim_[0] && row[j] != '\0' && row[j] != '\x01'; - j++) + j++) { rtype1 += row[j]; + } if (rtype2.compare("") && rtype2.compare(rtype1) != 0) { fprintf(stdout, "%s => count:%" PRIu64 "\tsize:%" PRIu64 "\n", rtype2.c_str(), c, s2); @@ -1954,7 +1957,9 @@ void InternalDumpCommand::DoCommand() { } // Terminate if maximum number of keys have been dumped - if (max_keys_ > 0 && count >= max_keys_) break; + if (max_keys_ > 0 && count >= max_keys_) { + break; + } } if (count_delim_) { fprintf(stdout, "%s => count:%" PRIu64 "\tsize:%" PRIu64 "\n", @@ -2193,9 +2198,13 @@ void 
DBDumperCommand::DoDumpCommand() { for (; iter->Valid(); iter->Next()) { int rawtime = 0; // If end marker was specified, we stop before it - if (!null_to_ && (iter->key().ToString() >= to_)) break; + if (!null_to_ && (iter->key().ToString() >= to_)) { + break; + } // Terminate if maximum number of keys have been dumped - if (max_keys == 0) break; + if (max_keys == 0) { + break; + } if (is_db_ttl_) { TtlIterator* it_ttl = static_cast_with_check(iter); rawtime = it_ttl->ttl_timestamp(); @@ -2216,8 +2225,9 @@ void DBDumperCommand::DoDumpCommand() { row = iter->key().ToString(); val = iter->value().ToString(); s1 = row.size() + val.size(); - for (int j = 0; row[j] != delim_[0] && row[j] != '\0'; j++) + for (int j = 0; row[j] != delim_[0] && row[j] != '\0'; j++) { rtype1 += row[j]; + } if (rtype2.compare("") && rtype2.compare(rtype1) != 0) { fprintf(stdout, "%s => count:%" PRIu64 "\tsize:%" PRIu64 "\n", rtype2.c_str(), c, s2); @@ -2294,7 +2304,7 @@ ReduceDBLevelsCommand::ReduceDBLevelsCommand( std::vector ReduceDBLevelsCommand::PrepareArgs( const std::string& db_path, int new_levels, bool print_old_level) { std::vector ret; - ret.push_back("reduce_levels"); + ret.emplace_back("reduce_levels"); ret.push_back("--" + ARG_DB + "=" + db_path); ret.push_back("--" + ARG_NEW_LEVELS + "=" + std::to_string(new_levels)); if (print_old_level) { @@ -2663,7 +2673,7 @@ class InMemoryHandler : public WriteBatch::Handler { return Status::OK(); } - ~InMemoryHandler() override {} + ~InMemoryHandler() override = default; protected: Handler::OptionState WriteAfterCommit() const override { @@ -2702,8 +2712,9 @@ void DumpWalFile(Options options, std::string wal_file, bool print_header, // we need the log number, but ParseFilename expects dbname/NNN.log. std::string sanitized = wal_file; size_t lastslash = sanitized.rfind('/'); - if (lastslash != std::string::npos) + if (lastslash != std::string::npos) { sanitized = sanitized.substr(lastslash + 1); + } if (!ParseFileName(sanitized, &log_number, &type)) { // bogus input, carry on as best we can log_number = 0; @@ -2979,9 +2990,8 @@ BatchPutCommand::BatchPutCommand( for (size_t i = 0; i < params.size(); i += 2) { std::string key = params.at(i); std::string value = params.at(i + 1); - key_values_.push_back(std::pair( - is_key_hex_ ? HexToString(key) : key, - is_value_hex_ ? HexToString(value) : value)); + key_values_.emplace_back(is_key_hex_ ? HexToString(key) : key, + is_value_hex_ ? HexToString(value) : value); } } create_if_missing_ = IsFlagPresent(flags_, ARG_CREATE_IF_MISSING); diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 465d1eb31..8e43972d4 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -185,7 +185,7 @@ class FileChecksumTestHelper { public: FileChecksumTestHelper(Options& options, DB* db, std::string db_name) : options_(options), db_(db), dbname_(db_name) {} - ~FileChecksumTestHelper() {} + ~FileChecksumTestHelper() = default; // Verify the checksum information in Manifest. 
Status VerifyChecksumInManifest( @@ -233,8 +233,8 @@ class FileChecksumTestHelper { return Status::Corruption("The number of files does not match!"); } for (size_t i = 0; i < live_files.size(); i++) { - std::string stored_checksum = ""; - std::string stored_func_name = ""; + std::string stored_checksum; + std::string stored_func_name; s = checksum_list->SearchOneFileChecksum( live_files[i].file_number, &stored_checksum, &stored_func_name); if (s.IsNotFound()) { @@ -634,9 +634,9 @@ TEST_F(LdbCmdTest, OptionParsing) { opts.env = TryLoadCustomOrDefaultEnv(); { std::vector args; - args.push_back("scan"); - args.push_back("--ttl"); - args.push_back("--timestamp"); + args.emplace_back("scan"); + args.emplace_back("--ttl"); + args.emplace_back("--timestamp"); LDBCommand* command = ROCKSDB_NAMESPACE::LDBCommand::InitFromCmdLineArgs( args, opts, LDBOptions(), nullptr); const std::vector flags = command->TEST_GetFlags(); @@ -648,9 +648,9 @@ TEST_F(LdbCmdTest, OptionParsing) { // test parsing options which contains equal sign in the option value { std::vector args; - args.push_back("scan"); - args.push_back("--db=/dev/shm/ldbtest/"); - args.push_back( + args.emplace_back("scan"); + args.emplace_back("--db=/dev/shm/ldbtest/"); + args.emplace_back( "--from='abcd/efg/hijk/lmn/" "opq:__rst.uvw.xyz?a=3+4+bcd+efghi&jk=lm_no&pq=rst-0&uv=wx-8&yz=a&bcd_" "ef=gh.ijk'"); diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index 20e9ebe2c..a7aebc121 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -10,7 +10,7 @@ namespace ROCKSDB_NAMESPACE { -LDBOptions::LDBOptions() {} +LDBOptions::LDBOptions() = default; void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options, const char* /*exec_name*/, bool to_stderr) { diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index f0b71bf8e..07c42b6e7 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -7,7 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-#include +#include #include "db/wide/wide_column_serialization.h" #include "file/random_access_file_reader.h" diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 1b269043a..59df1af8d 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -400,7 +400,7 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { // that whether it is a valid sst or not // (A directory "file" is not a valid sst) filenames.clear(); - filenames.push_back(dir_or_file); + filenames.emplace_back(dir_or_file); dir = false; } @@ -468,7 +468,7 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str()); exit(1); } else { - fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]); + fprintf(stdout, "raw dump written to file %s\n", out_filename.data()); } continue; } diff --git a/tools/trace_analyzer_test.cc b/tools/trace_analyzer_test.cc index e7d090eb2..1d5c87054 100644 --- a/tools/trace_analyzer_test.cc +++ b/tools/trace_analyzer_test.cc @@ -56,7 +56,7 @@ class TraceAnalyzerTest : public testing::Test { dbname_ = test_path_ + "/db"; } - ~TraceAnalyzerTest() override {} + ~TraceAnalyzerTest() override = default; void GenerateTrace(std::string trace_path) { Options options; @@ -87,11 +87,11 @@ class TraceAnalyzerTest : public testing::Test { ASSERT_OK(batch.DeleteRange("e", "f")); ASSERT_OK(db_->Write(wo, &batch)); std::vector keys; - keys.push_back("a"); - keys.push_back("b"); - keys.push_back("df"); - keys.push_back("gege"); - keys.push_back("hjhjhj"); + keys.emplace_back("a"); + keys.emplace_back("b"); + keys.emplace_back("df"); + keys.emplace_back("gege"); + keys.emplace_back("hjhjhj"); std::vector values; std::vector ss = db_->MultiGet(ro, keys, &values); ASSERT_GE(ss.size(), 0); @@ -176,8 +176,6 @@ class TraceAnalyzerTest : public testing::Test { ASSERT_EQ(result[i][0], cnt[i][0]); } } - - return; } void AnalyzeTrace(std::vector& paras_diff, diff --git a/tools/trace_analyzer_tool.cc b/tools/trace_analyzer_tool.cc index 00a4da046..a1e321dcd 100644 --- a/tools/trace_analyzer_tool.cc +++ b/tools/trace_analyzer_tool.cc @@ -201,7 +201,7 @@ uint64_t MultiplyCheckOverflow(uint64_t op1, uint64_t op2) { AnalyzerOptions::AnalyzerOptions() : correlation_map(kTaTypeNum, std::vector(kTaTypeNum, -1)) {} -AnalyzerOptions::~AnalyzerOptions() {} +AnalyzerOptions::~AnalyzerOptions() = default; void AnalyzerOptions::SparseCorrelationInput(const std::string& in_str) { std::string cur = in_str; @@ -214,14 +214,14 @@ void AnalyzerOptions::SparseCorrelationInput(const std::string& in_str) { exit(1); } std::string opt1, opt2; - std::size_t split = cur.find_first_of(","); + std::size_t split = cur.find_first_of(','); if (split != std::string::npos) { opt1 = cur.substr(1, split - 1); } else { fprintf(stderr, "Invalid correlation input: %s\n", in_str.c_str()); exit(1); } - std::size_t end = cur.find_first_of("]"); + std::size_t end = cur.find_first_of(']'); if (end != std::string::npos) { opt2 = cur.substr(split + 1, end - split - 1); } else { @@ -232,8 +232,7 @@ void AnalyzerOptions::SparseCorrelationInput(const std::string& in_str) { if (taOptToIndex.find(opt1) != taOptToIndex.end() && taOptToIndex.find(opt2) != taOptToIndex.end()) { - correlation_list.push_back( - std::make_pair(taOptToIndex[opt1], taOptToIndex[opt2])); + correlation_list.emplace_back(taOptToIndex[opt1], taOptToIndex[opt2]); } else { fprintf(stderr, "Invalid correlation input: %s\n", in_str.c_str()); exit(1); @@ -245,7 +244,6 
@@ void AnalyzerOptions::SparseCorrelationInput(const std::string& in_str) { correlation_map[it.first][it.second] = sequence; sequence++; } - return; } // The trace statistic struct constructor @@ -264,7 +262,7 @@ TraceStats::TraceStats() { a_ave_qps = 0.0; } -TraceStats::~TraceStats() {} +TraceStats::~TraceStats() = default; // The trace analyzer constructor TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path, @@ -354,7 +352,7 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path, } } -TraceAnalyzer::~TraceAnalyzer() {} +TraceAnalyzer::~TraceAnalyzer() = default; // Prepare the processing // Initiate the global trace reader and writer here diff --git a/trace_replay/trace_record.cc b/trace_replay/trace_record.cc index 21df0275d..a4a4eb9f8 100644 --- a/trace_replay/trace_record.cc +++ b/trace_replay/trace_record.cc @@ -97,7 +97,7 @@ IteratorQueryTraceRecord::IteratorQueryTraceRecord( upper_.PinSelf(upper_bound); } -IteratorQueryTraceRecord::~IteratorQueryTraceRecord() {} +IteratorQueryTraceRecord::~IteratorQueryTraceRecord() = default; Slice IteratorQueryTraceRecord::GetLowerBound() const { return Slice(lower_); } diff --git a/trace_replay/trace_replay.cc b/trace_replay/trace_replay.cc index 126a8e248..6ade8e316 100644 --- a/trace_replay/trace_replay.cc +++ b/trace_replay/trace_replay.cc @@ -58,8 +58,8 @@ Status TracerHelper::ParseTraceHeader(const Trace& header, int* trace_version, std::vector s_vec; int begin = 0, end; for (int i = 0; i < 3; i++) { - assert(header.payload.find("\t", begin) != std::string::npos); - end = static_cast(header.payload.find("\t", begin)); + assert(header.payload.find('\t', begin) != std::string::npos); + end = static_cast(header.payload.find('\t', begin)); s_vec.push_back(header.payload.substr(begin, end - begin)); begin = end + 1; } diff --git a/util/comparator.cc b/util/comparator.cc index 98ecef9d2..a5d7a7ca0 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -9,9 +9,8 @@ #include "rocksdb/comparator.h" -#include - #include +#include #include #include #include @@ -30,7 +29,7 @@ namespace ROCKSDB_NAMESPACE { namespace { class BytewiseComparatorImpl : public Comparator { public: - BytewiseComparatorImpl() {} + BytewiseComparatorImpl() = default; static const char* kClassName() { return "leveldb.BytewiseComparator"; } const char* Name() const override { return kClassName(); } @@ -112,7 +111,9 @@ class BytewiseComparatorImpl : public Comparator { } size_t diff_ind = s.difference_offset(t); // same slice - if (diff_ind >= s.size()) return false; + if (diff_ind >= s.size()) { + return false; + } uint8_t byte_s = static_cast(s[diff_ind]); uint8_t byte_t = static_cast(t[diff_ind]); // first different byte must be consecutive, and remaining bytes must be @@ -148,7 +149,7 @@ class BytewiseComparatorImpl : public Comparator { class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl { public: - ReverseBytewiseComparatorImpl() {} + ReverseBytewiseComparatorImpl() = default; static const char* kClassName() { return "rocksdb.ReverseBytewiseComparator"; diff --git a/util/compression_context_cache.cc b/util/compression_context_cache.cc index 52c3fac72..789cc7b62 100644 --- a/util/compression_context_cache.cc +++ b/util/compression_context_cache.cc @@ -67,7 +67,7 @@ static_assert(sizeof(ZSTDCachedData) % CACHE_LINE_SIZE == 0, class CompressionContextCache::Rep { public: - Rep() {} + Rep() = default; ZSTDUncompressCachedData GetZSTDUncompressData() { auto p = per_core_uncompr_.AccessElementAndIndex(); 
int64_t idx = static_cast(p.second); diff --git a/util/crc32c.cc b/util/crc32c.cc index 9e97045f4..38a69bb50 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -11,9 +11,8 @@ // four bytes at a time. #include "util/crc32c.h" -#include - #include +#include #include #include "port/lang.h" diff --git a/util/dynamic_bloom_test.cc b/util/dynamic_bloom_test.cc index 925c5479a..949ab8f76 100644 --- a/util/dynamic_bloom_test.cc +++ b/util/dynamic_bloom_test.cc @@ -164,10 +164,11 @@ TEST_F(DynamicBloomTest, VaryingLengths) { "%5.2f%% @ num = %6u, bloom_bits = %6u\n", nonseq ? "nonseq" : "seq", rate * 100.0, num, bloom_bits); - if (rate > 0.0125) + if (rate > 0.0125) { mediocre_filters++; // Allowed, but not too often - else + } else { good_filters++; + } } } diff --git a/util/file_checksum_helper.cc b/util/file_checksum_helper.cc index b8c4099b8..59da96fa8 100644 --- a/util/file_checksum_helper.cc +++ b/util/file_checksum_helper.cc @@ -31,7 +31,7 @@ Status FileChecksumListImpl::GetAllFileChecksums( return Status::InvalidArgument("Pointer has not been initiated"); } - for (auto i : checksum_map_) { + for (const auto& i : checksum_map_) { file_numbers->push_back(i.first); checksums->push_back(i.second.first); checksum_func_names->push_back(i.second.second); diff --git a/util/file_reader_writer_test.cc b/util/file_reader_writer_test.cc index 68776612b..036c030dc 100644 --- a/util/file_reader_writer_test.cc +++ b/util/file_reader_writer_test.cc @@ -31,7 +31,7 @@ TEST_F(WritableFileWriterTest, RangeSync) { class FakeWF : public FSWritableFile { public: explicit FakeWF() : size_(0), last_synced_(0) {} - ~FakeWF() override {} + ~FakeWF() override = default; using FSWritableFile::Append; IOStatus Append(const Slice& data, const IOOptions& /*options*/, @@ -134,7 +134,7 @@ TEST_F(WritableFileWriterTest, IncrementalBuffer) { : file_data_(_file_data), use_direct_io_(_use_direct_io), no_flush_(_no_flush) {} - ~FakeWF() override {} + ~FakeWF() override = default; using FSWritableFile::Append; IOStatus Append(const Slice& data, const IOOptions& /*options*/, @@ -588,7 +588,7 @@ class ReadaheadSequentialFileTest : public testing::Test, scratch_.reset(new char[2 * readahead_size_]); ResetSourceStr(); } - ReadaheadSequentialFileTest() {} + ReadaheadSequentialFileTest() = default; std::string Read(size_t n) { Slice result; Status s = test_read_holder_->Read( @@ -919,7 +919,7 @@ class WritableFileWriterIOPriorityTest : public testing::Test { class FakeWF : public FSWritableFile { public: explicit FakeWF(Env::IOPriority io_priority) { SetIOPriority(io_priority); } - ~FakeWF() override {} + ~FakeWF() override = default; IOStatus Append(const Slice& /*data*/, const IOOptions& options, IODebugContext* /*dbg*/) override { diff --git a/util/filelock_test.cc b/util/filelock_test.cc index 82021aec9..7a41449bf 100644 --- a/util/filelock_test.cc +++ b/util/filelock_test.cc @@ -34,7 +34,7 @@ class LockTest : public testing::Test { current_ = this; } - ~LockTest() override {} + ~LockTest() override = default; Status LockFile(FileLock** db_lock) { return env_->LockFile(file_, db_lock); } @@ -94,8 +94,9 @@ class LockTest : public testing::Test { } else if (pid > 0) { // parent process int status; - while (-1 == waitpid(pid, &status, 0)) + while (-1 == waitpid(pid, &status, 0)) { ; + } if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { // child process exited with non success status return false; diff --git a/util/random.cc b/util/random.cc index 7ac6ee19a..17396d32f 100644 --- a/util/random.cc +++ b/util/random.cc @@ -6,10 +6,9 
@@ #include "util/random.h" -#include -#include -#include - +#include +#include +#include #include #include From dce3ca5ab8e3ac93c9e2071e0be8392a52fbab23 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Fri, 1 Dec 2023 22:35:34 -0800 Subject: [PATCH 313/386] Remove extra semi colon from internal_repo_rocksdb/repo/monitoring/perf_context_imp.h Summary: `-Wextra-semi` or `-Wextra-semi-stmt` If the code compiles, this is safe to land. Reviewed By: dmm-fb Differential Revision: D51778007 fbshipit-source-id: 5d1b20a3acc4bcc7cd7c204f2f73a14fc8f81883 --- monitoring/perf_context_imp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index 5b66ff2ff..a63d931a7 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -74,7 +74,8 @@ extern thread_local PerfContext perf_context; #define PERF_COUNTER_ADD(metric, value) \ if (perf_level >= PerfLevel::kEnableCount) { \ perf_context.metric += value; \ - } + } \ + static_assert(true, "semicolon required") // Increase metric value #define PERF_COUNTER_BY_LEVEL_ADD(metric, value, level) \ From ba8fa0f546b55c417d2563f53f282ec67285d309 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 4 Dec 2023 11:17:32 -0800 Subject: [PATCH 314/386] internal_repo_rocksdb (4372117296613874540) (#12117) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12117 Reviewed By: ajkr Differential Revision: D51745846 Pulled By: jowlyzhang fbshipit-source-id: 51c806a484b3b43d174b06d2cfe9499191d09914 --- db/import_column_family_test.cc | 4 ++-- db/listener_test.cc | 8 +++---- db/log_reader.cc | 4 ++-- db/log_test.cc | 2 +- db/log_writer.cc | 2 +- db/malloc_stats.cc | 3 +-- db/manual_compaction_test.cc | 2 +- db/memtable.cc | 2 +- db/merge_helper.cc | 4 +++- db/merge_test.cc | 21 +++++++++++------- db/obsolete_files_test.cc | 6 ++--- db/options_file_test.cc | 8 +++---- db/perf_context_test.cc | 4 ++-- db/plain_table_db_test.cc | 2 +- db/prefix_test.cc | 29 ++++++++++++++++++------- db/range_del_aggregator.cc | 1 - db/repair_test.cc | 3 +-- db/table_cache.cc | 2 +- db/table_properties_collector_test.cc | 3 +-- db/version_edit.cc | 1 - db/version_edit_test.cc | 4 +++- db/version_set.cc | 28 +++++++++++++++--------- db/version_set_test.cc | 10 ++++----- db/wide/db_wide_basic_test.cc | 22 +++++++++---------- db/write_batch.cc | 12 +++++----- db/write_callback_test.cc | 7 +++--- db_stress_tool/batched_ops_stress.cc | 4 ++-- db_stress_tool/cf_consistency_stress.cc | 4 ++-- db_stress_tool/db_stress_listener.cc | 2 +- db_stress_tool/db_stress_test_base.cc | 8 +++---- db_stress_tool/expected_state.cc | 2 +- db_stress_tool/multi_ops_txns_stress.cc | 6 ++--- db_stress_tool/no_batched_ops_stress.cc | 8 +++---- env/composite_env.cc | 2 +- env/env.cc | 18 +++++++-------- env/env_basic_test.cc | 2 +- env/env_chroot.cc | 5 +++-- env/env_encryption.cc | 4 ++-- env/env_posix.cc | 16 ++++++++------ env/env_test.cc | 7 +++--- env/file_system.cc | 4 ++-- env/fs_posix.cc | 15 +++++++------ env/io_posix.cc | 15 ++++++++----- env/mock_env.cc | 2 +- env/mock_env_test.cc | 12 +++++----- file/delete_scheduler.cc | 2 +- file/filename.cc | 5 ++--- file/random_access_file_reader_test.cc | 12 +++++++--- 48 files changed, 193 insertions(+), 156 deletions(-) diff --git a/db/import_column_family_test.cc b/db/import_column_family_test.cc index f6c1a0248..89586bcd1 100644 --- a/db/import_column_family_test.cc +++ b/db/import_column_family_test.cc @@ -326,7 +326,7 @@ 
TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithRangeTombstone) { const SstFileMetaData* file_meta = nullptr; for (const auto& level_meta : import_cf_meta.levels) { if (!level_meta.files.empty()) { - file_meta = &(level_meta.files[0]); + file_meta = level_meta.files.data(); break; } } @@ -389,7 +389,7 @@ TEST_F(ImportColumnFamilyTest, ImportExportedSSTFromAnotherCF) { *metadata_ptr_, &import_cfh2_)); ASSERT_NE(import_cfh2_, nullptr); delete metadata_ptr_; - metadata_ptr_ = NULL; + metadata_ptr_ = nullptr; std::string value1, value2; diff --git a/db/listener_test.cc b/db/listener_test.cc index 41577b92c..d298a86e7 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -132,8 +132,8 @@ class TestCompactionListener : public EventListener { ASSERT_EQ(db->GetEnv()->GetThreadID(), ci.thread_id); ASSERT_GT(ci.thread_id, 0U); - for (auto fl : {ci.input_files, ci.output_files}) { - for (auto fn : fl) { + for (const auto& fl : {ci.input_files, ci.output_files}) { + for (const auto& fn : fl) { auto it = ci.table_properties.find(fn); ASSERT_NE(it, ci.table_properties.end()); auto tp = it->second; @@ -237,7 +237,7 @@ class TestFlushListener : public EventListener { std::vector thread_list; ASSERT_OK(env_->GetThreadList(&thread_list)); bool found_match = false; - for (auto thread_status : thread_list) { + for (const auto& thread_status : thread_list) { if (thread_status.operation_type == ThreadStatus::OP_FLUSH || thread_status.operation_type == ThreadStatus::OP_COMPACTION) { if (thread_id == thread_status.thread_id) { @@ -893,7 +893,7 @@ class MemTableSealedListener : public EventListener { SequenceNumber latest_seq_number_; public: - MemTableSealedListener() {} + MemTableSealedListener() = default; void OnMemTableSealed(const MemTableInfo& info) override { latest_seq_number_ = info.first_seqno; } diff --git a/db/log_reader.cc b/db/log_reader.cc index 4e470616f..ca2f9c55b 100644 --- a/db/log_reader.cc +++ b/db/log_reader.cc @@ -9,7 +9,7 @@ #include "db/log_reader.h" -#include +#include #include "file/sequence_file_reader.h" #include "port/lang.h" @@ -21,7 +21,7 @@ namespace ROCKSDB_NAMESPACE { namespace log { -Reader::Reporter::~Reporter() {} +Reader::Reporter::~Reporter() = default; Reader::Reader(std::shared_ptr info_log, std::unique_ptr&& _file, diff --git a/db/log_test.cc b/db/log_test.cc index fa5e2aa0f..0bf3bf5ae 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -1167,7 +1167,7 @@ TEST_P(StreamingCompressionTest, Basic) { } allocator->Deallocate((void*)output_buffer); } while (remaining > 0); - std::string uncompressed_buffer = ""; + std::string uncompressed_buffer; int ret_val = 0; size_t output_pos; char* uncompressed_output_buffer = (char*)allocator->Allocate(kBlockSize); diff --git a/db/log_writer.cc b/db/log_writer.cc index 86e0286cc..5fc46b33f 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -9,7 +9,7 @@ #include "db/log_writer.h" -#include +#include #include "file/writable_file_writer.h" #include "rocksdb/env.h" diff --git a/db/malloc_stats.cc b/db/malloc_stats.cc index 641e01f9a..33f19725d 100644 --- a/db/malloc_stats.cc +++ b/db/malloc_stats.cc @@ -9,8 +9,7 @@ #include "db/malloc_stats.h" -#include - +#include #include #include "port/jemalloc_helper.h" diff --git a/db/manual_compaction_test.cc b/db/manual_compaction_test.cc index 95b099a66..e84031065 100644 --- a/db/manual_compaction_test.cc +++ b/db/manual_compaction_test.cc @@ -58,7 +58,7 @@ class ManualCompactionTest : public testing::Test { class DestroyAllCompactionFilter : public CompactionFilter { public: 
- DestroyAllCompactionFilter() {} + DestroyAllCompactionFilter() = default; bool Filter(int /*level*/, const Slice& /*key*/, const Slice& existing_value, std::string* /*new_value*/, diff --git a/db/memtable.cc b/db/memtable.cc index 630d35fed..fe5b4b39e 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -1362,7 +1362,7 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range, range_indexes[num_keys++] = iter.index(); } } - bloom_filter_->MayContain(num_keys, &bloom_keys[0], &may_match[0]); + bloom_filter_->MayContain(num_keys, bloom_keys.data(), may_match.data()); for (int i = 0; i < num_keys; ++i) { if (!may_match[i]) { temp_range.SkipIndex(range_indexes[i]); diff --git a/db/merge_helper.cc b/db/merge_helper.cc index d8b1d788b..2c5e2fe7d 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -428,7 +428,9 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, Status s = ParseInternalKey(original_key, &orig_ikey, allow_data_in_errors); assert(s.ok()); - if (!s.ok()) return s; + if (!s.ok()) { + return s; + } assert(kTypeMerge == orig_ikey.type); diff --git a/db/merge_test.cc b/db/merge_test.cc index 93a8535a7..695ebe688 100644 --- a/db/merge_test.cc +++ b/db/merge_test.cc @@ -3,8 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // -#include - +#include #include #include @@ -87,7 +86,9 @@ class EnvMergeTest : public EnvWrapper { static std::unique_ptr singleton_; static EnvMergeTest* GetInstance() { - if (nullptr == singleton_) singleton_.reset(new EnvMergeTest); + if (nullptr == singleton_) { + singleton_.reset(new EnvMergeTest); + } return singleton_.get(); } }; @@ -145,7 +146,7 @@ class Counters { assert(db_); } - virtual ~Counters() {} + virtual ~Counters() = default; // public interface of Counters. // All four functions return false @@ -194,7 +195,7 @@ class Counters { std::cerr << "value corruption\n"; return false; } - *value = DecodeFixed64(&str[0]); + *value = DecodeFixed64(str.data()); return true; } else { std::cerr << s.ToString() << std::endl; @@ -220,14 +221,18 @@ class Counters { uint64_t value = default_; int result = get(key, &value); assert(result); - if (result == 0) exit(1); // Disable unused variable warning. + if (result == 0) { + exit(1); // Disable unused variable warning. + } return value; } void assert_add(const std::string& key, uint64_t value) { int result = add(key, value); assert(result); - if (result == 0) exit(1); // Disable unused variable warning. + if (result == 0) { + exit(1); // Disable unused variable warning. + } } }; @@ -496,7 +501,7 @@ void testSingleBatchSuccessiveMerge(DB* db, size_t max_num_merges, std::string get_value_str; ASSERT_OK(db->Get(ReadOptions(), key, &get_value_str)); assert(get_value_str.size() == sizeof(uint64_t)); - uint64_t get_value = DecodeFixed64(&get_value_str[0]); + uint64_t get_value = DecodeFixed64(get_value_str.data()); ASSERT_EQ(get_value, num_merges * merge_value); ASSERT_EQ(num_merge_operator_calls, static_cast((num_merges % (max_num_merges + 1)))); diff --git a/db/obsolete_files_test.cc b/db/obsolete_files_test.cc index eec1486c1..d77594d60 100644 --- a/db/obsolete_files_test.cc +++ b/db/obsolete_files_test.cc @@ -7,10 +7,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
- -#include - #include +#include #include #include #include @@ -68,7 +66,7 @@ class ObsoleteFilesTest : public DBTestBase { int log_cnt = 0; int sst_cnt = 0; int manifest_cnt = 0; - for (auto file : filenames) { + for (const auto& file : filenames) { uint64_t number; FileType type; if (ParseFileName(file, &number, &type)) { diff --git a/db/options_file_test.cc b/db/options_file_test.cc index c3adbeb64..7e48f0cf3 100644 --- a/db/options_file_test.cc +++ b/db/options_file_test.cc @@ -28,7 +28,7 @@ void UpdateOptionsFiles(DB* db, uint64_t number; FileType type; *options_files_count = 0; - for (auto filename : filenames) { + for (const auto& filename : filenames) { if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { filename_history->insert(filename); (*options_files_count)++; @@ -44,16 +44,16 @@ void VerifyOptionsFileName( EXPECT_OK(db->GetEnv()->GetChildren(db->GetName(), &filenames)); uint64_t number; FileType type; - for (auto filename : filenames) { + for (const auto& filename : filenames) { if (ParseFileName(filename, &number, &type) && type == kOptionsFile) { current_filenames.insert(filename); } } - for (auto past_filename : past_filenames) { + for (const auto& past_filename : past_filenames) { if (current_filenames.find(past_filename) != current_filenames.end()) { continue; } - for (auto filename : current_filenames) { + for (const auto& filename : current_filenames) { ASSERT_GT(filename, past_filename); } } diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 666ed32f0..2666b8733 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -1050,7 +1050,7 @@ TEST_F(PerfContextTest, MergeOperandCount) { std::vector statuses(num_keys); db->MultiGet(ReadOptions(), db->DefaultColumnFamily(), num_keys, - &key_slices[0], &results[0], &statuses[0]); + key_slices.data(), results.data(), statuses.data()); for (size_t i = 0; i < num_keys; ++i) { ASSERT_OK(statuses[i]); @@ -1068,7 +1068,7 @@ TEST_F(PerfContextTest, MergeOperandCount) { std::vector statuses(num_keys); db->MultiGetEntity(ReadOptions(), db->DefaultColumnFamily(), num_keys, - &key_slices[0], &results[0], &statuses[0]); + key_slices.data(), results.data(), statuses.data()); for (size_t i = 0; i < num_keys; ++i) { ASSERT_OK(statuses[i]); diff --git a/db/plain_table_db_test.cc b/db/plain_table_db_test.cc index a6acb7b18..1fa8d8f54 100644 --- a/db/plain_table_db_test.cc +++ b/db/plain_table_db_test.cc @@ -292,7 +292,7 @@ class TestPlainTableReader : public PlainTableReader { table_properties_ = std::move(props); } - ~TestPlainTableReader() override {} + ~TestPlainTableReader() override = default; private: bool MatchBloom(uint32_t hash) const override { diff --git a/db/prefix_test.cc b/db/prefix_test.cc index bb6e6f7a6..b55956aa8 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -89,8 +89,12 @@ class TestKeyComparator : public Comparator { const TestKey* key_a = &kkey_a; const TestKey* key_b = &kkey_b; if (key_a->prefix != key_b->prefix) { - if (key_a->prefix < key_b->prefix) return -1; - if (key_a->prefix > key_b->prefix) return 1; + if (key_a->prefix < key_b->prefix) { + return -1; + } + if (key_a->prefix > key_b->prefix) { + return 1; + } } else { EXPECT_TRUE(key_a->prefix == key_b->prefix); // note, both a and b could be prefix only @@ -99,8 +103,12 @@ class TestKeyComparator : public Comparator { EXPECT_TRUE( (a.size() == sizeof(uint64_t) && b.size() == sizeof(TestKey)) || (b.size() == sizeof(uint64_t) && a.size() == sizeof(TestKey))); - if (a.size() < b.size()) return -1; - if 
(a.size() > b.size()) return 1; + if (a.size() < b.size()) { + return -1; + } + if (a.size() > b.size()) { + return 1; + } } else { // both a and b are prefix if (a.size() == sizeof(uint64_t)) { @@ -109,9 +117,15 @@ class TestKeyComparator : public Comparator { // both a and b are whole key EXPECT_TRUE(a.size() == sizeof(TestKey) && b.size() == sizeof(TestKey)); - if (key_a->sorted < key_b->sorted) return -1; - if (key_a->sorted > key_b->sorted) return 1; - if (key_a->sorted == key_b->sorted) return 0; + if (key_a->sorted < key_b->sorted) { + return -1; + } + if (key_a->sorted > key_b->sorted) { + return 1; + } + if (key_a->sorted == key_b->sorted) { + return 0; + } } } return 0; @@ -892,4 +906,3 @@ int main(int argc, char** argv) { } #endif // GFLAGS - diff --git a/db/range_del_aggregator.cc b/db/range_del_aggregator.cc index 6e76f9c72..652afe65a 100644 --- a/db/range_del_aggregator.cc +++ b/db/range_del_aggregator.cc @@ -8,7 +8,6 @@ #include "db/compaction/compaction_iteration_stats.h" #include "db/dbformat.h" #include "db/pinned_iterators_manager.h" -#include "db/range_del_aggregator.h" #include "db/range_tombstone_fragmenter.h" #include "db/version_edit.h" #include "rocksdb/comparator.h" diff --git a/db/repair_test.cc b/db/repair_test.cc index e8cc40aab..8adc06f0c 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -378,8 +378,7 @@ TEST_P(RepairTestWithTimestamp, UnflushedSst) { ColumnFamilyOptions cf_options(options); std::vector column_families; - column_families.push_back( - ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + column_families.emplace_back(kDefaultColumnFamilyName, cf_options); ASSERT_OK(DB::Open(options, dbname_, column_families, &handles_, &db_)); diff --git a/db/table_cache.cc b/db/table_cache.cc index 2b1606c16..2c0092e7d 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -85,7 +85,7 @@ TableCache::TableCache(const ImmutableOptions& ioptions, } } -TableCache::~TableCache() {} +TableCache::~TableCache() = default; Status TableCache::GetTableReader( const ReadOptions& ro, const FileOptions& file_options, diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 437b7e309..e10f02e67 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -180,7 +180,6 @@ class RegularKeysStartWithAInternal : public IntTblPropCollector { uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_slow */) override { // Nothing to do. 
- return; } UserCollectedProperties GetReadableProperties() const override { @@ -227,7 +226,7 @@ class FlushBlockEveryThreePolicy : public FlushBlockPolicy { class FlushBlockEveryThreePolicyFactory : public FlushBlockPolicyFactory { public: - explicit FlushBlockEveryThreePolicyFactory() {} + explicit FlushBlockEveryThreePolicyFactory() = default; const char* Name() const override { return "FlushBlockEveryThreePolicyFactory"; diff --git a/db/version_edit.cc b/db/version_edit.cc index 6459c2ff8..482aa65a7 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -484,7 +484,6 @@ void VersionEdit::EncodeFileBoundaries(std::string* dst, StripTimestampFromInternalKey(&largest_buf, meta.largest.Encode(), ts_sz); PutLengthPrefixedSlice(dst, smallest_buf); PutLengthPrefixedSlice(dst, largest_buf); - return; }; Status VersionEdit::DecodeFrom(const Slice& src) { diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index c47389901..252352069 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -562,7 +562,9 @@ TEST_F(VersionEditTest, AddWalDebug) { std::stringstream ss; ss << "{\"LogNumber\": " << kLogNumbers[i] << ", " << "\"SyncedSizeInBytes\": " << kSizeInBytes[i] << "}"; - if (i < n - 1) ss << ", "; + if (i < n - 1) { + ss << ", "; + } expected_json += ss.str(); } expected_json += "], \"ColumnFamily\": 0}"; diff --git a/db/version_set.cc b/db/version_set.cc index 329322ccb..1d339d129 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -122,7 +122,9 @@ Status OverlapWithIterator(const Comparator* ucmp, ParsedInternalKey seek_result; Status s = ParseInternalKey(iter->key(), &seek_result, false /* log_err_key */); // TODO - if (!s.ok()) return s; + if (!s.ok()) { + return s; + } if (ucmp->CompareWithoutTimestamp(seek_result.user_key, largest_user_key) <= 0) { @@ -835,9 +837,9 @@ Version::~Version() { assert(cfd_ != nullptr); uint32_t path_id = f->fd.GetPathId(); assert(path_id < cfd_->ioptions()->cf_paths.size()); - vset_->obsolete_files_.push_back( - ObsoleteFileInfo(f, cfd_->ioptions()->cf_paths[path_id].path, - cfd_->GetFileMetadataCacheReservationManager())); + vset_->obsolete_files_.emplace_back( + f, cfd_->ioptions()->cf_paths[path_id].path, + cfd_->GetFileMetadataCacheReservationManager()); } } } @@ -3101,7 +3103,9 @@ bool Version::MaybeInitializeFileMetaData(const ReadOptions& read_options, file_meta->fd.GetNumber(), s.ToString().c_str()); return false; } - if (tp.get() == nullptr) return false; + if (tp.get() == nullptr) { + return false; + } file_meta->num_entries = tp->num_entries; file_meta->num_deletions = tp->num_deletions; file_meta->raw_value_size = tp->raw_value_size; @@ -4515,7 +4519,9 @@ const char* VersionStorageInfo::LevelSummary( for (int i = 0; i < num_levels(); i++) { int sz = sizeof(scratch->buffer) - len; int ret = snprintf(scratch->buffer + len, sz, "%d ", int(files_[i].size())); - if (ret < 0 || ret >= sz) break; + if (ret < 0 || ret >= sz) { + break; + } len += ret; } if (len > 0) { @@ -4545,7 +4551,9 @@ const char* VersionStorageInfo::LevelFileSummary(FileSummaryStorage* scratch, "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->fd.GetNumber(), f->fd.smallest_seqno, sztxt, static_cast(f->being_compacted)); - if (ret < 0 || ret >= sz) break; + if (ret < 0 || ret >= sz) { + break; + } len += ret; } // overwrite the last space (only if files_[level].size() is non-zero) @@ -5384,9 +5392,9 @@ Status VersionSet::ProcessManifestWrites( } for (const auto* cfd : *column_family_set_) { assert(curr_state.find(cfd->GetID()) == curr_state.end()); - 
curr_state.emplace(std::make_pair( + curr_state.emplace( cfd->GetID(), - MutableCFState(cfd->GetLogNumber(), cfd->GetFullHistoryTsLow()))); + MutableCFState(cfd->GetLogNumber(), cfd->GetFullHistoryTsLow())); } for (const auto& wal : wals_.GetWals()) { @@ -7277,7 +7285,7 @@ ReactiveVersionSet::ReactiveVersionSet( /*db_session_id*/ "", /*daily_offpeak_time_utc*/ "", /*error_handler=*/nullptr) {} -ReactiveVersionSet::~ReactiveVersionSet() {} +ReactiveVersionSet::~ReactiveVersionSet() = default; Status ReactiveVersionSet::Recover( const std::vector& column_families, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 5eb910c9f..f925ec36e 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -34,7 +34,7 @@ class GenerateLevelFilesBriefTest : public testing::Test { LevelFilesBrief file_level_; Arena arena_; - GenerateLevelFilesBriefTest() {} + GenerateLevelFilesBriefTest() = default; ~GenerateLevelFilesBriefTest() override { for (size_t i = 0; i < files_.size(); i++) { @@ -213,7 +213,7 @@ class VersionStorageInfoTest : public VersionStorageInfoTestBase { public: VersionStorageInfoTest() : VersionStorageInfoTestBase(BytewiseComparator()) {} - ~VersionStorageInfoTest() override {} + ~VersionStorageInfoTest() override = default; }; TEST_F(VersionStorageInfoTest, MaxBytesForLevelStatic) { @@ -928,7 +928,7 @@ class VersionStorageInfoTimestampTest : public VersionStorageInfoTestBase { VersionStorageInfoTimestampTest() : VersionStorageInfoTestBase(test::BytewiseComparatorWithU64TsWrapper()) { } - ~VersionStorageInfoTimestampTest() override {} + ~VersionStorageInfoTimestampTest() override = default; std::string Timestamp(uint64_t ts) const { std::string ret; PutFixed64(&ret, ts); @@ -982,7 +982,7 @@ class FindLevelFileTest : public testing::Test { FindLevelFileTest() : disjoint_sorted_files_(true) {} - ~FindLevelFileTest() override {} + ~FindLevelFileTest() override = default; void LevelFileInit(size_t num = 0) { char* mem = arena_.AllocateAligned(num * sizeof(FdWithKeyRange)); @@ -3662,7 +3662,7 @@ class ChargeFileMetadataTestWithParam : public ChargeFileMetadataTest, public testing::WithParamInterface { public: - ChargeFileMetadataTestWithParam() {} + ChargeFileMetadataTestWithParam() = default; }; INSTANTIATE_TEST_CASE_P( diff --git a/db/wide/db_wide_basic_test.cc b/db/wide/db_wide_basic_test.cc index 2280a3ed2..15d2fdff7 100644 --- a/db/wide/db_wide_basic_test.cc +++ b/db/wide/db_wide_basic_test.cc @@ -94,7 +94,7 @@ TEST_F(DBWideBasicTest, PutEntity) { std::array statuses; db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, - &keys[0], &values[0], &statuses[0]); + keys.data(), values.data(), statuses.data()); ASSERT_OK(statuses[0]); ASSERT_EQ(values[0], first_value_of_default_column); @@ -114,7 +114,7 @@ TEST_F(DBWideBasicTest, PutEntity) { std::array statuses; db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys, - &keys[0], &results[0], &statuses[0]); + keys.data(), results.data(), statuses.data()); ASSERT_OK(statuses[0]); ASSERT_EQ(results[0].columns(), first_columns); @@ -398,8 +398,8 @@ TEST_F(DBWideBasicTest, MultiCFMultiGetEntity) { std::array results; std::array statuses; - db_->MultiGetEntity(ReadOptions(), num_keys, &column_families[0], &keys[0], - &results[0], &statuses[0]); + db_->MultiGetEntity(ReadOptions(), num_keys, column_families.data(), + keys.data(), results.data(), statuses.data()); ASSERT_OK(statuses[0]); ASSERT_EQ(results[0].columns(), first_columns); @@ -642,7 +642,7 @@ TEST_F(DBWideBasicTest, MergePlainKeyValue) 
{ std::array statuses; db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys, - &keys[0], &results[0], &statuses[0]); + keys.data(), results.data(), statuses.data()); ASSERT_OK(statuses[0]); ASSERT_EQ(results[0].columns(), expected_first_columns); @@ -822,7 +822,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { std::array statuses; db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys, - &keys[0], &values[0], &statuses[0]); + keys.data(), values.data(), statuses.data()); ASSERT_EQ(values[0], first_expected_default); ASSERT_OK(statuses[0]); @@ -839,7 +839,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { std::array statuses; db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys, - &keys[0], &results[0], &statuses[0]); + keys.data(), results.data(), statuses.data()); ASSERT_OK(statuses[0]); ASSERT_EQ(results[0].columns(), first_expected_columns); @@ -900,7 +900,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { int number_of_operands = 0; ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - first_key, &merge_operands[0], + first_key, merge_operands.data(), &get_merge_opts, &number_of_operands)); ASSERT_EQ(number_of_operands, num_merge_operands); @@ -913,7 +913,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { int number_of_operands = 0; ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - second_key, &merge_operands[0], + second_key, merge_operands.data(), &get_merge_opts, &number_of_operands)); ASSERT_EQ(number_of_operands, num_merge_operands); @@ -933,7 +933,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { int number_of_operands = 0; ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - first_key, &merge_operands[0], + first_key, merge_operands.data(), &get_merge_opts, &number_of_operands)); ASSERT_EQ(number_of_operands, num_merge_operands); @@ -945,7 +945,7 @@ TEST_F(DBWideBasicTest, MergeEntity) { int number_of_operands = 0; ASSERT_OK(db_->GetMergeOperands(ReadOptions(), db_->DefaultColumnFamily(), - second_key, &merge_operands[0], + second_key, merge_operands.data(), &get_merge_opts, &number_of_operands)); ASSERT_EQ(number_of_operands, num_merge_operands); diff --git a/db/write_batch.cc b/db/write_batch.cc index d5c6df3d6..e3c2c5635 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -233,9 +233,9 @@ WriteBatch& WriteBatch::operator=(WriteBatch&& src) { return *this; } -WriteBatch::~WriteBatch() {} +WriteBatch::~WriteBatch() = default; -WriteBatch::Handler::~Handler() {} +WriteBatch::Handler::~Handler() = default; void WriteBatch::Handler::LogData(const Slice& /*blob*/) { // If the user has not specified something to do with blobs, then we ignore @@ -741,7 +741,7 @@ SequenceNumber WriteBatchInternal::Sequence(const WriteBatch* b) { } void WriteBatchInternal::SetSequence(WriteBatch* b, SequenceNumber seq) { - EncodeFixed64(&b->rep_[0], seq); + EncodeFixed64(b->rep_.data(), seq); } size_t WriteBatchInternal::GetFirstOffset(WriteBatch* /*b*/) { @@ -1856,7 +1856,9 @@ class MemTableInserter : public WriteBatch::Handler { } void DecrementProtectionInfoIdxForTryAgain() { - if (prot_info_ != nullptr) --prot_info_idx_; + if (prot_info_ != nullptr) { + --prot_info_idx_; + } } void ResetProtectionInfo() { @@ -3018,7 +3020,7 @@ class ProtectionInfoUpdater : public WriteBatch::Handler { explicit ProtectionInfoUpdater(WriteBatch::ProtectionInfo* prot_info) : prot_info_(prot_info) {} - ~ProtectionInfoUpdater() override {} + ~ProtectionInfoUpdater() override = default; Status PutCF(uint32_t cf, const Slice& 
key, const Slice& val) override { return UpdateProtInfo(cf, key, val, kTypeValue); diff --git a/db/write_callback_test.cc b/db/write_callback_test.cc index 1be8593f1..7709257f0 100644 --- a/db/write_callback_test.cc +++ b/db/write_callback_test.cc @@ -64,7 +64,7 @@ class MockWriteCallback : public WriteCallback { bool allow_batching_ = false; std::atomic was_called_{false}; - MockWriteCallback() {} + MockWriteCallback() = default; MockWriteCallback(const MockWriteCallback& other) { should_fail_ = other.should_fail_; @@ -111,7 +111,7 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) { WriteOP(bool should_fail = false) { callback_.should_fail_ = should_fail; } void Put(const string& key, const string& val) { - kvs_.push_back(std::make_pair(key, val)); + kvs_.emplace_back(key, val); ASSERT_OK(write_batch_.Put(key, val)); } @@ -176,8 +176,7 @@ TEST_P(WriteCallbackPTest, WriteWithCallbackTest) { DBOptions db_options(options); ColumnFamilyOptions cf_options(options); std::vector column_families; - column_families.push_back( - ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + column_families.emplace_back(kDefaultColumnFamilyName, cf_options); std::vector handles; auto open_s = DBImpl::Open(db_options, dbname, column_families, &handles, &db, seq_per_batch_, true /* batch_per_txn */); diff --git a/db_stress_tool/batched_ops_stress.cc b/db_stress_tool/batched_ops_stress.cc index 7fb89b60b..25e4d2372 100644 --- a/db_stress_tool/batched_ops_stress.cc +++ b/db_stress_tool/batched_ops_stress.cc @@ -13,8 +13,8 @@ namespace ROCKSDB_NAMESPACE { class BatchedOpsStressTest : public StressTest { public: - BatchedOpsStressTest() {} - virtual ~BatchedOpsStressTest() {} + BatchedOpsStressTest() = default; + virtual ~BatchedOpsStressTest() = default; bool IsStateTracked() const override { return false; } diff --git a/db_stress_tool/cf_consistency_stress.cc b/db_stress_tool/cf_consistency_stress.cc index a7b0895f3..da382ae3b 100644 --- a/db_stress_tool/cf_consistency_stress.cc +++ b/db_stress_tool/cf_consistency_stress.cc @@ -16,7 +16,7 @@ class CfConsistencyStressTest : public StressTest { public: CfConsistencyStressTest() : batch_id_(0) {} - ~CfConsistencyStressTest() override {} + ~CfConsistencyStressTest() override = default; bool IsStateTracked() const override { return false; } @@ -232,7 +232,7 @@ class CfConsistencyStressTest : public StressTest { } db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(), statuses.data()); - for (auto s : statuses) { + for (const auto& s : statuses) { if (s.ok()) { // found case thread->stats.AddGets(1, 1); diff --git a/db_stress_tool/db_stress_listener.cc b/db_stress_tool/db_stress_listener.cc index e2838c582..64adca877 100644 --- a/db_stress_tool/db_stress_listener.cc +++ b/db_stress_tool/db_stress_listener.cc @@ -67,7 +67,7 @@ UniqueIdVerifier::UniqueIdVerifier(const std::string& db_name, Env* env) std::string id(24U, '\0'); Slice result; for (;;) { - s = reader->Read(id.size(), opts, &result, &id[0], /*dbg*/ nullptr); + s = reader->Read(id.size(), opts, &result, id.data(), /*dbg*/ nullptr); if (!s.ok()) { fprintf(stderr, "Error reading unique id file: %s\n", s.ToString().c_str()); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 20077558f..f2b4f50b8 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1784,7 +1784,7 @@ Status StressTest::TestBackupRestore( // `ListColumnFamilies` to get names because it won't necessarily give // the same order 
as `column_family_names_`. assert(FLAGS_clear_column_family_one_in == 0); - for (auto name : column_family_names_) { + for (const auto& name : column_family_names_) { cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options)); } if (inplace_not_restore) { @@ -2762,12 +2762,12 @@ void StressTest::Open(SharedState* shared, bool reopen) { if (sorted_cfn != existing_column_families) { fprintf(stderr, "Expected column families differ from the existing:\n"); fprintf(stderr, "Expected: {"); - for (auto cf : sorted_cfn) { + for (const auto& cf : sorted_cfn) { fprintf(stderr, "%s ", cf.c_str()); } fprintf(stderr, "}\n"); fprintf(stderr, "Existing: {"); - for (auto cf : existing_column_families) { + for (const auto& cf : existing_column_families) { fprintf(stderr, "%s ", cf.c_str()); } fprintf(stderr, "}\n"); @@ -2775,7 +2775,7 @@ void StressTest::Open(SharedState* shared, bool reopen) { assert(sorted_cfn == existing_column_families); } std::vector cf_descriptors; - for (auto name : column_family_names_) { + for (const auto& name : column_family_names_) { if (name != kDefaultColumnFamilyName) { new_column_family_name_ = std::max(new_column_family_name_.load(), std::stoi(name) + 1); diff --git a/db_stress_tool/expected_state.cc b/db_stress_tool/expected_state.cc index b483e154c..705cd4f54 100644 --- a/db_stress_tool/expected_state.cc +++ b/db_stress_tool/expected_state.cc @@ -185,7 +185,7 @@ ExpectedStateManager::ExpectedStateManager(size_t max_key, num_column_families_(num_column_families), latest_(nullptr) {} -ExpectedStateManager::~ExpectedStateManager() {} +ExpectedStateManager::~ExpectedStateManager() = default; const std::string FileExpectedStateManager::kLatestBasename = "LATEST"; const std::string FileExpectedStateManager::kStateFilenameSuffix = ".state"; diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc index c7d38339b..145a96a75 100644 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ b/db_stress_tool/multi_ops_txns_stress.cc @@ -150,7 +150,7 @@ std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey(uint32_t a) { PutFixed32(&ret, kPrimaryIndexId); PutFixed32(&ret, a); - char* const buf = &ret[0]; + char* const buf = ret.data(); std::reverse(buf, buf + sizeof(kPrimaryIndexId)); std::reverse(buf + sizeof(kPrimaryIndexId), buf + sizeof(kPrimaryIndexId) + sizeof(a)); @@ -162,7 +162,7 @@ std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c) { PutFixed32(&ret, kSecondaryIndexId); PutFixed32(&ret, c); - char* const buf = &ret[0]; + char* const buf = ret.data(); std::reverse(buf, buf + sizeof(kSecondaryIndexId)); std::reverse(buf + sizeof(kSecondaryIndexId), buf + sizeof(kSecondaryIndexId) + sizeof(c)); @@ -176,7 +176,7 @@ std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c, PutFixed32(&ret, c); PutFixed32(&ret, a); - char* const buf = &ret[0]; + char* const buf = ret.data(); std::reverse(buf, buf + sizeof(kSecondaryIndexId)); std::reverse(buf + sizeof(kSecondaryIndexId), buf + sizeof(kSecondaryIndexId) + sizeof(c)); diff --git a/db_stress_tool/no_batched_ops_stress.cc b/db_stress_tool/no_batched_ops_stress.cc index 27a20fd5a..0e6efbbc3 100644 --- a/db_stress_tool/no_batched_ops_stress.cc +++ b/db_stress_tool/no_batched_ops_stress.cc @@ -17,9 +17,9 @@ namespace ROCKSDB_NAMESPACE { class NonBatchedOpsStressTest : public StressTest { public: - NonBatchedOpsStressTest() {} + NonBatchedOpsStressTest() = default; - virtual ~NonBatchedOpsStressTest() {} + virtual ~NonBatchedOpsStressTest() = 
default; void VerifyDb(ThreadState* thread) const override { // This `ReadOptions` is for validation purposes. Ignore @@ -624,7 +624,7 @@ class NonBatchedOpsStressTest : public StressTest { if (!shared->AllowsOverwrite(rand_key) && shared->Exists(column_family, rand_key)) { // Just do read your write checks for keys that allow overwrites. - ryw_expected_values.push_back(std::nullopt); + ryw_expected_values.emplace_back(std::nullopt); continue; } // With a 1 in 10 probability, insert the just added key in the batch @@ -667,7 +667,7 @@ class NonBatchedOpsStressTest : public StressTest { thread->shared->SafeTerminate(); } } else { - ryw_expected_values.push_back(std::nullopt); + ryw_expected_values.emplace_back(std::nullopt); } } } diff --git a/env/composite_env.cc b/env/composite_env.cc index 8ddc9a1a6..59434785c 100644 --- a/env/composite_env.cc +++ b/env/composite_env.cc @@ -504,7 +504,7 @@ EnvWrapper::EnvWrapper(const std::shared_ptr& t) : target_(t) { RegisterOptions("", &target_, &env_wrapper_type_info); } -EnvWrapper::~EnvWrapper() {} +EnvWrapper::~EnvWrapper() = default; Status EnvWrapper::PrepareOptions(const ConfigOptions& options) { target_.Prepare(); diff --git a/env/env.cc b/env/env.cc index 40493b478..8ad828a83 100644 --- a/env/env.cc +++ b/env/env.cc @@ -359,7 +359,7 @@ class LegacyFileSystemWrapper : public FileSystem { public: // Initialize an EnvWrapper that delegates all calls to *t explicit LegacyFileSystemWrapper(Env* t) : target_(t) {} - ~LegacyFileSystemWrapper() override {} + ~LegacyFileSystemWrapper() override = default; static const char* kClassName() { return "LegacyFileSystem"; } const char* Name() const override { return kClassName(); } @@ -624,7 +624,7 @@ Env::Env(const std::shared_ptr& fs, const std::shared_ptr& clock) : thread_status_updater_(nullptr), file_system_(fs), system_clock_(clock) {} -Env::~Env() {} +Env::~Env() = default; Status Env::NewLogger(const std::string& fname, std::shared_ptr* result) { @@ -797,7 +797,7 @@ std::string Env::GenerateUniqueId() { // Use 36 character format of RFC 4122 result.resize(36U); - char* buf = &result[0]; + char* buf = result.data(); PutBaseChars<16>(&buf, 8, upper >> 32, /*!uppercase*/ false); *(buf++) = '-'; PutBaseChars<16>(&buf, 4, upper >> 16, /*!uppercase*/ false); @@ -817,15 +817,15 @@ std::string Env::GenerateUniqueId() { return result; } -SequentialFile::~SequentialFile() {} +SequentialFile::~SequentialFile() = default; -RandomAccessFile::~RandomAccessFile() {} +RandomAccessFile::~RandomAccessFile() = default; -WritableFile::~WritableFile() {} +WritableFile::~WritableFile() = default; -MemoryMappedFileBuffer::~MemoryMappedFileBuffer() {} +MemoryMappedFileBuffer::~MemoryMappedFileBuffer() = default; -Logger::~Logger() {} +Logger::~Logger() = default; Status Logger::Close() { if (!closed_) { @@ -838,7 +838,7 @@ Status Logger::Close() { Status Logger::CloseImpl() { return Status::NotSupported(); } -FileLock::~FileLock() {} +FileLock::~FileLock() = default; void LogFlush(Logger* info_log) { if (info_log) { diff --git a/env/env_basic_test.cc b/env/env_basic_test.cc index 93bb2dba0..6a3b0390a 100644 --- a/env/env_basic_test.cc +++ b/env/env_basic_test.cc @@ -342,7 +342,7 @@ TEST_P(EnvMoreTestWithParam, GetChildren) { ASSERT_OK(env_->GetChildrenFileAttributes(test_dir_, &childAttr)); ASSERT_EQ(3U, children.size()); ASSERT_EQ(3U, childAttr.size()); - for (auto each : children) { + for (const auto& each : children) { env_->DeleteDir(test_dir_ + "/" + each).PermitUncheckedError(); } // necessary for default POSIX env 
diff --git a/env/env_chroot.cc b/env/env_chroot.cc index 5ff32a7e4..93dd7acd1 100644 --- a/env/env_chroot.cc +++ b/env/env_chroot.cc @@ -7,10 +7,11 @@ #include "env/env_chroot.h" -#include // errno -#include // realpath, free #include // geteuid +#include // errno +#include // realpath, free + #include "env/composite_env_wrapper.h" #include "env/fs_remap.h" #include "rocksdb/utilities/options_type.h" diff --git a/env/env_encryption.cc b/env/env_encryption.cc index 7b2a531c4..dd6ca54ea 100644 --- a/env/env_encryption.cc +++ b/env/env_encryption.cc @@ -827,7 +827,7 @@ Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char* data, AllocateScratch(scratch); // Encrypt individual blocks. - while (1) { + while (true) { char* block = data; size_t n = std::min(dataSize, blockSize - blockOffset); if (n != blockSize) { @@ -871,7 +871,7 @@ Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char* data, AllocateScratch(scratch); // Decrypt individual blocks. - while (1) { + while (true) { char* block = data; size_t n = std::min(dataSize, blockSize - blockOffset); if (n != blockSize) { diff --git a/env/env_posix.cc b/env/env_posix.cc index ae2f90360..9a02a7d11 100644 --- a/env/env_posix.cc +++ b/env/env_posix.cc @@ -14,19 +14,21 @@ #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION #include #endif -#include #include +#include + #if defined(ROCKSDB_IOURING_PRESENT) #include #endif #include -#include -#include -#include -#include #include #include + +#include +#include +#include +#include #if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID) #include #endif @@ -36,10 +38,10 @@ #if defined(ROCKSDB_IOURING_PRESENT) #include #endif -#include #include #include +#include // Get nano time includes #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) #elif defined(__MACH__) @@ -199,7 +201,7 @@ class PosixClock : public SystemClock { std::string dummy; dummy.reserve(maxsize); dummy.resize(maxsize); - char* p = &dummy[0]; + char* p = dummy.data(); port::LocalTimeR(&seconds, &t); snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec); diff --git a/env/env_test.cc b/env/env_test.cc index 1bd176fb0..4cf3c988d 100644 --- a/env/env_test.cc +++ b/env/env_test.cc @@ -26,13 +26,14 @@ #ifdef OS_LINUX #include #include -#include #include #include + +#include #endif #ifdef ROCKSDB_FALLOCATE_PRESENT -#include +#include #endif #include "db/db_impl/db_impl.h" @@ -2955,7 +2956,7 @@ struct NoDuplicateMiniStressTest { NoDuplicateMiniStressTest() { env = Env::Default(); } - virtual ~NoDuplicateMiniStressTest() {} + virtual ~NoDuplicateMiniStressTest() = default; void Run() { std::array threads; diff --git a/env/file_system.cc b/env/file_system.cc index 71fb4d5bc..e01ec12c9 100644 --- a/env/file_system.cc +++ b/env/file_system.cc @@ -22,9 +22,9 @@ namespace ROCKSDB_NAMESPACE { -FileSystem::FileSystem() {} +FileSystem::FileSystem() = default; -FileSystem::~FileSystem() {} +FileSystem::~FileSystem() = default; static int RegisterBuiltinFileSystems(ObjectLibrary& library, const std::string& /*arg*/) { diff --git a/env/fs_posix.cc b/env/fs_posix.cc index dd2f74935..5f403e61e 100644 --- a/env/fs_posix.cc +++ b/env/fs_posix.cc @@ -13,16 +13,17 @@ #ifndef ROCKSDB_NO_DYNAMIC_EXTENSION #include #endif -#include #include #include -#include -#include -#include -#include #include #include #include + +#include +#include +#include +#include +#include #if defined(OS_LINUX) || defined(OS_SOLARIS) || defined(OS_ANDROID) #include #include 
@@ -30,9 +31,9 @@ #include #include #include -#include #include +#include // Get nano time includes #if defined(OS_LINUX) || defined(OS_FREEBSD) #elif defined(__MACH__) @@ -143,7 +144,7 @@ class PosixFileSystem : public FileSystem { const char* Name() const override { return kClassName(); } const char* NickName() const override { return kDefaultName(); } - ~PosixFileSystem() override {} + ~PosixFileSystem() override = default; bool IsInstanceOf(const std::string& name) const override { if (name == "posix") { return true; diff --git a/env/io_posix.cc b/env/io_posix.cc index 0ec0e9c83..7fde1d020 100644 --- a/env/io_posix.cc +++ b/env/io_posix.cc @@ -10,23 +10,24 @@ #ifdef ROCKSDB_LIB_IO_POSIX #include "env/io_posix.h" -#include #include #include +#include #if defined(OS_LINUX) #include #ifndef FALLOC_FL_KEEP_SIZE #include #endif #endif -#include -#include -#include #include #include #include #include + +#include +#include +#include #ifdef OS_LINUX #include #include @@ -437,7 +438,7 @@ void LogicalBlockSizeCache::UnrefAndTryRemoveCachedLogicalBlockSize( size_t LogicalBlockSizeCache::GetLogicalBlockSize(const std::string& fname, int fd) { - std::string dir = fname.substr(0, fname.find_last_of("/")); + std::string dir = fname.substr(0, fname.find_last_of('/')); if (dir.empty()) { dir = "/"; } @@ -654,7 +655,9 @@ IOStatus PosixRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs, size_t this_reqs = (num_reqs - reqs_off) + incomplete_rq_list.size(); // If requests exceed depth, split it into batches - if (this_reqs > kIoUringDepth) this_reqs = kIoUringDepth; + if (this_reqs > kIoUringDepth) { + this_reqs = kIoUringDepth; + } assert(incomplete_rq_list.size() <= this_reqs); for (size_t i = 0; i < this_reqs; i++) { diff --git a/env/mock_env.cc b/env/mock_env.cc index c232af61e..e206593a2 100644 --- a/env/mock_env.cc +++ b/env/mock_env.cc @@ -483,7 +483,7 @@ class TestMemLogger : public Logger { options_(options), dbg_(dbg), flush_pending_(false) {} - ~TestMemLogger() override {} + ~TestMemLogger() override = default; void Flush() override { if (flush_pending_) { diff --git a/env/mock_env_test.cc b/env/mock_env_test.cc index be174bd73..23c4baa12 100644 --- a/env/mock_env_test.cc +++ b/env/mock_env_test.cc @@ -37,28 +37,28 @@ TEST_F(MockEnvTest, Corrupt) { Slice result; std::unique_ptr rand_file; ASSERT_OK(env_->NewRandomAccessFile(kFileName, &rand_file, soptions_)); - ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); + ASSERT_OK(rand_file->Read(0, kGood.size(), &result, scratch.data())); ASSERT_EQ(result.compare(kGood), 0); // Sync + corrupt => no change ASSERT_OK(writable_file->Fsync()); ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); result.clear(); - ASSERT_OK(rand_file->Read(0, kGood.size(), &result, &(scratch[0]))); + ASSERT_OK(rand_file->Read(0, kGood.size(), &result, scratch.data())); ASSERT_EQ(result.compare(kGood), 0); // Add new data and corrupt it ASSERT_OK(writable_file->Append(kCorrupted)); ASSERT_TRUE(writable_file->GetFileSize() == kGood.size() + kCorrupted.size()); result.clear(); - ASSERT_OK( - rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); + ASSERT_OK(rand_file->Read(kGood.size(), kCorrupted.size(), &result, + scratch.data())); ASSERT_EQ(result.compare(kCorrupted), 0); // Corrupted ASSERT_OK(dynamic_cast(env_)->CorruptBuffer(kFileName)); result.clear(); - ASSERT_OK( - rand_file->Read(kGood.size(), kCorrupted.size(), &result, &(scratch[0]))); + ASSERT_OK(rand_file->Read(kGood.size(), kCorrupted.size(), &result, + 
scratch.data())); ASSERT_NE(result.compare(kCorrupted), 0); } diff --git a/file/delete_scheduler.cc b/file/delete_scheduler.cc index 78ea6f7fe..9e7dd3d60 100644 --- a/file/delete_scheduler.cc +++ b/file/delete_scheduler.cc @@ -177,7 +177,7 @@ Status DeleteScheduler::CleanupDirectory(Env* env, SstFileManagerImpl* sfm, Status DeleteScheduler::MarkAsTrash(const std::string& file_path, std::string* trash_file) { // Sanity check of the path - size_t idx = file_path.rfind("/"); + size_t idx = file_path.rfind('/'); if (idx == std::string::npos || idx == file_path.size() - 1) { return Status::InvalidArgument("file_path is corrupted"); } diff --git a/file/filename.cc b/file/filename.cc index 1e04c7339..fb7d25472 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -8,10 +8,9 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "file/filename.h" -#include -#include - +#include #include +#include #include #include "file/writable_file_writer.h" diff --git a/file/random_access_file_reader_test.cc b/file/random_access_file_reader_test.cc index 6b7b7eb68..f081795b9 100644 --- a/file/random_access_file_reader_test.cc +++ b/file/random_access_file_reader_test.cc @@ -425,7 +425,9 @@ TEST(FSReadRequest, TryMerge) { src.scratch = nullptr; ASSERT_OK(src.status); - if (reverse) std::swap(dest, src); + if (reverse) { + std::swap(dest, src); + } ASSERT_TRUE(TryMerge(&dest, src)); ASSERT_EQ(dest.offset, 0); ASSERT_EQ(dest.len, 10); @@ -448,7 +450,9 @@ TEST(FSReadRequest, TryMerge) { src.scratch = nullptr; ASSERT_OK(src.status); - if (reverse) std::swap(dest, src); + if (reverse) { + std::swap(dest, src); + } ASSERT_TRUE(TryMerge(&dest, src)); ASSERT_EQ(dest.offset, 0); ASSERT_EQ(dest.len, 10); @@ -471,7 +475,9 @@ TEST(FSReadRequest, TryMerge) { src.scratch = nullptr; ASSERT_OK(src.status); - if (reverse) std::swap(dest, src); + if (reverse) { + std::swap(dest, src); + } ASSERT_TRUE(TryMerge(&dest, src)); ASSERT_EQ(dest.offset, 0); ASSERT_EQ(dest.len, 10); From 2045fe4693dd8957796ccce3a4abbf3f689226b3 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Mon, 4 Dec 2023 13:20:28 -0800 Subject: [PATCH 315/386] Mention PR 11892 in the changelog (#12118) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12118 Reviewed By: jaykorean Differential Revision: D51820703 fbshipit-source-id: d2a86a4781618747c6b7c71971862d510a25e103 --- unreleased_history/new_features/wbwi_wide_columns.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 unreleased_history/new_features/wbwi_wide_columns.md diff --git a/unreleased_history/new_features/wbwi_wide_columns.md b/unreleased_history/new_features/wbwi_wide_columns.md new file mode 100644 index 000000000..908279e1c --- /dev/null +++ b/unreleased_history/new_features/wbwi_wide_columns.md @@ -0,0 +1 @@ +Added initial wide-column support in `WriteBatchWithIndex`. This includes the `PutEntity` API and support for wide columns in the existing read APIs (`GetFromBatch`, `GetFromBatchAndDB`, `MultiGetFromBatchAndDB`, and `BaseDeltaIterator`). From 0ebe1614cb657000127da2dc490fc7a2a706d2f7 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Tue, 5 Dec 2023 14:07:42 -0800 Subject: [PATCH 316/386] Eliminate some code duplication in MergeHelper (#12121) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12121 The patch eliminates some code duplication by unifying the two sets of `MergeHelper::TimedFullMerge` overloads using variadic templates. 
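For illustration, the unified shape looks roughly like the following minimal sketch (hypothetical names and simplified types, not the actual RocksDB signatures): a single variadic front end accepts whichever trailing result out-parameters the caller supplies and forwards them to an overloaded implementation, so the two parallel overload sets collapse into one template per base-value tag.

```cpp
// Minimal sketch of the unification pattern described above (hypothetical
// names and simplified types; not the actual RocksDB signatures). A single
// variadic front end forwards the trailing result out-parameters to the
// matching Impl overload, replacing two parallel sets of overloads.
#include <iostream>
#include <string>

// Impl variant used by iterators/compaction: serialized result + type tag.
static void MergeImpl(const std::string& base, std::string* result,
                      int* result_type) {
  *result = base + "+operand";
  *result_type = 1;
}

// Impl variant used by point lookups: plain value only.
static void MergeImpl(const std::string& base, std::string* result) {
  *result = base + "+operand";
}

// One variadic front end replaces both overload sets; overload resolution
// on the forwarded parameter pack picks the right Impl.
template <typename... ResultTs>
void TimedMerge(const std::string& base, ResultTs... results) {
  MergeImpl(base, results...);
}

int main() {
  std::string out;
  int type = 0;
  TimedMerge("base", &out, &type);  // resolves to the iterator-style Impl
  TimedMerge("base", &out);         // resolves to the point-lookup-style Impl
  std::cout << out << " " << type << "\n";
  return 0;
}
```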
It also brings the order of parameters into sync when it comes to the various `TimedFullMerge*` methods. Reviewed By: jaykorean Differential Revision: D51862483 fbshipit-source-id: e3f832a6ff89ba34591451655cf11025d0a0d018 --- db/db_iter.cc | 12 +- db/memtable.cc | 16 +- db/merge_helper.cc | 174 ++---------------- db/merge_helper.h | 98 +++++----- db/version_set.cc | 9 +- db/write_batch.cc | 10 +- table/get_context.cc | 16 +- .../write_batch_with_index_internal.h | 8 +- 8 files changed, 103 insertions(+), 240 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index 418c538d4..991ec8fc4 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -1293,8 +1293,8 @@ bool DBIter::MergeWithNoBaseValue(const Slice& user_key) { const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key, MergeHelper::kNoBaseValue, merge_context_.GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, - &result_type, /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + &saved_value_, &pinned_value_, &result_type); return SetValueAndColumnsFromMergeResult(s, result_type); } @@ -1306,8 +1306,8 @@ bool DBIter::MergeWithPlainBaseValue(const Slice& value, const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key, MergeHelper::kPlainBaseValue, value, merge_context_.GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, - &result_type, /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + &saved_value_, &pinned_value_, &result_type); return SetValueAndColumnsFromMergeResult(s, result_type); } @@ -1319,8 +1319,8 @@ bool DBIter::MergeWithWideColumnBaseValue(const Slice& entity, const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key, MergeHelper::kWideBaseValue, entity, merge_context_.GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, &saved_value_, &pinned_value_, - &result_type, /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + &saved_value_, &pinned_value_, &result_type); return SetValueAndColumnsFromMergeResult(s, result_type); } diff --git a/db/memtable.cc b/db/memtable.cc index fe5b4b39e..0b8786bc2 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -1061,8 +1061,8 @@ static bool SaveValue(void* arg, const char* entry) { merge_operator, s->key->user_key(), MergeHelper::kPlainBaseValue, v, merge_context->GetOperands(), s->logger, s->statistics, s->clock, - /* update_num_ops_stats */ true, s->value, s->columns, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + s->value, s->columns); } } else if (s->value) { s->value->assign(v.data(), v.size()); @@ -1114,8 +1114,8 @@ static bool SaveValue(void* arg, const char* entry) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), MergeHelper::kWideBaseValue, v, merge_context->GetOperands(), s->logger, s->statistics, - s->clock, /* update_num_ops_stats */ true, s->value, s->columns, - /* op_failure_scope */ nullptr); + s->clock, /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr, s->value, s->columns); } } else if (s->value) { Slice value_of_default; @@ -1152,8 +1152,8 @@ static bool SaveValue(void* arg, const char* entry) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), MergeHelper::kNoBaseValue, merge_context->GetOperands(), 
s->logger, s->statistics, - s->clock, /* update_num_ops_stats */ true, s->value, s->columns, - /* op_failure_scope */ nullptr); + s->clock, /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr, s->value, s->columns); } else { // We have found a final value (a base deletion) and have newer // merge operands that we do not intend to merge. Nothing remains @@ -1192,8 +1192,8 @@ static bool SaveValue(void* arg, const char* entry) { *(s->status) = MergeHelper::TimedFullMerge( merge_operator, s->key->user_key(), MergeHelper::kNoBaseValue, merge_context->GetOperands(), s->logger, s->statistics, - s->clock, /* update_num_ops_stats */ true, s->value, s->columns, - /* op_failure_scope */ nullptr); + s->clock, /* update_num_ops_stats */ true, + /* op_failure_scope */ nullptr, s->value, s->columns); } *(s->found_final_value) = true; diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 2c5e2fe7d..703909010 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -12,7 +12,6 @@ #include "db/blob/prefetch_buffer_collection.h" #include "db/compaction/compaction_iteration_stats.h" #include "db/dbformat.h" -#include "db/wide/wide_column_serialization.h" #include "db/wide/wide_columns_helper.h" #include "logging/logging.h" #include "monitoring/perf_context_imp.h" @@ -111,9 +110,9 @@ Status MergeHelper::TimedFullMergeImpl( const MergeOperator* merge_operator, const Slice& key, MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, const std::vector& operands, Logger* logger, Statistics* statistics, - SystemClock* clock, bool update_num_ops_stats, std::string* result, - Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope) { + SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope, std::string* result, + Slice* result_operand, ValueType* result_type) { assert(result); assert(result_type); @@ -173,9 +172,9 @@ Status MergeHelper::TimedFullMergeImpl( const MergeOperator* merge_operator, const Slice& key, MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, const std::vector& operands, Logger* logger, Statistics* statistics, - SystemClock* clock, bool update_num_ops_stats, std::string* result_value, - PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope) { + SystemClock* clock, bool update_num_ops_stats, + MergeOperator::OpFailureScope* op_failure_scope, std::string* result_value, + PinnableWideColumns* result_entity) { assert(result_value || result_entity); assert(!result_value || !result_entity); @@ -245,141 +244,6 @@ Status MergeHelper::TimedFullMergeImpl( op_failure_scope, std::move(visitor)); } -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, - const std::vector& operands, Logger* logger, Statistics* statistics, - SystemClock* clock, bool update_num_ops_stats, std::string* result, - Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result, result_operand, - result_type, op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, - const Slice& value, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool 
update_num_ops_stats, - std::string* result, Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value(value); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result, result_operand, - result_type, op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const Slice& entity, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result, Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - - Slice entity_copy(entity); - WideColumns existing_columns; - - const Status s = - WideColumnSerialization::Deserialize(entity_copy, existing_columns); - if (!s.ok()) { - return s; - } - - existing_value = std::move(existing_columns); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result, result_operand, - result_type, op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const WideColumns& columns, const std::vector& operands, - Logger* logger, Statistics* statistics, SystemClock* clock, - bool update_num_ops_stats, std::string* result, Slice* result_operand, - ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value(columns); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result, result_operand, - result_type, op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, - const std::vector& operands, Logger* logger, Statistics* statistics, - SystemClock* clock, bool update_num_ops_stats, std::string* result_value, - PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result_value, result_entity, - op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, - const Slice& value, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value(value); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result_value, result_entity, - op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const Slice& entity, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, PinnableWideColumns* 
result_entity, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - - Slice entity_copy(entity); - WideColumns existing_columns; - - const Status s = - WideColumnSerialization::Deserialize(entity_copy, existing_columns); - if (!s.ok()) { - return s; - } - - existing_value = std::move(existing_columns); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result_value, result_entity, - op_failure_scope); -} - -Status MergeHelper::TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const WideColumns& columns, const std::vector& operands, - Logger* logger, Statistics* statistics, SystemClock* clock, - bool update_num_ops_stats, std::string* result_value, - PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope) { - MergeOperator::MergeOperationInputV3::ExistingValue existing_value(columns); - - return TimedFullMergeImpl(merge_operator, key, std::move(existing_value), - operands, logger, statistics, clock, - update_num_ops_stats, result_value, result_entity, - op_failure_scope); -} - // PRE: iter points to the first merge type entry // POST: iter points to the first entry beyond the merge process (or the end) // keys_, operands_ are updated to reflect the merge result. @@ -519,14 +383,14 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, s = TimedFullMerge(user_merge_operator_, ikey.user_key, kNoBaseValue, merge_context_.GetOperands(), logger_, stats_, clock_, /* update_num_ops_stats */ false, - &merge_result, /* result_operand */ nullptr, - &merge_result_type, &op_failure_scope); + &op_failure_scope, &merge_result, + /* result_operand */ nullptr, &merge_result_type); } else if (ikey.type == kTypeValue) { s = TimedFullMerge(user_merge_operator_, ikey.user_key, kPlainBaseValue, iter->value(), merge_context_.GetOperands(), logger_, stats_, clock_, /* update_num_ops_stats */ false, - &merge_result, /* result_operand */ nullptr, - &merge_result_type, &op_failure_scope); + &op_failure_scope, &merge_result, + /* result_operand */ nullptr, &merge_result_type); } else if (ikey.type == kTypeBlobIndex) { BlobIndex blob_index; @@ -559,20 +423,20 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, s = TimedFullMerge(user_merge_operator_, ikey.user_key, kPlainBaseValue, blob_value, merge_context_.GetOperands(), logger_, stats_, clock_, /* update_num_ops_stats */ false, - &merge_result, /* result_operand */ nullptr, - &merge_result_type, &op_failure_scope); + &op_failure_scope, &merge_result, + /* result_operand */ nullptr, &merge_result_type); } else if (ikey.type == kTypeWideColumnEntity) { s = TimedFullMerge(user_merge_operator_, ikey.user_key, kWideBaseValue, iter->value(), merge_context_.GetOperands(), logger_, stats_, clock_, /* update_num_ops_stats */ false, - &merge_result, /* result_operand */ nullptr, - &merge_result_type, &op_failure_scope); + &op_failure_scope, &merge_result, + /* result_operand */ nullptr, &merge_result_type); } else { s = TimedFullMerge(user_merge_operator_, ikey.user_key, kNoBaseValue, merge_context_.GetOperands(), logger_, stats_, clock_, /* update_num_ops_stats */ false, - &merge_result, /* result_operand */ nullptr, - &merge_result_type, &op_failure_scope); + &op_failure_scope, &merge_result, + /* result_operand */ nullptr, &merge_result_type); } // We store the result in keys_.back() and operands_.back() @@ -714,9 +578,9 @@ Status 
MergeHelper::MergeUntil(InternalIterator* iter, MergeOperator::OpFailureScope op_failure_scope; s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, kNoBaseValue, merge_context_.GetOperands(), logger_, stats_, clock_, - /* update_num_ops_stats */ false, &merge_result, - /* result_operand */ nullptr, &merge_result_type, - &op_failure_scope); + /* update_num_ops_stats */ false, &op_failure_scope, + &merge_result, + /* result_operand */ nullptr, &merge_result_type); if (s.ok()) { // The original key encountered // We are certain that keys_ is not empty here (see assertions couple of diff --git a/db/merge_helper.h b/db/merge_helper.h index 84c5f3535..39bd15f60 100644 --- a/db/merge_helper.h +++ b/db/merge_helper.h @@ -12,6 +12,7 @@ #include "db/merge_context.h" #include "db/range_del_aggregator.h" #include "db/snapshot_checker.h" +#include "db/wide/wide_column_serialization.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" #include "rocksdb/merge_operator.h" @@ -60,74 +61,73 @@ class MergeHelper { struct WideBaseValueTag {}; static constexpr WideBaseValueTag kWideBaseValue{}; - // Variants that expose the merge result directly (in serialized form for wide - // columns) as well as its value type. Used by iterator and compaction. + template static Status TimedFullMerge(const MergeOperator* merge_operator, const Slice& key, NoBaseValueTag, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result, Slice* result_operand, - ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, + ResultTs... results) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; + + return TimedFullMergeImpl( + merge_operator, key, std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, op_failure_scope, results...); + } + template static Status TimedFullMerge( const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, const Slice& value, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result, Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, ResultTs... results) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(value); + + return TimedFullMergeImpl( + merge_operator, key, std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, op_failure_scope, results...); + } + template static Status TimedFullMerge( const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, const Slice& entity, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result, Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, ResultTs... 
results) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value; - static Status TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const WideColumns& columns, const std::vector& operands, - Logger* logger, Statistics* statistics, SystemClock* clock, - bool update_num_ops_stats, std::string* result, Slice* result_operand, - ValueType* result_type, MergeOperator::OpFailureScope* op_failure_scope); + Slice entity_copy(entity); + WideColumns existing_columns; - // Variants that expose the merge result translated to the form requested by - // the client. (For example, if the result is a wide-column structure but the - // client requested the results in plain-value form, the value of the default - // column is returned.) Used by point lookups. - static Status TimedFullMerge(const MergeOperator* merge_operator, - const Slice& key, NoBaseValueTag, - const std::vector& operands, - Logger* logger, Statistics* statistics, - SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, - PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope); + const Status s = + WideColumnSerialization::Deserialize(entity_copy, existing_columns); + if (!s.ok()) { + return s; + } - static Status TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, PlainBaseValueTag, - const Slice& value, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope); + existing_value = std::move(existing_columns); - static Status TimedFullMerge( - const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, - const Slice& entity, const std::vector& operands, Logger* logger, - Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope); + return TimedFullMergeImpl( + merge_operator, key, std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, op_failure_scope, results...); + } + template static Status TimedFullMerge(const MergeOperator* merge_operator, const Slice& key, WideBaseValueTag, const WideColumns& columns, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, - PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, + ResultTs... results) { + MergeOperator::MergeOperationInputV3::ExistingValue existing_value(columns); + + return TimedFullMergeImpl( + merge_operator, key, std::move(existing_value), operands, logger, + statistics, clock, update_num_ops_stats, op_failure_scope, results...); + } // During compaction, merge entries until we hit // - a corrupted key @@ -271,21 +271,27 @@ class MergeHelper { Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, MergeOperator::OpFailureScope* op_failure_scope, Visitor&& visitor); + // Variant that exposes the merge result directly (in serialized form for wide + // columns) as well as its value type. Used by iterator and compaction. 
static Status TimedFullMergeImpl( const MergeOperator* merge_operator, const Slice& key, MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result, Slice* result_operand, ValueType* result_type, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, std::string* result, + Slice* result_operand, ValueType* result_type); + // Variant that exposes the merge result translated into the form requested by + // the client. (For example, if the result is a wide-column structure but the + // client requested the results in plain-value form, the value of the default + // column is returned.) Used by point lookups. static Status TimedFullMergeImpl( const MergeOperator* merge_operator, const Slice& key, MergeOperator::MergeOperationInputV3::ExistingValue&& existing_value, const std::vector& operands, Logger* logger, Statistics* statistics, SystemClock* clock, bool update_num_ops_stats, - std::string* result_value, PinnableWideColumns* result_entity, - MergeOperator::OpFailureScope* op_failure_scope); + MergeOperator::OpFailureScope* op_failure_scope, + std::string* result_value, PinnableWideColumns* result_entity); }; // MergeOutputIterator can be used to iterate over the result of a merge. diff --git a/db/version_set.cc b/db/version_set.cc index 1d339d129..926c768ca 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2536,8 +2536,8 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k, *status = MergeHelper::TimedFullMerge( merge_operator_, user_key, MergeHelper::kNoBaseValue, merge_context->GetOperands(), info_log_, db_statistics_, clock_, - /* update_num_ops_stats */ true, value ? value->GetSelf() : nullptr, - columns, /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + value ? value->GetSelf() : nullptr, columns); if (status->ok()) { if (LIKELY(value != nullptr)) { value->PinSelf(); @@ -2782,9 +2782,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range, *status = MergeHelper::TimedFullMerge( merge_operator_, user_key, MergeHelper::kNoBaseValue, iter->merge_context.GetOperands(), info_log_, db_statistics_, clock_, - /* update_num_ops_stats */ true, - iter->value ? iter->value->GetSelf() : nullptr, iter->columns, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + iter->value ? 
iter->value->GetSelf() : nullptr, iter->columns); if (LIKELY(iter->value != nullptr)) { iter->value->PinSelf(); range->AddValueSize(iter->value->size()); diff --git a/db/write_batch.cc b/db/write_batch.cc index e3c2c5635..75f6e1eb4 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -2545,9 +2545,8 @@ class MemTableInserter : public WriteBatch::Handler { WideColumnsHelper::GetDefaultColumn(columns), {value}, moptions->info_log, moptions->statistics, SystemClock::Default().get(), - /* update_num_ops_stats */ false, &new_value, - /* result_operand */ nullptr, &new_value_type, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ false, /* op_failure_scope */ nullptr, + &new_value, /* result_operand */ nullptr, &new_value_type); } else { // `op_failure_scope` (an output parameter) is not provided (set to // nullptr) since a failure must be propagated regardless of its @@ -2556,9 +2555,8 @@ class MemTableInserter : public WriteBatch::Handler { merge_operator, key, MergeHelper::kWideBaseValue, columns, {value}, moptions->info_log, moptions->statistics, SystemClock::Default().get(), - /* update_num_ops_stats */ false, &new_value, - /* result_operand */ nullptr, &new_value_type, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ false, /* op_failure_scope */ nullptr, + &new_value, /* result_operand */ nullptr, &new_value_type); } if (!merge_status.ok()) { diff --git a/table/get_context.cc b/table/get_context.cc index 23b1abd64..7dafbd7d4 100644 --- a/table/get_context.cc +++ b/table/get_context.cc @@ -232,7 +232,6 @@ bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, return true; // to continue to the next seq } - if (seq_ != nullptr) { // Set the sequence number if it is uninitialized if (*seq_ == kMaxSequenceNumber) { @@ -496,9 +495,8 @@ void GetContext::MergeWithNoBaseValue() { const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key_, MergeHelper::kNoBaseValue, merge_context_->GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, - pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_); PostprocessMerge(s); } @@ -512,9 +510,8 @@ void GetContext::MergeWithPlainBaseValue(const Slice& value) { const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key_, MergeHelper::kPlainBaseValue, value, merge_context_->GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, - pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_); PostprocessMerge(s); } @@ -528,9 +525,8 @@ void GetContext::MergeWithWideColumnBaseValue(const Slice& entity) { const Status s = MergeHelper::TimedFullMerge( merge_operator_, user_key_, MergeHelper::kWideBaseValue, entity, merge_context_->GetOperands(), logger_, statistics_, clock_, - /* update_num_ops_stats */ true, - pinnable_val_ ? pinnable_val_->GetSelf() : nullptr, columns_, - /* op_failure_scope */ nullptr); + /* update_num_ops_stats */ true, /* op_failure_scope */ nullptr, + pinnable_val_ ? 
pinnable_val_->GetSelf() : nullptr, columns_); PostprocessMerge(s); } diff --git a/utilities/write_batch_with_index/write_batch_with_index_internal.h b/utilities/write_batch_with_index/write_batch_with_index_internal.h index 38904593f..e1c97a9eb 100644 --- a/utilities/write_batch_with_index/write_batch_with_index_internal.h +++ b/utilities/write_batch_with_index/write_batch_with_index_internal.h @@ -408,8 +408,8 @@ class WriteBatchWithIndexInternal { return MergeHelper::TimedFullMerge( ioptions->merge_operator.get(), key, MergeHelper::kNoBaseValue, context.GetOperands(), ioptions->logger, ioptions->stats, - ioptions->clock, /* update_num_ops_stats */ false, results..., - /* op_failure_scope */ nullptr); + ioptions->clock, /* update_num_ops_stats */ false, + /* op_failure_scope */ nullptr, results...); } template @@ -432,8 +432,8 @@ class WriteBatchWithIndexInternal { return MergeHelper::TimedFullMerge( ioptions->merge_operator.get(), key, base_tag, value, context.GetOperands(), ioptions->logger, ioptions->stats, - ioptions->clock, /* update_num_ops_stats */ false, results..., - /* op_failure_scope */ nullptr); + ioptions->clock, /* update_num_ops_stats */ false, + /* op_failure_scope */ nullptr, results...); } // If batch contains a value for key, store it in *value and return kFound. From c77b50a4fdd7ff2d21909ccc436bd3d2f3f008fd Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Wed, 6 Dec 2023 13:48:15 -0800 Subject: [PATCH 317/386] Add AsyncIO support for tuning readahead_size by block cache lookup (#11936) Summary: Add support for tuning of readahead_size by block cache lookup for async_io. **Design/ Implementation** - **BlockBasedTableIterator.cc** - `BlockCacheLookupForReadAheadSize` callback API lookups in the block cache and tries to reduce the start and end offset passed. This function looks into the block cache for the blocks between `start_offset` and `end_offset` and add all the handles in the queue. It then iterates from the end in the handles to find first miss block and update the end offset to that block. It also iterates from the start and find first miss block and update the start offset to that block. ``` _read_curr_block_ argument : True if this call was due to miss in the cache and caller wants to read that block synchronously. False if current call is to prefetch additional data in extra buffers (due to ReadAsync call in FilePrefetchBuffer) ``` In case there is no data to be read in that callback (because of upper_bound or all blocks are in cache), it updates start and end offset to be equal and that `FilePrefetchBuffer` interprets that as 0 length to be read. **FilePrefetchBuffer.cc** - FilePrefetchBuffer calls the callback - `ReadAheadSizeTuning` and pass the start and end offset to that callback to get updated start and end offset to read based on cache hits/misses. 1. In case of Read calls (when offset passed to FilePrefetchBuffer is on cache miss and that data needs to be read), _read_curr_block_ is passed true. 2. In case of ReadAsync calls, when buffer is all consumed and can go for additional prefetching, the start offset passed is the initial end offset of prev buffer (without any updated offset based on cache hit/miss). Foreg. if following are the data blocks with cache hit/miss and start offset and Read API found miss on DB1 and based on readahead_size (50) it passes end offset to be 50. 
[DB1 - miss- 0 ] [DB2 - hit -10] [DB3 - miss -20] [DB4 - miss-30] [DB5 - hit-40] [DB6 - hit-50] [DB7 - miss-60] [DB8 - miss - 70] [DB9 - hit - 80] [DB6 - hit 90] - For Read call - updated start offset remains 0 but end offset updates to DB4, as DB5 is in cache. - Read calls saves initial end offset 50 as that was meant to be prefetched. - Now for next ReadAsync call - the start offset will be 50 (previous buffer initial end offset) and based on readahead_size, end offset will be 100 - On callback, because of cache hits - callback will update the start offset to 60 and end offset to 80 to read only 2 data blocks (DB7 and DB8). - And for that ReadAsync call - initial end offset will be set to 100 which will again used by next ReadAsync call as start offset. - `initial_end_offset_` in `BufferInfo` is used to save the initial end offset of that buffer. - If let's say DB5 and DB6 overlaps in 2 buffers (because of alignment), `prev_buf_end_offset` is passed to make sure already prefetched data is not prefetched again in second buffer. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11936 Test Plan: - Ran crash_test several times. - New unit tests added. Reviewed By: anand1976 Differential Revision: D50906217 Pulled By: akankshamahajan15 fbshipit-source-id: 0d75d3c98274e98aa34901b201b8fb05232139cf --- file/file_prefetch_buffer.cc | 392 ++++++++------ file/file_prefetch_buffer.h | 63 ++- file/prefetch_test.cc | 4 + file/random_access_file_reader.cc | 5 +- .../block_based/block_based_table_iterator.cc | 269 +++++++--- .../block_based/block_based_table_iterator.h | 21 +- table/block_based/block_based_table_reader.cc | 11 + table/block_based/block_based_table_reader.h | 7 +- table/block_based/block_prefetcher.cc | 2 +- table/block_based/block_prefetcher.h | 2 +- table/table_test.cc | 477 +++++++++++++++++- .../async_support_tune_readahead.md | 1 + 12 files changed, 977 insertions(+), 277 deletions(-) create mode 100644 unreleased_history/new_features/async_support_tune_readahead.md diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index da4a1d0b9..d392537b9 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -82,9 +82,9 @@ void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment, Status FilePrefetchBuffer::Read(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t read_len, uint64_t chunk_len, - uint64_t rounddown_start, uint32_t index) { + uint64_t start_offset, uint32_t index) { Slice result; - Status s = reader->Read(opts, rounddown_start + chunk_len, read_len, &result, + Status s = reader->Read(opts, start_offset + chunk_len, read_len, &result, bufs_[index].buffer_.BufferStart() + chunk_len, /*aligned_buf=*/nullptr); #ifndef NDEBUG @@ -102,15 +102,15 @@ Status FilePrefetchBuffer::Read(const IOOptions& opts, RecordTick(stats_, PREFETCH_BYTES, read_len); } // Update the buffer offset and size. - bufs_[index].offset_ = rounddown_start; + bufs_[index].offset_ = start_offset; bufs_[index].buffer_.Size(static_cast(chunk_len) + result.size()); return s; } Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts, RandomAccessFileReader* reader, - uint64_t read_len, - uint64_t rounddown_start, uint32_t index) { + uint64_t read_len, uint64_t start_offset, + uint32_t index) { TEST_SYNC_POINT("FilePrefetchBuffer::ReadAsync"); // callback for async read request. 
auto fp = std::bind(&FilePrefetchBuffer::PrefetchAsyncCallback, this, @@ -118,7 +118,7 @@ Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts, FSReadRequest req; Slice result; req.len = read_len; - req.offset = rounddown_start; + req.offset = start_offset; req.result = result; req.scratch = bufs_[index].buffer_.BufferStart(); bufs_[index].async_req_len_ = req.len; @@ -129,6 +129,7 @@ Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts, /*aligned_buf=*/nullptr); req.status.PermitUncheckedError(); if (s.ok()) { + RecordTick(stats_, PREFETCH_BYTES, read_len); bufs_[index].async_read_in_progress_ = true; } return s; @@ -149,19 +150,17 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts, } size_t alignment = reader->file()->GetRequiredBufferAlignment(); - size_t offset_ = static_cast(offset); - uint64_t rounddown_offset = Rounddown(offset_, alignment); - uint64_t roundup_end = Roundup(offset_ + n, alignment); - uint64_t roundup_len = roundup_end - rounddown_offset; - assert(roundup_len >= alignment); - assert(roundup_len % alignment == 0); + uint64_t rounddown_offset = offset, roundup_end = 0, chunk_len = 0; + size_t read_len = 0; - uint64_t chunk_len = 0; - CalculateOffsetAndLen(alignment, offset, roundup_len, curr_, - true /*refit_tail*/, chunk_len); - size_t read_len = static_cast(roundup_len - chunk_len); + ReadAheadSizeTuning(/*read_curr_block=*/true, /*refit_tail=*/true, + rounddown_offset, curr_, alignment, 0, n, + rounddown_offset, roundup_end, read_len, chunk_len); - Status s = Read(opts, reader, read_len, chunk_len, rounddown_offset, curr_); + Status s; + if (read_len > 0) { + s = Read(opts, reader, read_len, chunk_len, rounddown_offset, curr_); + } if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) { RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len); @@ -196,7 +195,7 @@ void FilePrefetchBuffer::CopyDataToBuffer(uint32_t src, uint64_t& offset, // length > 0 indicates it has consumed all data from the src buffer and it // still needs to read more other buffer. if (length > 0) { - bufs_[src].buffer_.Clear(); + bufs_[src].ClearBuffer(); } } @@ -266,28 +265,35 @@ void FilePrefetchBuffer::AbortAllIOs() { // Clear the buffers if it contains outdated data. Outdated data can be // because previous sequential reads were read from the cache instead of these // buffer. -void FilePrefetchBuffer::UpdateBuffersIfNeeded(uint64_t offset) { +void FilePrefetchBuffer::UpdateBuffersIfNeeded(uint64_t offset, size_t length) { uint32_t second = curr_ ^ 1; + if (IsBufferOutdated(offset, curr_)) { - bufs_[curr_].buffer_.Clear(); + bufs_[curr_].ClearBuffer(); } if (IsBufferOutdated(offset, second)) { - bufs_[second].buffer_.Clear(); + bufs_[second].ClearBuffer(); } { - // In case buffers do not align, reset second buffer. This can happen in - // case readahead_size is set. + // In case buffers do not align, reset second buffer if requested data needs + // to be read in second buffer. 
if (!bufs_[second].async_read_in_progress_ && !bufs_[curr_].async_read_in_progress_) { if (DoesBufferContainData(curr_)) { if (bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize() != bufs_[second].offset_) { - bufs_[second].buffer_.Clear(); + if (DoesBufferContainData(second) && + IsOffsetInBuffer(offset, curr_) && + (offset + length > + bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize())) { + bufs_[second].ClearBuffer(); + } } } else { - if (!IsOffsetInBuffer(offset, second)) { - bufs_[second].buffer_.Clear(); + if (DoesBufferContainData(second) && + !IsOffsetInBuffer(offset, second)) { + bufs_[second].ClearBuffer(); } } } @@ -308,7 +314,8 @@ void FilePrefetchBuffer::UpdateBuffersIfNeeded(uint64_t offset) { } } -void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) { +void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset, + size_t length) { if (bufs_[curr_].async_read_in_progress_ && fs_ != nullptr) { if (bufs_[curr_].io_handle_ != nullptr) { // Wait for prefetch data to complete. @@ -324,7 +331,79 @@ void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) { // completed. DestroyAndClearIOHandle(curr_); } - UpdateBuffersIfNeeded(offset); + UpdateBuffersIfNeeded(offset, length); +} + +// ReadAheadSizeTuning API calls readaheadsize_cb_ +// (BlockBasedTableIterator::BlockCacheLookupForReadAheadSize) to lookup in the +// cache and tune the start and end offsets based on cache hits/misses. +// +// Arguments - +// read_curr_block : True if this call was due to miss in the cache and +// FilePrefetchBuffer wants to read that block +// synchronously. +// False if current call is to prefetch additional data in +// extra buffers through ReadAsync API. +// prev_buf_end_offset : End offset of the previous buffer. It's used in case +// of ReadAsync to make sure it doesn't read anything from +// previous buffer which is already prefetched. +void FilePrefetchBuffer::ReadAheadSizeTuning( + bool read_curr_block, bool refit_tail, uint64_t prev_buf_end_offset, + uint32_t index, size_t alignment, size_t length, size_t readahead_size, + uint64_t& start_offset, uint64_t& end_offset, size_t& read_len, + uint64_t& chunk_len) { + uint64_t updated_start_offset = Rounddown(start_offset, alignment); + uint64_t updated_end_offset = + Roundup(start_offset + length + readahead_size, alignment); + uint64_t initial_end_offset = updated_end_offset; + + // Callback to tune the start and end offsets. + if (readaheadsize_cb_ != nullptr && readahead_size > 0) { + readaheadsize_cb_(read_curr_block, updated_start_offset, + updated_end_offset); + } + + // read_len will be 0 and there is nothing to read/prefetch. + if (updated_start_offset == updated_end_offset) { + return; + } + + assert(updated_start_offset < updated_end_offset); + + if (!read_curr_block) { + // Handle the case when callback added block handles which are already + // prefetched and nothing new needs to be prefetched. In that case end + // offset updated by callback will be less than prev_buf_end_offset which + // means data has been already prefetched. + if (updated_end_offset <= prev_buf_end_offset) { + start_offset = end_offset = prev_buf_end_offset; + return; + } + } + + // Realign if start and end offsets are not aligned after tuning. 
+ start_offset = Rounddown(updated_start_offset, alignment); + end_offset = Roundup(updated_end_offset, alignment); + + if (!read_curr_block && start_offset < prev_buf_end_offset) { + // Previous buffer already contains the data till prev_buf_end_offset + // because of alignment. Update the start offset after that to avoid + // prefetching it again. + start_offset = prev_buf_end_offset; + } + + uint64_t roundup_len = end_offset - start_offset; + + CalculateOffsetAndLen(alignment, start_offset, roundup_len, index, refit_tail, + chunk_len); + assert(roundup_len >= chunk_len); + + // Update the buffer offset. + bufs_[index].offset_ = start_offset; + // Update the initial end offset of this buffer which will be the starting + // offset of next prefetch. + bufs_[index].initial_end_offset_ = initial_end_offset; + read_len = static_cast(roundup_len - chunk_len); } Status FilePrefetchBuffer::HandleOverlappingData( @@ -340,7 +419,7 @@ Status FilePrefetchBuffer::HandleOverlappingData( // by Seek, but the next access is at another offset. if (bufs_[curr_].async_read_in_progress_ && IsOffsetInBufferWithAsyncProgress(offset, curr_)) { - PollAndUpdateBuffersIfNeeded(offset); + PollAndUpdateBuffersIfNeeded(offset, length); } second = curr_ ^ 1; @@ -354,7 +433,7 @@ Status FilePrefetchBuffer::HandleOverlappingData( (bufs_[second].async_read_in_progress_ || DoesBufferContainData(second)))) { // Allocate new buffer to third buffer; - bufs_[2].buffer_.Clear(); + bufs_[2].ClearBuffer(); bufs_[2].buffer_.Alignment(alignment); bufs_[2].buffer_.AllocateNewBuffer(length); bufs_[2].offset_ = offset; @@ -363,31 +442,29 @@ Status FilePrefetchBuffer::HandleOverlappingData( CopyDataToBuffer(curr_, tmp_offset, tmp_length); // Call async prefetching on curr_ since data has been consumed in curr_ - // only if data lies within second buffer. + // only if requested data lies within second buffer. size_t second_size = bufs_[second].async_read_in_progress_ ? bufs_[second].async_req_len_ : bufs_[second].buffer_.CurrentSize(); - uint64_t rounddown_start = bufs_[second].offset_ + second_size; + uint64_t start_offset = bufs_[second].initial_end_offset_; // Second buffer might be out of bound if first buffer already prefetched // that data. 
if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size && - !IsOffsetOutOfBound(rounddown_start)) { - uint64_t roundup_end = - Roundup(rounddown_start + readahead_size, alignment); - uint64_t roundup_len = roundup_end - rounddown_start; - uint64_t chunk_len = 0; - CalculateOffsetAndLen(alignment, rounddown_start, roundup_len, curr_, - false, chunk_len); - assert(chunk_len == 0); - assert(roundup_len >= chunk_len); - - bufs_[curr_].offset_ = rounddown_start; - uint64_t read_len = static_cast(roundup_len - chunk_len); - s = ReadAsync(opts, reader, read_len, rounddown_start, curr_); - if (!s.ok()) { - DestroyAndClearIOHandle(curr_); - bufs_[curr_].buffer_.Clear(); - return s; + !IsOffsetOutOfBound(start_offset)) { + size_t read_len = 0; + uint64_t end_offset = start_offset, chunk_len = 0; + + ReadAheadSizeTuning(/*read_curr_block=*/false, /*refit_tail=*/false, + bufs_[second].offset_ + second_size, curr_, alignment, + /*length=*/0, readahead_size, start_offset, + end_offset, read_len, chunk_len); + if (read_len > 0) { + s = ReadAsync(opts, reader, read_len, start_offset, curr_); + if (!s.ok()) { + DestroyAndClearIOHandle(curr_); + bufs_[curr_].ClearBuffer(); + return s; + } } } curr_ = curr_ ^ 1; @@ -430,13 +507,14 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, Status s; uint64_t tmp_offset = offset; size_t tmp_length = length; + size_t original_length = length; // 1. Abort IO and swap buffers if needed to point curr_ to first buffer with // data. if (!explicit_prefetch_submitted_) { AbortIOIfNeeded(offset); } - UpdateBuffersIfNeeded(offset); + UpdateBuffersIfNeeded(offset, length); // 2. Handle overlapping data over two buffers. If data is overlapping then // during this call: @@ -458,14 +536,15 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, if (!bufs_[curr_].async_read_in_progress_ && DoesBufferContainData(curr_) && IsDataBlockInBuffer(offset, length, curr_)) { // Whole data is in curr_. - UpdateBuffersIfNeeded(offset); + UpdateBuffersIfNeeded(offset, length); if (!IsSecondBuffEligibleForPrefetching()) { + UpdateStats(/*found_in_buffer=*/true, original_length); return s; } } else { // After poll request, curr_ might be empty because of IOError in // callback while reading or may contain required data. - PollAndUpdateBuffersIfNeeded(offset); + PollAndUpdateBuffersIfNeeded(offset, length); } if (copy_to_third_buffer) { @@ -488,6 +567,7 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, return s; } if (!IsSecondBuffEligibleForPrefetching()) { + UpdateStats(/*found_in_buffer=*/true, original_length); return s; } } @@ -509,7 +589,7 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, } } DestroyAndClearIOHandle(second); - bufs_[second].buffer_.Clear(); + bufs_[second].ClearBuffer(); } // 5. Data is overlapping i.e. some of the data has been copied to third @@ -524,73 +604,61 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, // and sync prefetching and copy the remaining data to third buffer in the // end. if (length == 0) { + UpdateStats(/*found_in_buffer=*/true, original_length); return s; } } // 6. Go for ReadAsync and Read (if needed). 
- size_t prefetch_size = length + readahead_size; - size_t _offset = static_cast(offset); - - // offset and size alignment for curr_ buffer with synchronous prefetching - uint64_t rounddown_start1 = Rounddown(_offset, alignment); - uint64_t roundup_end1 = Roundup(_offset + prefetch_size, alignment); - uint64_t roundup_len1 = roundup_end1 - rounddown_start1; - assert(roundup_len1 >= alignment); - assert(roundup_len1 % alignment == 0); - uint64_t chunk_len1 = 0; - uint64_t read_len1 = 0; - assert(!bufs_[second].async_read_in_progress_ && !DoesBufferContainData(second)); + // offset and size alignment for curr_ buffer with synchronous prefetching + uint64_t start_offset1 = offset, end_offset1 = 0, chunk_len1 = 0; + size_t read_len1 = 0; + // For length == 0, skip the synchronous prefetching. read_len1 will be 0. if (length > 0) { - CalculateOffsetAndLen(alignment, offset, roundup_len1, curr_, - false /*refit_tail*/, chunk_len1); - assert(roundup_len1 >= chunk_len1); - read_len1 = static_cast(roundup_len1 - chunk_len1); + ReadAheadSizeTuning(/*read_curr_block=*/true, /*refit_tail=*/false, + start_offset1, curr_, alignment, length, readahead_size, + start_offset1, end_offset1, read_len1, chunk_len1); + UpdateStats(/*found_in_buffer=*/false, + /*length_found=*/original_length - length); + } else { + end_offset1 = bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize(); + UpdateStats(/*found_in_buffer=*/true, original_length); } - // Prefetch in second buffer only if readahead_size_ > 0. - if (readahead_size_ > 0) { + // Prefetch in second buffer only if readahead_size > 0. + if (readahead_size > 0) { // offset and size alignment for second buffer for asynchronous - // prefetching - uint64_t rounddown_start2 = roundup_end1; - uint64_t roundup_end2 = - Roundup(rounddown_start2 + readahead_size, alignment); - - // For length == 0, do the asynchronous prefetching in second instead of - // synchronous prefetching in curr_. - if (length == 0) { - rounddown_start2 = - bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize(); - roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); - } + // prefetching. + uint64_t start_offset2 = bufs_[curr_].initial_end_offset_; // Second buffer might be out of bound if first buffer already prefetched // that data. - if (!IsOffsetOutOfBound(rounddown_start2)) { - uint64_t roundup_len2 = roundup_end2 - rounddown_start2; - uint64_t chunk_len2 = 0; - CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, - false /*refit_tail*/, chunk_len2); - assert(chunk_len2 == 0); - // Update the buffer offset. 
- bufs_[second].offset_ = rounddown_start2; - assert(roundup_len2 >= chunk_len2); - uint64_t read_len2 = static_cast(roundup_len2 - chunk_len2); - s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); - if (!s.ok()) { - DestroyAndClearIOHandle(second); - bufs_[second].buffer_.Clear(); - return s; + if (!IsOffsetOutOfBound(start_offset2)) { + // Find updated readahead size after tuning + size_t read_len2 = 0; + uint64_t end_offset2 = start_offset2, chunk_len2 = 0; + ReadAheadSizeTuning(/*read_curr_block=*/false, /*refit_tail=*/false, + /*prev_buf_end_offset=*/end_offset1, second, + alignment, + /*length=*/0, readahead_size, start_offset2, + end_offset2, read_len2, chunk_len2); + if (read_len2 > 0) { + s = ReadAsync(opts, reader, read_len2, start_offset2, second); + if (!s.ok()) { + DestroyAndClearIOHandle(second); + bufs_[second].ClearBuffer(); + return s; + } } } } if (read_len1 > 0) { - s = Read(opts, reader, read_len1, chunk_len1, rounddown_start1, curr_); + s = Read(opts, reader, read_len1, chunk_len1, start_offset1, curr_); if (!s.ok()) { if (bufs_[second].io_handle_ != nullptr) { std::vector handles; @@ -602,11 +670,12 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, } } DestroyAndClearIOHandle(second); - bufs_[second].buffer_.Clear(); - bufs_[curr_].buffer_.Clear(); + bufs_[second].ClearBuffer(); + bufs_[curr_].ClearBuffer(); return s; } } + // Copy remaining requested bytes to third_buffer. if (copy_to_third_buffer && length > 0) { CopyDataToBuffer(curr_, offset, length); @@ -668,8 +737,8 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return false; } } - size_t current_readahead_size = ReadAheadSizeTuning(offset, n); - s = Prefetch(opts, reader, offset, n + current_readahead_size); + UpdateReadAheadSizeForUpperBound(offset, n); + s = Prefetch(opts, reader, offset, n + readahead_size_); } if (!s.ok()) { if (status) { @@ -729,8 +798,8 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( // Random offset called. So abort the IOs. if (prev_offset_ != offset) { AbortAllIOs(); - bufs_[curr_].buffer_.Clear(); - bufs_[curr_ ^ 1].buffer_.Clear(); + bufs_[curr_].ClearBuffer(); + bufs_[curr_ ^ 1].ClearBuffer(); explicit_prefetch_submitted_ = false; return false; } @@ -788,6 +857,8 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( } else { return false; } + } else { + UpdateStats(/*found_in_buffer=*/true, n); } UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/); @@ -860,14 +931,19 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, AbortAllIOs(); // 2. Clear outdated data. - UpdateBuffersIfNeeded(offset); + UpdateBuffersIfNeeded(offset, n); uint32_t second = curr_ ^ 1; - // Since PrefetchAsync can be called on non sequential reads. So offset can - // be less than curr_ buffers' offset. In that case also it clears both - // buffers. - if (DoesBufferContainData(curr_) && !IsOffsetInBuffer(offset, curr_)) { - bufs_[curr_].buffer_.Clear(); - bufs_[second].buffer_.Clear(); + + // - Since PrefetchAsync can be called on non sequential reads. So offset can + // be less than curr_ buffers' offset. In that case it clears both + // buffers. + // - In case of tuning of readahead_size, on Reseek, we have to clear both + // buffers otherwise, we may end up with inconsistent BlockHandles in queue + // and data in buffer. 
+ if (readaheadsize_cb_ != nullptr || + (DoesBufferContainData(curr_) && !IsOffsetInBuffer(offset, curr_))) { + bufs_[curr_].ClearBuffer(); + bufs_[second].ClearBuffer(); } UpdateReadPattern(offset, n, /*decrease_readaheadsize=*/false); @@ -879,6 +955,8 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, uint64_t offset_in_buffer = offset - bufs_[curr_].offset_; *result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n); data_found = true; + UpdateStats(/*found_in_buffer=*/true, n); + // Update num_file_reads_ as TryReadFromCacheAsync won't be called for // poll and update num_file_reads_ if data is found. num_file_reads_++; @@ -890,91 +968,79 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, } } else { // Partial data in curr_. - bufs_[curr_].buffer_.Clear(); + bufs_[curr_].ClearBuffer(); } - bufs_[second].buffer_.Clear(); + bufs_[second].ClearBuffer(); + + std::string msg; Status s; size_t alignment = reader->file()->GetRequiredBufferAlignment(); - size_t prefetch_size = is_eligible_for_prefetching ? readahead_size_ / 2 : 0; + size_t readahead_size = is_eligible_for_prefetching ? readahead_size_ / 2 : 0; size_t offset_to_read = static_cast(offset); - uint64_t rounddown_start1 = 0; - uint64_t roundup_end1 = 0; - uint64_t rounddown_start2 = 0; - uint64_t roundup_end2 = 0; - uint64_t chunk_len1 = 0; - uint64_t chunk_len2 = 0; - size_t read_len1 = 0; - size_t read_len2 = 0; + uint64_t start_offset1 = offset, end_offset1 = 0, start_offset2 = 0, + chunk_len1 = 0; + size_t read_len1 = 0, read_len2 = 0; // - If curr_ is empty. - // - Call async read for full data + prefetch_size on curr_. - // - Call async read for prefetch_size on second if eligible. + // - Call async read for full data + readahead_size on curr_. + // - Call async read for readahead_size on second if eligible. // - If curr_ is filled. - // - prefetch_size on second. + // - readahead_size on second. // Calculate length and offsets for reading. if (!DoesBufferContainData(curr_)) { uint64_t roundup_len1; - // Prefetch full data + prefetch_size in curr_. + // Prefetch full data + readahead_size in curr_. if (is_eligible_for_prefetching || reader->use_direct_io()) { - rounddown_start1 = Rounddown(offset_to_read, alignment); - roundup_end1 = Roundup(offset_to_read + n + prefetch_size, alignment); - roundup_len1 = roundup_end1 - rounddown_start1; - assert(roundup_len1 >= alignment); - assert(roundup_len1 % alignment == 0); + ReadAheadSizeTuning(/*read_curr_block=*/true, /*refit_tail=*/false, + /*prev_buf_end_offset=*/start_offset1, curr_, + alignment, n, readahead_size, start_offset1, + end_offset1, read_len1, chunk_len1); } else { - rounddown_start1 = offset_to_read; - roundup_end1 = offset_to_read + n; - roundup_len1 = roundup_end1 - rounddown_start1; + // No alignment or extra prefetching. 
+ start_offset1 = offset_to_read; + end_offset1 = offset_to_read + n; + roundup_len1 = end_offset1 - start_offset1; + CalculateOffsetAndLen(alignment, start_offset1, roundup_len1, curr_, + false, chunk_len1); + assert(chunk_len1 == 0); + assert(roundup_len1 >= chunk_len1); + read_len1 = static_cast(roundup_len1); + bufs_[curr_].offset_ = start_offset1; } - CalculateOffsetAndLen(alignment, rounddown_start1, roundup_len1, curr_, - false, chunk_len1); - assert(chunk_len1 == 0); - assert(roundup_len1 >= chunk_len1); - read_len1 = static_cast(roundup_len1); - bufs_[curr_].offset_ = rounddown_start1; } if (is_eligible_for_prefetching) { - if (DoesBufferContainData(curr_)) { - rounddown_start2 = - bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize(); - } else { - rounddown_start2 = roundup_end1; - } - + start_offset2 = bufs_[curr_].initial_end_offset_; // Second buffer might be out of bound if first buffer already prefetched // that data. - if (!IsOffsetOutOfBound(rounddown_start2)) { - roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment); - uint64_t roundup_len2 = roundup_end2 - rounddown_start2; - - CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second, - false, chunk_len2); - assert(chunk_len2 == 0); - assert(roundup_len2 >= chunk_len2); - read_len2 = static_cast(roundup_len2 - chunk_len2); - // Update the buffer offset. - bufs_[second].offset_ = rounddown_start2; + if (!IsOffsetOutOfBound(start_offset2)) { + uint64_t end_offset2 = start_offset2, chunk_len2 = 0; + ReadAheadSizeTuning(/*read_curr_block=*/false, /*refit_tail=*/false, + /*prev_buf_end_offset=*/end_offset1, second, + alignment, + /*length=*/0, readahead_size, start_offset2, + end_offset2, read_len2, chunk_len2); } } if (read_len1) { - s = ReadAsync(opts, reader, read_len1, rounddown_start1, curr_); + s = ReadAsync(opts, reader, read_len1, start_offset1, curr_); if (!s.ok()) { DestroyAndClearIOHandle(curr_); - bufs_[curr_].buffer_.Clear(); + bufs_[curr_].ClearBuffer(); return s; } explicit_prefetch_submitted_ = true; prev_len_ = 0; } + if (read_len2) { TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:ExtraPrefetching"); - s = ReadAsync(opts, reader, read_len2, rounddown_start2, second); + s = ReadAsync(opts, reader, read_len2, start_offset2, second); if (!s.ok()) { DestroyAndClearIOHandle(second); - bufs_[second].buffer_.Clear(); + bufs_[second].ClearBuffer(); return s; } readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2); diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index d71b28ab8..b0aa1f1c6 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -32,6 +32,11 @@ struct IOOptions; class RandomAccessFileReader; struct BufferInfo { + void ClearBuffer() { + buffer_.Clear(); + initial_end_offset_ = 0; + } + AlignedBuffer buffer_; uint64_t offset_ = 0; @@ -52,6 +57,18 @@ struct BufferInfo { // pos represents the index of this buffer in vector of BufferInfo. uint32_t pos_ = 0; + + // initial_end_offset is used to keep track of the end offset of the buffer + // that was originally called. It's helpful in case of autotuning of readahead + // size when callback is made to BlockBasedTableIterator. + // initial end offset of this buffer which will be the starting + // offset of next prefetch. + // + // For example - if end offset of previous buffer was 100 and because of + // readahead_size optimization, end_offset was trimmed to 60. 
Then for next + // prefetch call, start_offset should be intialized to 100 i.e start_offset = + // buf->initial_end_offset_. + uint64_t initial_end_offset_ = 0; }; enum class FilePrefetchBufferUsage { @@ -91,7 +108,7 @@ class FilePrefetchBuffer { uint64_t num_file_reads_for_auto_readahead = 0, uint64_t upper_bound_offset = 0, FileSystem* fs = nullptr, SystemClock* clock = nullptr, Statistics* stats = nullptr, - const std::function& cb = nullptr, + const std::function& cb = nullptr, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) : curr_(0), readahead_size_(readahead_size), @@ -239,9 +256,6 @@ class FilePrefetchBuffer { void UpdateReadPattern(const uint64_t& offset, const size_t& len, bool decrease_readaheadsize) { if (decrease_readaheadsize) { - // Since this block was eligible for prefetch but it was found in - // cache, so check and decrease the readahead_size by 8KB (default) - // if eligible. DecreaseReadAheadIfEligible(offset, len); } prev_offset_ = offset; @@ -287,6 +301,12 @@ class FilePrefetchBuffer { readahead_size_ = initial_auto_readahead_size_; } + void TEST_GetBufferOffsetandSize(uint32_t index, uint64_t& offset, + size_t& len) { + offset = bufs_[index].offset_; + len = bufs_[index].buffer_.CurrentSize(); + } + private: // Calculates roundoff offset and length to be prefetched based on alignment // and data present in buffer_. It also allocates new buffer or refit tail if @@ -299,12 +319,12 @@ class FilePrefetchBuffer { void AbortAllIOs(); - void UpdateBuffersIfNeeded(uint64_t offset); + void UpdateBuffersIfNeeded(uint64_t offset, size_t len); // It calls Poll API if any there is any pending asynchronous request. It then // checks if data is in any buffer. It clears the outdated data and swaps the // buffers if required. - void PollAndUpdateBuffersIfNeeded(uint64_t offset); + void PollAndUpdateBuffersIfNeeded(uint64_t offset, size_t len); Status PrefetchAsyncInternal(const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset, @@ -312,11 +332,11 @@ class FilePrefetchBuffer { bool& copy_to_third_buffer); Status Read(const IOOptions& opts, RandomAccessFileReader* reader, - uint64_t read_len, uint64_t chunk_len, uint64_t rounddown_start, + uint64_t read_len, uint64_t chunk_len, uint64_t start_offset, uint32_t index); Status ReadAsync(const IOOptions& opts, RandomAccessFileReader* reader, - uint64_t read_len, uint64_t rounddown_start, uint32_t index); + uint64_t read_len, uint64_t start_offset, uint32_t index); // Copy the data from src to third buffer. void CopyDataToBuffer(uint32_t src, uint64_t& offset, size_t& length); @@ -402,7 +422,7 @@ class FilePrefetchBuffer { return false; } - bufs_[second].buffer_.Clear(); + bufs_[second].ClearBuffer(); return true; } @@ -451,19 +471,20 @@ class FilePrefetchBuffer { return false; } - // Performs tuning to calculate readahead_size. 
- size_t ReadAheadSizeTuning(uint64_t offset, size_t n) { - UpdateReadAheadSizeForUpperBound(offset, n); + void ReadAheadSizeTuning(bool read_curr_block, bool refit_tail, + uint64_t prev_buf_end_offset, uint32_t index, + size_t alignment, size_t length, + size_t readahead_size, uint64_t& offset, + uint64_t& end_offset, size_t& read_len, + uint64_t& chunk_len); - if (readaheadsize_cb_ != nullptr && readahead_size_ > 0) { - size_t updated_readahead_size = 0; - readaheadsize_cb_(offset, readahead_size_, updated_readahead_size); - if (readahead_size_ != updated_readahead_size) { - RecordTick(stats_, READAHEAD_TRIMMED); - } - return updated_readahead_size; + void UpdateStats(bool found_in_buffer, size_t length_found) { + if (found_in_buffer) { + RecordTick(stats_, PREFETCH_HITS); + } + if (length_found > 0) { + RecordTick(stats_, PREFETCH_BYTES_USEFUL, length_found); } - return readahead_size_; } std::vector bufs_; @@ -512,6 +533,6 @@ class FilePrefetchBuffer { // ReadOptions.auto_readahead_size are set to trim readahead_size upto // upper_bound_offset_ during prefetching. uint64_t upper_bound_offset_ = 0; - std::function readaheadsize_cb_; + std::function readaheadsize_cb_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 84932440a..2c0e8817a 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1325,6 +1325,10 @@ TEST_P(PrefetchTest, PrefetchWithBlockLookupAutoTuneTest) { ropts.readahead_size = cmp_ro.readahead_size = 32768; } + if (std::get<1>(GetParam())) { + ropts.async_io = true; + } + // With and without tuning readahead_size. { ASSERT_OK(options.statistics->Reset()); diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index 2025ce65b..5de9d1305 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -492,8 +492,9 @@ IOStatus RandomAccessFileReader::ReadAsync( auto read_async_callback = std::bind(&RandomAccessFileReader::ReadAsyncCallback, this, std::placeholders::_1, std::placeholders::_2); - ReadAsyncInfo* read_async_info = - new ReadAsyncInfo(cb, cb_arg, clock_->NowMicros()); + + ReadAsyncInfo* read_async_info = new ReadAsyncInfo( + cb, cb_arg, (clock_ != nullptr ? 
clock_->NowMicros() : 0)); if (ShouldNotifyListeners()) { read_async_info->fs_start_ts_ = FileOperationInfo::StartNow(); diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 8107e58f2..f1b95fb35 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -16,25 +16,44 @@ void BlockBasedTableIterator::Seek(const Slice& target) { SeekImpl(&target, true); } +void BlockBasedTableIterator::SeekSecondPass(const Slice* target) { + AsyncInitDataBlock(/*is_first_pass=*/false); + + if (target) { + block_iter_.Seek(*target); + } else { + block_iter_.SeekToFirst(); + } + FindKeyForward(); + + CheckOutOfBound(); + + if (target) { + assert(!Valid() || icomp_.Compare(*target, key()) <= 0); + } +} + void BlockBasedTableIterator::SeekImpl(const Slice* target, bool async_prefetch) { - ResetBlockCacheLookupVar(); bool is_first_pass = !async_read_in_progress_; + + if (!is_first_pass) { + SeekSecondPass(target); + return; + } + + ResetBlockCacheLookupVar(); + bool autotune_readaheadsize = is_first_pass && read_options_.auto_readahead_size && read_options_.iterate_upper_bound; if (autotune_readaheadsize && table_->get_rep()->table_options.block_cache.get() && - !read_options_.async_io && direction_ == IterDirection::kForward) { + direction_ == IterDirection::kForward) { readahead_cache_lookup_ = true; } - // Second pass. - if (async_read_in_progress_) { - AsyncInitDataBlock(false); - } - is_out_of_bound_ = false; is_at_first_key_from_index_ = false; seek_stat_state_ = kNone; @@ -57,7 +76,8 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, bool need_seek_index = true; // In case of readahead_cache_lookup_, index_iter_ could change to find the - // readahead size in BlockCacheLookupForReadAheadSize so it needs to reseek. + // readahead size in BlockCacheLookupForReadAheadSize so it needs to + // reseek. if (IsIndexAtCurr() && block_iter_points_to_real_block_ && block_iter_.Valid()) { // Reseek. @@ -111,7 +131,6 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, // After reseek, index_iter_ point to the right key i.e. target in // case of readahead_cache_lookup_. So index_iter_ can be used directly. - IndexValue v = index_iter_->value(); const bool same_block = block_iter_points_to_real_block_ && v.handle.offset() == prev_block_offset_; @@ -130,14 +149,12 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, // Need to use the data block. if (!same_block) { if (read_options_.async_io && async_prefetch) { - if (is_first_pass) { - AsyncInitDataBlock(is_first_pass); - } + AsyncInitDataBlock(/*is_first_pass=*/true); if (async_read_in_progress_) { // Status::TryAgain indicates asynchronous request for retrieval of // data blocks has been submitted. So it should return at this point - // and Seek should be called again to retrieve the requested block and - // execute the remaining code. + // and Seek should be called again to retrieve the requested block + // and execute the remaining code. return; } } else { @@ -288,8 +305,9 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) { void BlockBasedTableIterator::Prev() { // Return Error. 
 if (readahead_cache_lookup_) {
-    block_iter_.Invalidate(Status::NotSupported(
-        "auto tuning of readahead_size is not supported with Prev operation."));
+    block_iter_.Invalidate(
+        Status::NotSupported("auto tuning of readahead_size is not "
+                             "supported with Prev operation."));
     return;
   }
 
@@ -346,7 +364,7 @@ void BlockBasedTableIterator::InitDataBlock() {
   } else {
     auto* rep = table_->get_rep();
 
-    std::function readaheadsize_cb =
+    std::function readaheadsize_cb =
         nullptr;
     if (readahead_cache_lookup_) {
       readaheadsize_cb = std::bind(
@@ -389,10 +407,11 @@ void BlockBasedTableIterator::InitDataBlock() {
 }
 
 void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
-  BlockHandle data_block_handle = index_iter_->value().handle;
+  BlockHandle data_block_handle;
   bool is_for_compaction =
       lookup_context_.caller == TableReaderCaller::kCompaction;
   if (is_first_pass) {
+    data_block_handle = index_iter_->value().handle;
     if (!block_iter_points_to_real_block_ ||
         data_block_handle.offset() != prev_block_offset_ ||
         // if previous attempt of reading the block missed cache, try again
@@ -402,7 +421,7 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
     }
 
     auto* rep = table_->get_rep();
-    std::function readaheadsize_cb =
+    std::function readaheadsize_cb =
        nullptr;
     if (readahead_cache_lookup_) {
       readaheadsize_cb = std::bind(
@@ -441,13 +460,30 @@ void BlockBasedTableIterator::AsyncInitDataBlock(bool is_first_pass) {
   } else {
     // Second pass will call the Poll to get the data block which has been
     // requested asynchronously.
+    bool is_in_cache = false;
+
+    if (DoesContainBlockHandles()) {
+      data_block_handle = block_handles_.front().handle_;
+      is_in_cache = block_handles_.front().is_cache_hit_;
+    } else {
+      data_block_handle = index_iter_->value().handle;
+    }
+
     Status s;
-    table_->NewDataBlockIterator(
-        read_options_, data_block_handle, &block_iter_, BlockType::kData,
-        /*get_context=*/nullptr, &lookup_context_,
-        block_prefetcher_.prefetch_buffer(),
-        /*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
-        /*use_block_cache_for_lookup=*/false);
+    // Initialize Data Block From CacheableEntry.
+    if (is_in_cache) {
+      block_iter_.Invalidate(Status::OK());
+      table_->NewDataBlockIterator(
+          read_options_, (block_handles_.front().cachable_entry_).As(),
+          &block_iter_, s);
+    } else {
+      table_->NewDataBlockIterator(
+          read_options_, data_block_handle, &block_iter_, BlockType::kData,
+          /*get_context=*/nullptr, &lookup_context_,
+          block_prefetcher_.prefetch_buffer(),
+          /*for_compaction=*/is_for_compaction, /*async_read=*/false, s,
+          /*use_block_cache_for_lookup=*/false);
+    }
   }
   block_iter_points_to_real_block_ = true;
   CheckDataBlockWithinUpperBound();
@@ -672,65 +708,144 @@ void BlockBasedTableIterator::FindReadAheadSizeUpperBound() {
       total_bytes_till_upper_bound);
 }
 
-void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize(
-    uint64_t offset, size_t readahead_size, size_t& updated_readahead_size) {
-  updated_readahead_size = readahead_size;
+void BlockBasedTableIterator::InitializeStartAndEndOffsets(
+    bool read_curr_block, bool& found_first_miss_block,
+    uint64_t& start_updated_offset, uint64_t& end_updated_offset,
+    size_t& prev_handles_size) {
+  prev_handles_size = block_handles_.size();
+  size_t footer = table_->get_rep()->footer.GetBlockTrailerSize();
 
-  // readahead_cache_lookup_ can be set false after Seek, if after Seek or Next
-  // there is SeekForPrev or any other backward operation.
-  if (!readahead_cache_lookup_) {
-    return;
+  // Initializes the start and end offsets to begin with, covering the
+  // following scenarios.
+  if (read_curr_block) {
+    if (!DoesContainBlockHandles()) {
+      // Scenario 1 : read_curr_block (callback made on miss block which caller
+      //              was reading) and it has no existing handles in queue, i.e.
+      //              index_iter_ is pointing to the block that is being read by
+      //              the caller.
+      //
+      // Add current block here as it doesn't need any lookup.
+      BlockHandleInfo block_handle_info;
+      block_handle_info.handle_ = index_iter_->value().handle;
+      block_handle_info.SetFirstInternalKey(
+          index_iter_->value().first_internal_key);
+
+      end_updated_offset = block_handle_info.handle_.offset() + footer +
+                           block_handle_info.handle_.size();
+      block_handles_.emplace_back(std::move(block_handle_info));
+
+      index_iter_->Next();
+      is_index_at_curr_block_ = false;
+      found_first_miss_block = true;
+    } else {
+      // Scenario 2 : read_curr_block (callback made on miss block which caller
+      //              was reading) but the queue already has some handles.
+      //
+      // It can be due to a read error in the second buffer in
+      // FilePrefetchBuffer. BlockHandles were already added to the queue but
+      // there was an error in fetching those data blocks. So in this call they
+      // need to be read again.
+      assert(block_handles_.front().is_cache_hit_ == false);
+      found_first_miss_block = true;
+      // Initialize prev_handles_size to 0 as all those handles need to be read
+      // again.
+      prev_handles_size = 0;
+      start_updated_offset = block_handles_.front().handle_.offset();
+      end_updated_offset = block_handles_.back().handle_.offset() + footer +
+                           block_handles_.back().handle_.size();
+    }
+  } else {
+    // Scenario 3 : read_curr_block is false (callback made to do additional
+    //              prefetching in buffers) and the queue already has some
+    //              handles from first buffer.
+    if (DoesContainBlockHandles()) {
+      start_updated_offset = block_handles_.back().handle_.offset() + footer +
+                             block_handles_.back().handle_.size();
+      end_updated_offset = start_updated_offset;
+    } else {
+      // Scenario 4 : read_curr_block is false (callback made to do additional
+      //              prefetching in buffers) but the queue has no handle
+      //              from first buffer.
+      //
+      // It can happen when Reseek is from block cache (which doesn't clear the
+      // buffers in FilePrefetchBuffer but clears block handles from queue) and
+      // the reseek also lies within the buffer. So Next will get data from
+      // existing buffers until this callback is made to prefetch additional
+      // data. All handles need to be added to the queue starting from
+      // index_iter_.
+      assert(index_iter_->Valid());
+      start_updated_offset = index_iter_->value().handle.offset();
+      end_updated_offset = start_updated_offset;
+    }
   }
+}
 
-  assert(!DoesContainBlockHandles());
-  assert(index_iter_->value().handle.offset() == offset);
+// The BlockCacheLookupForReadAheadSize API looks up the block cache and tries
+// to reduce the start and end offsets passed.
+//
+// Implementation -
+// This function looks into the block cache for the blocks between start_offset
+// and end_offset and adds all the handles to the queue.
+// It then iterates from the end to find the first miss block and updates the
+// end offset to that block.
+// It also iterates from the start to find the first miss block and updates the
+// start offset to that block.
+//
+// Arguments -
+// start_offset : Offset from which the caller wants to read.
+// end_offset : End offset till which the caller wants to read.
+// read_curr_block : True if this call was due to miss in the cache and +// caller wants to read that block. +// False if current call is to prefetch additional data in +// extra buffers. +void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( + bool read_curr_block, uint64_t& start_offset, uint64_t& end_offset) { + uint64_t start_updated_offset = start_offset; - // Error. current offset should be equal to what's requested for prefetching. - if (index_iter_->value().handle.offset() != offset) { + // readahead_cache_lookup_ can be set false, if after Seek and Next + // there is SeekForPrev or any other backward operation. + if (!readahead_cache_lookup_) { return; } - if (IsNextBlockOutOfBound()) { - updated_readahead_size = 0; + size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); + if (read_curr_block && !DoesContainBlockHandles() && + IsNextBlockOutOfBound()) { + end_offset = index_iter_->value().handle.offset() + footer + + index_iter_->value().handle.size(); return; } - size_t current_readahead_size = 0; - size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); + uint64_t end_updated_offset = start_updated_offset; + bool found_first_miss_block = false; + size_t prev_handles_size; - // Add the current block to block_handles_. - { - BlockHandleInfo block_handle_info; - block_handle_info.handle_ = index_iter_->value().handle; - block_handle_info.SetFirstInternalKey( - index_iter_->value().first_internal_key); - block_handles_.emplace_back(std::move(block_handle_info)); - } - - // Current block is included in length. Readahead should start from next - // block. - index_iter_->Next(); - is_index_at_curr_block_ = false; + // Initialize start and end offsets based on exisiting handles in the queue + // and read_curr_block argument passed. + InitializeStartAndEndOffsets(read_curr_block, found_first_miss_block, + start_updated_offset, end_updated_offset, + prev_handles_size); - while (index_iter_->Valid()) { + while (index_iter_->Valid() && !is_index_out_of_bound_) { BlockHandle block_handle = index_iter_->value().handle; - // Adding this data block exceeds passed down readahead_size. So this data + // Adding this data block exceeds end offset. So this data // block won't be added. - if (current_readahead_size + block_handle.size() + footer > - readahead_size) { + // There can be a case where passed end offset is smaller than + // block_handle.size() + footer because of readahead_size truncated to + // upper_bound. So we prefer to read the block rather than skip it to avoid + // sync read calls in case of async_io. + if (start_updated_offset != end_updated_offset && + (end_updated_offset + block_handle.size() + footer > end_offset)) { break; } - current_readahead_size += block_handle.size(); - current_readahead_size += footer; - // For current data block, do the lookup in the cache. Lookup should pin the - // data block and add the placeholder for cache. + // data block in cache. BlockHandleInfo block_handle_info; block_handle_info.handle_ = index_iter_->value().handle; block_handle_info.SetFirstInternalKey( index_iter_->value().first_internal_key); + end_updated_offset += footer + block_handle_info.handle_.size(); Status s = table_->LookupAndPinBlocksInCache( read_options_, block_handle, @@ -743,6 +858,12 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( (block_handle_info.cachable_entry_.GetValue() || block_handle_info.cachable_entry_.GetCacheHandle()); + // If this is the first miss block, update start offset to this block. 
+ if (!found_first_miss_block && !block_handle_info.is_cache_hit_) { + found_first_miss_block = true; + start_updated_offset = block_handle_info.handle_.offset(); + } + // Add the handle to the queue. block_handles_.emplace_back(std::move(block_handle_info)); @@ -756,16 +877,28 @@ void BlockBasedTableIterator::BlockCacheLookupForReadAheadSize( break; } index_iter_->Next(); + is_index_at_curr_block_ = false; }; - // Iterate cache hit block handles from the end till a Miss is there, to - // update the readahead_size. - for (auto it = block_handles_.rbegin(); - it != block_handles_.rend() && (*it).is_cache_hit_ == true; ++it) { - current_readahead_size -= (*it).handle_.size(); - current_readahead_size -= footer; + if (found_first_miss_block) { + // Iterate cache hit block handles from the end till a Miss is there, to + // truncate and update the end offset till that Miss. + auto it = block_handles_.rbegin(); + auto it_end = + block_handles_.rbegin() + (block_handles_.size() - prev_handles_size); + + while (it != it_end && (*it).is_cache_hit_) { + it++; + } + end_updated_offset = (*it).handle_.offset() + footer + (*it).handle_.size(); + } else { + // Nothing to read. Can be because of IOError in index_iter_->Next() or + // reached upper_bound. + end_updated_offset = start_updated_offset; } - updated_readahead_size = current_readahead_size; + + end_offset = end_updated_offset; + start_offset = start_updated_offset; ResetPreviousBlockOffset(); } diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 7ed7e3375..77fe6aa6d 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -199,6 +199,10 @@ class BlockBasedTableIterator : public InternalIteratorBase { } } + FilePrefetchBuffer* prefetch_buffer() { + return block_prefetcher_.prefetch_buffer(); + } + std::unique_ptr> index_iter_; private: @@ -325,6 +329,8 @@ class BlockBasedTableIterator : public InternalIteratorBase { // is used to disable the lookup. IterDirection direction_ = IterDirection::kForward; + void SeekSecondPass(const Slice* target); + // If `target` is null, seek to first. void SeekImpl(const Slice* target, bool async_prefetch); @@ -367,10 +373,11 @@ class BlockBasedTableIterator : public InternalIteratorBase { // *** BEGIN APIs relevant to auto tuning of readahead_size *** void FindReadAheadSizeUpperBound(); - // This API is called to lookup the data blocks ahead in the cache to estimate - // the current readahead_size. - void BlockCacheLookupForReadAheadSize(uint64_t offset, size_t readahead_size, - size_t& updated_readahead_size); + // This API is called to lookup the data blocks ahead in the cache to tune + // the start and end offsets passed. 
+ void BlockCacheLookupForReadAheadSize(bool read_curr_block, + uint64_t& start_offset, + uint64_t& end_offset); void ResetBlockCacheLookupVar() { is_index_out_of_bound_ = false; @@ -399,6 +406,12 @@ class BlockBasedTableIterator : public InternalIteratorBase { bool DoesContainBlockHandles() { return !block_handles_.empty(); } + void InitializeStartAndEndOffsets(bool read_curr_block, + bool& found_first_miss_block, + uint64_t& start_updated_offset, + uint64_t& end_updated_offset, + size_t& prev_handles_size); + // *** END APIs relevant to auto tuning of readahead_size *** }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 7e605726d..385622461 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2651,6 +2651,17 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, return TEST_BlockInCache(iiter->value().handle); } +void BlockBasedTable::TEST_GetDataBlockHandle(const ReadOptions& options, + const Slice& key, + BlockHandle& handle) { + std::unique_ptr> iiter(NewIndexIterator( + options, /*disable_prefix_seek=*/false, /*input_iter=*/nullptr, + /*get_context=*/nullptr, /*lookup_context=*/nullptr)); + iiter->Seek(key); + assert(iiter->Valid()); + handle = iiter->value().handle; +} + // REQUIRES: The following fields of rep_ should have already been populated: // 1. file // 2. index_handle, diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 22361b505..43ea1602d 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -188,6 +188,9 @@ class BlockBasedTable : public TableReader { // REQUIRES: key is in this table && block cache enabled bool TEST_KeyInCache(const ReadOptions& options, const Slice& key); + void TEST_GetDataBlockHandle(const ReadOptions& options, const Slice& key, + BlockHandle& handle); + // Set up the table for Compaction. 
Might change some parameters with // posix_fadvise void SetupForCompaction() override; @@ -697,7 +700,7 @@ struct BlockBasedTable::Rep { std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, uint64_t upper_bound_offset, - const std::function& readaheadsize_cb, + const std::function& readaheadsize_cb, FilePrefetchBufferUsage usage) const { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, @@ -713,7 +716,7 @@ struct BlockBasedTable::Rep { std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, uint64_t upper_bound_offset, - const std::function& readaheadsize_cb, + const std::function& readaheadsize_cb, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) const { if (!(*fpb)) { CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 54848b785..4e750d799 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -16,7 +16,7 @@ void BlockPrefetcher::PrefetchIfNeeded( const BlockBasedTable::Rep* rep, const BlockHandle& handle, const size_t readahead_size, bool is_for_compaction, const bool no_sequential_checking, const ReadOptions& read_options, - const std::function& readaheadsize_cb) { + const std::function& readaheadsize_cb) { const size_t len = BlockBasedTable::BlockSizeWithTrailer(handle); const size_t offset = handle.offset(); if (is_for_compaction) { diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index 7e075c08e..af0a63018 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -22,7 +22,7 @@ class BlockPrefetcher { const BlockBasedTable::Rep* rep, const BlockHandle& handle, size_t readahead_size, bool is_for_compaction, const bool no_sequential_checking, const ReadOptions& read_options, - const std::function& readaheadsize_cb); + const std::function& readaheadsize_cb); FilePrefetchBuffer* prefetch_buffer() { return prefetch_buffer_.get(); } void UpdateReadPattern(const uint64_t& offset, const size_t& len) { diff --git a/table/table_test.cc b/table/table_test.cc index c3981289e..2904792c2 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -52,6 +52,7 @@ #include "table/block_based/block.h" #include "table/block_based/block_based_table_builder.h" #include "table/block_based/block_based_table_factory.h" +#include "table/block_based/block_based_table_iterator.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "table/block_based/filter_policy_internal.h" @@ -469,6 +470,7 @@ class TableConstructor : public Constructor { } BlockCacheTracer block_cache_tracer_; + Env* env_; private: void Reset() { @@ -491,7 +493,6 @@ class TableConstructor : public Constructor { static uint64_t cur_file_num_; EnvOptions soptions; - Env* env_; }; uint64_t TableConstructor::cur_file_num_ = 1; @@ -1136,9 +1137,7 @@ class BlockBasedTableTest : public BlockBasedTableTestBase, virtual public ::testing::WithParamInterface { public: - BlockBasedTableTest() : format_(GetParam()) { - env_ = ROCKSDB_NAMESPACE::Env::Default(); - } + BlockBasedTableTest() : format_(GetParam()) { env_ = Env::Default(); } BlockBasedTableOptions GetBlockBasedTableOptions() { BlockBasedTableOptions options; @@ -3151,6 +3150,453 @@ TEST_P(BlockBasedTableTest, TracingGetTest) { 
c.ResetTableReader(); } +void GenerateKVMap(TableConstructor* c) { + int num_block = 100; + Random rnd(101); + uint32_t key = 0; + for (int block = 0; block < num_block; block++) { + for (int i = 0; i < 16; i++) { + char k[9] = {0}; + // Internal key is constructed directly from this key, + // and internal key size is required to be >= 8 bytes, + // so use %08u as the format string. + snprintf(k, sizeof(k), "%08u", key); + std::string v = rnd.RandomString(256); + InternalKey ikey(std::string(k), 0, kTypeValue); + c->Add(ikey.Encode().ToString(), rnd.RandomString(256)); + key++; + } + } +} + +void WarmUpCache(TableConstructor* c, const MutableCFOptions& moptions, + const std::vector& warm_keys) { + ReadOptions ro; + std::unique_ptr iter(c->GetTableReader()->NewIterator( + ro, moptions.prefix_extractor.get(), nullptr, false, + TableReaderCaller::kUncategorized)); + size_t i = 0; + while (i < warm_keys.size()) { + InternalKey ikey(warm_keys[i], 0, kTypeValue); + iter->Seek(ikey.Encode().ToString()); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + i++; + } +} + +TEST_P(BlockBasedTableTest, BlockCacheLookupSeqScans) { + Options options; + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.create_if_missing = true; + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + table_options.block_cache = NewLRUCache(1024 * 1024, 0); + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + table_options.block_align = true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + TableConstructor c(BytewiseComparator()); + GenerateKVMap(&c); + + std::vector keys; + stl_wrappers::KVMap kvmap; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + BlockBasedTable* bbt = reinterpret_cast(c.GetTableReader()); + BlockHandle block_handle; + + ReadOptions read_options; + read_options.auto_readahead_size = true; + Slice ub = Slice("00000805"); + Slice* ub_ptr = &ub; + read_options.iterate_upper_bound = ub_ptr; + read_options.readahead_size = 16384; + uint64_t buffer_offset; + size_t buffer_len; + + // Test various functionalities - + // 5 blocks prefetched - Current + 4 additional (readahead_size). + { + // Check the behavior when it's - + // Miss(200), Hit(210), Hit(225), Hit(240), Hit(255). + // It should only prefetch current block (200). + { + std::vector warm_keys{"00000210", "00000225", "00000240", + "00000255"}; + WarmUpCache(&c, moptions, warm_keys); + + std::unique_ptr iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // Seek key - + InternalKey ikey("00000200", 0, kTypeValue); + auto kv_iter = kvmap.find(ikey.Encode().ToString()); + + iter->Seek(kv_iter->first); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + FilePrefetchBuffer* prefetch_buffer = + (reinterpret_cast(iter.get())) + ->prefetch_buffer(); + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, block_handle); + // It won't prefetch the data of cache hit. + // One block data. 
+ ASSERT_EQ(buffer_len, 4096); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + + { + // Check the behavior when it's - + // First Prefetch - Miss(315), Miss(330), Miss(345), Hit(360), Hit(375), + // Second Prefetch - Miss(390), Miss(405) ... + // First prefetch should only prefetch from 315 to 345. + std::vector warm_keys{"00000360", "00000375"}; + WarmUpCache(&c, moptions, warm_keys); + + std::unique_ptr iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), nullptr, false, + TableReaderCaller::kUncategorized)); + + // Seek key - + InternalKey ikey("00000315", 0, kTypeValue); + auto kv_iter = kvmap.find(ikey.Encode().ToString()); + + iter->Seek(kv_iter->first); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + FilePrefetchBuffer* prefetch_buffer = + (reinterpret_cast(iter.get())) + ->prefetch_buffer(); + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, block_handle); + + // It won't prefetch the data of cache hit. + // 3 blocks data. + ASSERT_EQ(buffer_len, 12288); + ASSERT_EQ(buffer_offset, block_handle.offset()); + + for (; kv_iter != kvmap.end() && iter->Valid(); kv_iter++) { + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + iter->Next(); + ASSERT_OK(iter->status()); + + if (iter->user_key().ToString() == "00000400") { + break; + } + } + + // Second Prefetch. + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, block_handle); + ASSERT_EQ(buffer_offset, 106496); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + } + c.ResetTableReader(); +} + +TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { + Options options; + TableConstructor c(BytewiseComparator()); + std::unique_ptr env( + new CompositeEnvWrapper(c.env_, FileSystem::Default())); + options.env = env.get(); + c.env_ = env.get(); + + BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); + options.create_if_missing = true; + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + table_options.block_cache = NewLRUCache(1024 * 1024, 0); + table_options.cache_index_and_filter_blocks = true; + table_options.filter_policy.reset(NewBloomFilterPolicy(10, true)); + table_options.block_align = true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); + + GenerateKVMap(&c); + + std::vector keys; + stl_wrappers::KVMap kvmap; + ImmutableOptions ioptions(options); + MutableCFOptions moptions(options); + const InternalKeyComparator internal_comparator(options.comparator); + + c.Finish(options, ioptions, moptions, table_options, internal_comparator, + &keys, &kvmap); + + BlockBasedTable* bbt = reinterpret_cast(c.GetTableReader()); + BlockHandle block_handle; + + ReadOptions read_options; + read_options.auto_readahead_size = true; + Slice ub = Slice("00000805"); + Slice* ub_ptr = &ub; + read_options.iterate_upper_bound = ub_ptr; + read_options.readahead_size = 16384; + read_options.async_io = true; + uint64_t buffer_offset; + size_t buffer_len; + + // Test Various functionalities - + // 3 blocks prefetched - Current + 2 additional (readahead_size/2). 
+ { + // Check the behavior when it's - + // 1st Prefetch - Miss(200), Hit(210), Hit(225), + // 2nd Prefetch - Hit(240), Hit(255) + // First Prefetch will be for 200 offset. + // Second prefetch will be 0. + { + std::vector warm_keys{"00000210", "00000225", "00000240", + "00000255"}; + WarmUpCache(&c, moptions, warm_keys); + + std::unique_ptr iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), nullptr, false, + TableReaderCaller::kUncategorized)); + + // Seek key - + InternalKey ikey("00000200", 0, kTypeValue); + auto kv_iter = kvmap.find(ikey.Encode().ToString()); + + iter->Seek(kv_iter->first); + ASSERT_TRUE(iter->status().IsTryAgain()); + iter->Seek(kv_iter->first); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + FilePrefetchBuffer* prefetch_buffer = + (reinterpret_cast(iter.get())) + ->prefetch_buffer(); + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, block_handle); + ASSERT_EQ(buffer_len, 4096); + ASSERT_EQ(buffer_offset, block_handle.offset()); + prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + ASSERT_EQ(buffer_len, 0); + } + { + // Check the behavior when it's - + // First Prefetch - Miss(315), Miss(330), Hit(345), + // Second Prefetch - Miss(360), Miss(375), ... + // First prefetch should only prefetch from 315 to 330. + // Second prefetch should start from 360. + std::vector warm_keys{"00000345"}; + WarmUpCache(&c, moptions, warm_keys); + + std::unique_ptr iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), nullptr, false, + TableReaderCaller::kUncategorized)); + + // Seek key - + InternalKey ikey("00000315", 0, kTypeValue); + auto kv_iter = kvmap.find(ikey.Encode().ToString()); + + iter->Seek(kv_iter->first); + ASSERT_TRUE(iter->status().IsTryAgain()); + iter->Seek(kv_iter->first); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + FilePrefetchBuffer* prefetch_buffer = + (reinterpret_cast(iter.get())) + ->prefetch_buffer(); + { + // 1st Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, + block_handle); + ASSERT_EQ(buffer_len, 8192); + ASSERT_EQ(buffer_offset, block_handle.offset()); + + // 2nd Buffer Verification. 
+ prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + InternalKey ikey_tmp("00000360", 0, kTypeValue); + bbt->TEST_GetDataBlockHandle(read_options, ikey_tmp.Encode().ToString(), + block_handle); + ASSERT_EQ(buffer_len, 8192); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + } + + { + // Check the behavior when it's - + // First Prefetch - Miss(495), Miss(510), Hit(525), prefetch len- 8192 + // Second Prefetch async - Miss(540), Miss(555), - 8192 + // Third Prefetch Async - Hit(570), Miss(585), - 4096 + // 4th Prefetch Async - Hit(600), Miss(615), - 4096 + // 5th Prefetch Async - Miss(630), Miss(645) - 8192 + std::vector warm_keys{"00000525", "00000570", "00000600"}; + WarmUpCache(&c, moptions, warm_keys); + + std::unique_ptr iter(c.GetTableReader()->NewIterator( + read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, + /*skip_filters=*/false, TableReaderCaller::kUncategorized)); + + // Seek key - + InternalKey ikey("00000495", 0, kTypeValue); + auto kv_iter = kvmap.find(ikey.Encode().ToString()); + + // First and Second Prefetch. + iter->Seek(kv_iter->first); + ASSERT_TRUE(iter->status().IsTryAgain()); + iter->Seek(kv_iter->first); + ASSERT_OK(iter->status()); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + FilePrefetchBuffer* prefetch_buffer = + (reinterpret_cast(iter.get())) + ->prefetch_buffer(); + { + // 1st Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, + block_handle); + ASSERT_EQ(buffer_len, 8192); + ASSERT_EQ(buffer_offset, block_handle.offset()); + + // 2nd Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + InternalKey ikey_tmp("00000540", 0, kTypeValue); + bbt->TEST_GetDataBlockHandle(read_options, ikey_tmp.Encode().ToString(), + block_handle); + ASSERT_EQ(buffer_len, 8192); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + + // Third prefetch ReadAsync (buffers will swap). + for (; kv_iter != kvmap.end() && iter->Valid(); kv_iter++) { + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + if (iter->user_key() == "00000540") { + break; + } + + iter->Next(); + ASSERT_OK(iter->status()); + } + + { + // 1st Buffer Verification. + // curr buffer - 1. + prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, + block_handle); + ASSERT_EQ(buffer_offset, block_handle.offset()); + ASSERT_EQ(buffer_len, 8192); + + // 2nd Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + InternalKey ikey_tmp("00000585", 0, kTypeValue); + bbt->TEST_GetDataBlockHandle(read_options, ikey_tmp.Encode().ToString(), + block_handle); + ASSERT_EQ(buffer_len, 4096); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + + // 4th Prefetch ReadAsync (buffers will swap). + for (; kv_iter != kvmap.end() && iter->Valid(); kv_iter++) { + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + if (iter->user_key() == "00000585") { + break; + } + + iter->Next(); + ASSERT_OK(iter->status()); + } + + { + // 1st Buffer Verification. + // curr buffer - 0. 
+ prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, + block_handle); + ASSERT_EQ(buffer_offset, block_handle.offset()); + ASSERT_EQ(buffer_len, 4096); + + // 2nd Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + InternalKey ikey_tmp("00000615", 0, kTypeValue); + bbt->TEST_GetDataBlockHandle(read_options, ikey_tmp.Encode().ToString(), + block_handle); + ASSERT_EQ(buffer_len, 4096); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + + // 5th Prefetch ReadAsync. + for (; kv_iter != kvmap.end() && iter->Valid(); kv_iter++) { + ASSERT_EQ(iter->key(), kv_iter->first); + ASSERT_EQ(iter->value().ToString(), kv_iter->second); + + if (iter->user_key() == "00000615") { + break; + } + + iter->Next(); + ASSERT_OK(iter->status()); + } + + { + // 1st Buffer Verification. + // curr_ - 1. + prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, + buffer_len); + ASSERT_EQ(buffer_len, 4096); + bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, + block_handle); + ASSERT_EQ(buffer_offset, block_handle.offset()); + + // 2nd Buffer Verification. + prefetch_buffer->TEST_GetBufferOffsetandSize(0, buffer_offset, + buffer_len); + InternalKey ikey_tmp("00000630", 0, kTypeValue); + bbt->TEST_GetDataBlockHandle(read_options, ikey_tmp.Encode().ToString(), + block_handle); + ASSERT_EQ(buffer_len, 8192); + ASSERT_EQ(buffer_offset, block_handle.offset()); + } + } + } + c.ResetTableReader(); +} + struct HitMissCountingCache : public CacheWrapper { using CacheWrapper::CacheWrapper; const char* Name() const override { return "HitMissCountingCache"; } @@ -3313,8 +3759,8 @@ TEST_P(BlockBasedTableTest, TracingMultiGetTest) { record.block_type = TraceType::kBlockTraceFilterBlock; expected_records.push_back(record); } - // Then we should have three records for one index, one filter, and one data - // block access. (The two keys share a data block.) + // Then we should have three records for one index, one filter, and one + // data block access. (The two keys share a data block.) record.get_id = get_id_offset; record.block_type = TraceType::kBlockTraceFilterBlock; record.caller = TableReaderCaller::kUserMultiGet; @@ -3430,8 +3876,8 @@ TEST_P(BlockBasedTableTest, TracingIterator) { record.is_cache_hit = false; expected_records.push_back(record); expected_records.push_back(record); - // When we iterate this file for the second time, we should observe all cache - // hits. + // When we iterate this file for the second time, we should observe all + // cache hits. record.block_type = TraceType::kBlockTraceIndexBlock; record.is_cache_hit = true; expected_records.push_back(record); @@ -3505,8 +3951,8 @@ class BlockCachePropertiesSnapshot { int64_t block_cache_bytes_write = 0; }; -// Make sure, by default, index/filter blocks were pre-loaded (meaning we won't -// use block cache to store them). +// Make sure, by default, index/filter blocks were pre-loaded (meaning we +// won't use block cache to store them). 
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) { Options options; options.create_if_missing = true; @@ -3752,7 +4198,8 @@ void ValidateBlockRestartInterval(int value, int expected) { } TEST_P(BlockBasedTableTest, InvalidOptions) { - // invalid values for block_size_deviation (<0 or >100) are silently set to 0 + // invalid values for block_size_deviation (<0 or >100) are silently set to + // 0 ValidateBlockSizeDeviation(-10, 0); ValidateBlockSizeDeviation(-1, 0); ValidateBlockSizeDeviation(0, 0); @@ -3860,8 +4307,8 @@ TEST_P(BlockBasedTableTest, BlockReadCountTest) { TEST_P(BlockBasedTableTest, BlockCacheLeak) { // Check that when we reopen a table we don't lose access to blocks already - // in the cache. This test checks whether the Table actually makes use of the - // unique ID from the file. + // in the cache. This test checks whether the Table actually makes use of + // the unique ID from the file. Options opt; std::unique_ptr ikc; @@ -4185,7 +4632,6 @@ TEST_F(PlainTableTest, Crc32cFileChecksum) { EXPECT_STREQ(f.GetFileChecksum().c_str(), checksum.c_str()); } - TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) { TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */); c.Add("k01", "hello"); @@ -4314,7 +4760,8 @@ TEST_F(GeneralTableTest, ApproximateKeyAnchors) { std::vector anchors; ASSERT_OK(c.GetTableReader()->ApproximateKeyAnchors(ReadOptions(), anchors)); - // The target is 128 anchors. But in reality it can be slightly more or fewer. + // The target is 128 anchors. But in reality it can be slightly more or + // fewer. ASSERT_GT(anchors.size(), 120); ASSERT_LT(anchors.size(), 140); diff --git a/unreleased_history/new_features/async_support_tune_readahead.md b/unreleased_history/new_features/async_support_tune_readahead.md new file mode 100644 index 000000000..8794a493a --- /dev/null +++ b/unreleased_history/new_features/async_support_tune_readahead.md @@ -0,0 +1 @@ +Provide support for async_io to trim readahead_size by doing block cache lookup From 179d2c76464d99bdefe34ad027673018bd0babf1 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Fri, 8 Dec 2023 10:22:14 -0800 Subject: [PATCH 318/386] Intensify "xxx_one_in"'s default value in crash test (#12127) Summary: **Context/Summary:** My experimental stress runs with more frequent "xxx_one_in" surfaced a couple interesting bugs/issues with RocksDB or crash test framework in the past. We now consider changing the default value so they are run more frequently in production testing environment. Increase frequency by 2 orders of magnitude for most parameters, except for error-prone features e.g, manual compaction and file ingestion (increased by 3 orders) and expensive features e.g, checksum verification (increased by 1 order) Pull Request resolved: https://github.com/facebook/rocksdb/pull/12127 Test Plan: Monitor CI to see if it did surface more interesting bugs/issues. If not, we may consider intensify even more. 
Reviewed By: pdillinger Differential Revision: D51954235 Pulled By: hx235 fbshipit-source-id: 92046cb7c52a37212f19ab7965b40f77b90b08b1 --- tools/db_crashtest.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 303131602..2f8b0d0fc 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -31,10 +31,10 @@ default_params = { - "acquire_snapshot_one_in": 10000, + "acquire_snapshot_one_in": lambda: random.choice([100, 10000]), "backup_max_size": 100 * 1024 * 1024, # Consider larger number when backups considered more stable - "backup_one_in": 100000, + "backup_one_in": lambda: random.choice([1000, 100000]), "batch_protection_bytes_per_key": lambda: random.choice([0, 8]), "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]), "block_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]), @@ -48,7 +48,7 @@ "charge_filter_construction": lambda: random.choice([0, 1]), "charge_table_reader": lambda: random.choice([0, 1]), "charge_file_metadata": lambda: random.choice([0, 1]), - "checkpoint_one_in": 1000000, + "checkpoint_one_in": lambda: random.choice([10000, 1000000]), "compression_type": lambda: random.choice( ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"] ), @@ -67,8 +67,8 @@ "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1), "compression_checksum": lambda: random.randint(0, 1), "clear_column_family_one_in": 0, - "compact_files_one_in": 1000000, - "compact_range_one_in": 1000000, + "compact_files_one_in": lambda: random.choice([1000, 1000000]), + "compact_range_one_in": lambda: random.choice([1000, 1000000]), "compaction_pri": random.randint(0, 4), "data_block_index_type": lambda: random.choice([0, 1]), "delpercent": 4, @@ -78,19 +78,19 @@ "enable_compaction_filter": lambda: random.choice([0, 0, 0, 1]), "expected_values_dir": lambda: setup_expected_values_dir(), "fail_if_options_file_error": lambda: random.randint(0, 1), - "flush_one_in": 1000000, - "manual_wal_flush_one_in": lambda: random.choice([0, 0, 1000, 1000000]), + "flush_one_in": lambda: random.choice([1000, 1000000]), + "manual_wal_flush_one_in": lambda: random.choice([0, 1000]), "file_checksum_impl": lambda: random.choice(["none", "crc32c", "xxh64", "big"]), - "get_live_files_one_in": 1000000, + "get_live_files_one_in": lambda: random.choice([10000, 1000000]), # Note: the following two are intentionally disabled as the corresponding # APIs are not guaranteed to succeed. 
"get_sorted_wal_files_one_in": 0, "get_current_wal_file_one_in": 0, # Temporarily disable hash index "index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]), - "ingest_external_file_one_in": 1000000, + "ingest_external_file_one_in": lambda: random.choice([1000, 1000000]), "iterpercent": 10, - "lock_wal_one_in": 1000000, + "lock_wal_one_in": lambda: random.choice([10000, 1000000]), "mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1), "max_background_compactions": 20, "max_bytes_for_level_base": 10485760, @@ -105,7 +105,7 @@ "optimize_filters_for_memory": lambda: random.randint(0, 1), "partition_filters": lambda: random.randint(0, 1), "partition_pinning": lambda: random.randint(0, 3), - "pause_background_one_in": 1000000, + "pause_background_one_in": lambda: random.choice([10000, 1000000]), "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]), "prefixpercent": 5, "progress_reports": 0, @@ -175,9 +175,9 @@ [16, 64, 1024 * 1024, 16 * 1024 * 1024] ), "level_compaction_dynamic_level_bytes": lambda: random.randint(0, 1), - "verify_checksum_one_in": 1000000, - "verify_file_checksums_one_in": 1000000, - "verify_db_one_in": 100000, + "verify_checksum_one_in": lambda: random.choice([100000, 1000000]), + "verify_file_checksums_one_in": lambda: random.choice([100000, 1000000]), + "verify_db_one_in": lambda: random.choice([10000, 100000]), "continuous_verification_interval": 0, "max_key_len": 3, "key_len_percent_dist": "1,30,69", @@ -187,7 +187,7 @@ "open_write_fault_one_in": lambda: random.choice([0, 0, 16]), "open_read_fault_one_in": lambda: random.choice([0, 0, 32]), "sync_fault_injection": lambda: random.randint(0, 1), - "get_property_one_in": 1000000, + "get_property_one_in": lambda: random.choice([100000, 1000000]), "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]), "max_write_buffer_size_to_maintain": lambda: random.choice( [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024] From a143f932364dd24eb70944c817721934897606df Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 8 Dec 2023 10:34:07 -0800 Subject: [PATCH 319/386] Turn the default Timer in PeriodicTaskScheduler into a leaky Meyers singleton (#12128) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12128 The patch turns the `Timer` Meyers singleton in `PeriodicTaskScheduler::Default()` into one of the leaky variety in order to prevent static destruction order issues. 
Reviewed By: akankshamahajan15 Differential Revision: D51963950 fbshipit-source-id: 0fc34113ad03c51fdc83bdb8c2cfb6c9f6913948 --- db/periodic_task_scheduler.cc | 3 +-- unreleased_history/bug_fixes/avoid_destroying_timer.md | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 unreleased_history/bug_fixes/avoid_destroying_timer.md diff --git a/db/periodic_task_scheduler.cc b/db/periodic_task_scheduler.cc index 1306f45da..1c4fc16b1 100644 --- a/db/periodic_task_scheduler.cc +++ b/db/periodic_task_scheduler.cc @@ -94,7 +94,7 @@ Status PeriodicTaskScheduler::Unregister(PeriodicTaskType task_type) { } Timer* PeriodicTaskScheduler::Default() { - static Timer timer(SystemClock::Default().get()); + STATIC_AVOID_DESTRUCTION(Timer, timer)(SystemClock::Default().get()); return &timer; } @@ -108,4 +108,3 @@ void PeriodicTaskScheduler::TEST_OverrideTimer(SystemClock* clock) { #endif // NDEBUG } // namespace ROCKSDB_NAMESPACE - diff --git a/unreleased_history/bug_fixes/avoid_destroying_timer.md b/unreleased_history/bug_fixes/avoid_destroying_timer.md new file mode 100644 index 000000000..2a70b6b31 --- /dev/null +++ b/unreleased_history/bug_fixes/avoid_destroying_timer.md @@ -0,0 +1 @@ +Avoid destroying the periodic task scheduler's default timer in order to prevent static destruction order issues. From 44fd9141285d5bf5b7b64e119b08f8af302b0bec Mon Sep 17 00:00:00 2001 From: Kevin Mingtarja Date: Fri, 8 Dec 2023 17:12:11 -0800 Subject: [PATCH 320/386] Fix double counting of BYTES_WRITTEN ticker (#12111) Summary: Fixes https://github.com/facebook/rocksdb/issues/12061. We were double counting the `BYTES_WRITTEN` ticker when doing writes with transactions. During transactions, after writing, a client can call `Prepare()`, which writes the values to WAL but not to the Memtable. After that, they can call `Commit()`, which writes a commit marker to the WAL and the values to Memtable. The cause of this bug is previously during writes, we didn't take into account `writer->ShouldWriteToMemtable()` before adding to `total_byte_size`, so it is still added to during the `Prepare()` phase even though we're not writing to the Memtable, which was why we saw the value to be double of what's written to WAL. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12111 Test Plan: Added a test in `db/db_statistics_test.cc` that tests writes with and without transactions, by comparing the values of `BYTES_WRITTEN` and `WAL_FILE_BYTES` after doing writes. 
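To illustrate the accounting change, the sketch below paraphrases the fix in simplified form; it is not the actual `DBImpl::WriteImpl` code. `FakeWriter` and the two free functions are hypothetical, and the real implementation accumulates `total_byte_size` via `WriteBatchInternal::AppendedByteSize` while iterating the write group.

```
#include <cstdint>
#include <vector>

// Hypothetical, simplified stand-in for a member of the write group.
struct FakeWriter {
  uint64_t batch_bytes;
  bool write_to_memtable;  // false during Prepare() under WriteCommitted
  bool ShouldWriteToMemtable() const { return write_to_memtable; }
};

// Before the fix: every batch in the group contributed to the BYTES_WRITTEN
// total, including WAL-only batches written during Prepare(), so a prepared
// and then committed transaction had its data counted twice.
uint64_t BytesWrittenBeforeFix(const std::vector<FakeWriter>& group) {
  uint64_t total = 0;
  for (const auto& w : group) {
    total += w.batch_bytes;  // unconditional: counts Prepare() and Commit()
  }
  return total;
}

// After the fix: only batches that actually reach the memtable are counted,
// so BYTES_WRITTEN reflects data written to the DB rather than to the WAL.
uint64_t BytesWrittenAfterFix(const std::vector<FakeWriter>& group) {
  uint64_t total = 0;
  for (const auto& w : group) {
    if (w.ShouldWriteToMemtable()) {
      total += w.batch_bytes;
    }
  }
  return total;
}
```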
Reviewed By: jaykorean Differential Revision: D51954327 Pulled By: jowlyzhang fbshipit-source-id: 57a0986a14e5b94eb5188715d819212529110d2c --- db/db_impl/db_impl_write.cc | 8 +- db/db_statistics_test.cc | 74 +++++++++++++++++++ monitoring/perf_context_imp.h | 2 +- ...ix_bytes_written_ticker_double_counting.md | 1 + 4 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index f72b9e481..8add1e990 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -437,10 +437,10 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, valid_batches += writer->batch_cnt; if (writer->ShouldWriteToMemtable()) { total_count += WriteBatchInternal::Count(writer->batch); + total_byte_size = WriteBatchInternal::AppendedByteSize( + total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); parallel = parallel && !writer->batch->HasMerge(); } - total_byte_size = WriteBatchInternal::AppendedByteSize( - total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); if (writer->pre_release_callback) { pre_release_callback_cnt++; } @@ -720,11 +720,11 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options, if (writer->ShouldWriteToMemtable()) { writer->sequence = next_sequence; size_t count = WriteBatchInternal::Count(writer->batch); + total_byte_size = WriteBatchInternal::AppendedByteSize( + total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); next_sequence += count; total_count += count; } - total_byte_size = WriteBatchInternal::AppendedByteSize( - total_byte_size, WriteBatchInternal::ByteSize(writer->batch)); } } if (w.disable_wal) { diff --git a/db/db_statistics_test.cc b/db/db_statistics_test.cc index 054fbc56c..f430811d3 100644 --- a/db/db_statistics_test.cc +++ b/db/db_statistics_test.cc @@ -6,9 +6,11 @@ #include #include "db/db_test_util.h" +#include "db/write_batch_internal.h" #include "monitoring/thread_status_util.h" #include "port/stack_trace.h" #include "rocksdb/statistics.h" +#include "rocksdb/utilities/transaction_db.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { @@ -283,6 +285,78 @@ TEST_F(DBStatisticsTest, BlockChecksumStats) { options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT)); } +TEST_F(DBStatisticsTest, BytesWrittenStats) { + Options options = CurrentOptions(); + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + options.statistics->set_stats_level(StatsLevel::kExceptHistogramOrTimers); + Reopen(options); + + EXPECT_EQ(0, options.statistics->getAndResetTickerCount(WAL_FILE_BYTES)); + EXPECT_EQ(0, options.statistics->getAndResetTickerCount(BYTES_WRITTEN)); + + const int kNumKeysWritten = 100; + + // Scenario 0: Not using transactions. + // This will write to WAL and memtable directly. + ASSERT_OK(options.statistics->Reset()); + + for (int i = 0; i < kNumKeysWritten; ++i) { + ASSERT_OK(Put(Key(i), "val")); + } + + EXPECT_EQ(options.statistics->getAndResetTickerCount(WAL_FILE_BYTES), + options.statistics->getAndResetTickerCount(BYTES_WRITTEN)); + + // Scenario 1: Using transactions. + // This should not double count BYTES_WRITTEN (issue #12061). + for (bool enable_pipelined_write : {false, true}) { + ASSERT_OK(options.statistics->Reset()); + + // Destroy the DB to recreate as a TransactionDB. + Destroy(options, true); + + // Create a TransactionDB. 
+ TransactionDB* txn_db = nullptr; + TransactionDBOptions txn_db_opts; + txn_db_opts.write_policy = TxnDBWritePolicy::WRITE_COMMITTED; + options.enable_pipelined_write = enable_pipelined_write; + ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db)); + ASSERT_NE(txn_db, nullptr); + db_ = txn_db->GetBaseDB(); + + WriteOptions wopts; + TransactionOptions txn_opts; + Transaction* txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr); + ASSERT_NE(txn, nullptr); + ASSERT_OK(txn->SetName("txn1")); + + for (int i = 0; i < kNumKeysWritten; ++i) { + ASSERT_OK(txn->Put(Key(i), "val")); + } + + // Prepare() writes to WAL, but not to memtable. (WriteCommitted) + ASSERT_OK(txn->Prepare()); + EXPECT_NE(0, options.statistics->getTickerCount(WAL_FILE_BYTES)); + // BYTES_WRITTEN would have been non-zero previously (issue #12061). + EXPECT_EQ(0, options.statistics->getTickerCount(BYTES_WRITTEN)); + + // Commit() writes to memtable and also a commit marker to WAL. + ASSERT_OK(txn->Commit()); + delete txn; + + // The WAL has an extra header of size `kHeader` written to it, + // as we are writing twice to it (first during Prepare, second during + // Commit). + EXPECT_EQ(options.statistics->getAndResetTickerCount(WAL_FILE_BYTES), + options.statistics->getAndResetTickerCount(BYTES_WRITTEN) + + WriteBatchInternal::kHeader); + + // Cleanup + db_ = nullptr; + delete txn_db; + } +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index a63d931a7..f7c9908cc 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -74,7 +74,7 @@ extern thread_local PerfContext perf_context; #define PERF_COUNTER_ADD(metric, value) \ if (perf_level >= PerfLevel::kEnableCount) { \ perf_context.metric += value; \ - } \ + } \ static_assert(true, "semicolon required") // Increase metric value diff --git a/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md b/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md new file mode 100644 index 000000000..fe6859874 --- /dev/null +++ b/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md @@ -0,0 +1 @@ +Fix double counting of BYTES_WRITTEN ticker when doing writes with transactions. From 4f04f967427007a6ae10d9bc927a3619eed1ab34 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Fri, 8 Dec 2023 17:21:52 -0800 Subject: [PATCH 321/386] Remove extra semi colon from infrasec/authorization/audit/AclAuditor.cpp Summary: `-Wextra-semi` or `-Wextra-semi-stmt` If the code compiles, this is safe to land. 
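For reference, a tiny hypothetical example of the two patterns this warning flags, mirroring the spots fixed below; the `Example` class and the `example_c_api` declaration are made up for illustration.

```
// Hypothetical class illustrating what -Wextra-semi reports.
class Example {
 public:
  explicit Example(int v) : v_(v) {};  // -Wextra-semi: extra ';' after body
  // Fixed form: explicit Example(int v) : v_(v) {}

 private:
  int v_;
};

extern "C" {
void example_c_api(void);
};  // -Wextra-semi: extra ';' after the closing brace; drop the semicolon
```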
Reviewed By: palmje Differential Revision: D51995065 fbshipit-source-id: 9b55a0d8abd0927b76376cb7751bf0fcab10518c --- util/autovector.h | 2 +- .../lock/range/range_tree/lib/util/partitioned_counter.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/util/autovector.h b/util/autovector.h index 79ee5de57..39c7aabee 100644 --- a/util/autovector.h +++ b/util/autovector.h @@ -61,7 +61,7 @@ class autovector { using iterator_category = std::random_access_iterator_tag; iterator_impl(TAutoVector* vect, size_t index) - : vect_(vect), index_(index){}; + : vect_(vect), index_(index){} iterator_impl(const iterator_impl&) = default; ~iterator_impl() {} iterator_impl& operator=(const iterator_impl&) = default; diff --git a/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h b/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h index f20eeedf2..53ca5aab1 100644 --- a/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h +++ b/utilities/transactions/lock/range/range_tree/lib/util/partitioned_counter.h @@ -123,7 +123,7 @@ void partitioned_counters_destroy(void); // Effect: Destroy any partitioned counters data structures. #if defined(__cplusplus) -}; +} #endif #if 0 From 5a063ecd345eefdf82c51d837acf8824949205b6 Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Mon, 11 Dec 2023 11:03:17 -0800 Subject: [PATCH 322/386] Java API consistency between RocksDB.put() , .merge() and Transaction.put() , .merge() (#11019) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: ### Implement new Java API get()/put()/merge() methods, and transactional variants. The Java API methods are very inconsistent in terms of how they pass parameters (byte[], ByteBuffer), and what variants and defaulted parameters they support. We try to bring some consistency to this. * All APIs should support calls with ByteBuffer parameters. * Similar methods (RocksDB.get() vs Transaction.get()) should support as similar as possible sets of parameters for predictability. * get()-like methods should provide variants where the caller supplies the target buffer, for the sake of efficiency. Allocation costs in Java can be significant when large buffers are repeatedly allocated and freed. ### API Additions 1. RockDB.get implement indirect ByteBuffers. Added indirect ByteBuffers and supporting native methods for get(). 2. RocksDB.Iterator implement missing (byte[], offset, length) variants for key() and value() parameters. 3. Transaction.get() implement missing methods, based on RocksDB.get. Added ByteBuffer.get with and without column family. Added byte[]-as-target get. 4. Transaction.iterator() implement a getIterator() which defaults ReadOptions; as per RocksDB.iterator(). Rationalize support API for this and RocksDB.iterator() 5. RocksDB.merge implement ByteBuffer methods; both direct and indirect buffers. Shadow the methods of RocksDB.put; RocksDB.put only offers ByteBuffer API with explicit WriteOptions. Duplicated this with RocksDB.merge 6. Transaction.merge implement methods as per RocksDB.merge methods. Transaction is already constructed with WriteOptions, so no explicit WriteOptions methods required. 7. Transaction.mergeUntracked implement the same API methods as Transaction.merge except the ones that use assumeTracked, because that’s not a feature of merge untracked. ### Support Changes (C++) The current JNI code in C++ supports multiple variants of methods through a number of helper functions. 
There are numerous TODO suggestions in the code proposing that the helpers be re-factored/shared. We have taken a different approach for the new methods; we have created wrapper classes `JDirectBufferSlice`, `JDirectBufferPinnableSlice`, `JByteArraySlice` and `JByteArrayPinnableSlice` RAII classes which construct slices from JNI parameters and can then be passed directly to RocksDB methods. For instance, the `Java_org_rocksdb_Transaction_getDirect` method is implemented like this: ``` try { ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, jkey_part_len); ROCKSDB_NAMESPACE::JDirectBufferPinnableSlice value(env, jval_bb, jval_off, jval_part_len); ROCKSDB_NAMESPACE::KVException::ThrowOnError( env, txn->Get(*read_options, column_family_handle, key.slice(), &value.pinnable_slice())); return value.Fetch(); } catch (const ROCKSDB_NAMESPACE::KVException& e) { return e.Code(); } ``` Notice the try/catch mechanism with the `KVException` class, which combined with RAII and the wrapper classes means that there is no ad-hoc cleanup necessary in the JNI methods. We propose to extend this mechanism to existing JNI methods as further work. ### Support Changes (Java) Where there are multiple parameter-variant versions of the same method, we use fewer or just one supporting native method for all of them. This makes maintenance a bit easier and reduces the opportunity for coding errors mixing up (untyped) object handles. In order to support this efficiently, some classes need to have default values for column families and read options added and cached so that they are not re-constructed on every method call. This PR closes https://github.com/facebook/rocksdb/issues/9776 Pull Request resolved: https://github.com/facebook/rocksdb/pull/11019 Reviewed By: ajkr Differential Revision: D52039446 Pulled By: jowlyzhang fbshipit-source-id: 45d0140a4887e42134d2e56520e9b8efbd349660 --- java/CMakeLists.txt | 2 + .../{GetBenchmarks.md => GetPutBenchmarks.md} | 97 +- java/Makefile | 4 + .../java/org/rocksdb/jmh/PutBenchmarks.java | 125 ++- java/pmd-rules.xml | 1 + java/rocksjni/kv_helper.h | 284 +++++ java/rocksjni/rocksjni.cc | 277 ++--- java/rocksjni/transaction.cc | 566 ++++++---- java/src/main/java/org/rocksdb/GetStatus.java | 32 + .../org/rocksdb/OptimisticTransactionDB.java | 1 + .../main/java/org/rocksdb/ReadOptions.java | 2 - java/src/main/java/org/rocksdb/RocksDB.java | 203 +++- .../main/java/org/rocksdb/RocksIterator.java | 82 +- .../main/java/org/rocksdb/Transaction.java | 993 ++++++++++++++++-- .../main/java/org/rocksdb/TransactionDB.java | 2 + .../java/org/rocksdb/util/BufferUtil.java | 16 + .../org/rocksdb/AbstractTransactionTest.java | 492 ++++++++- .../java/org/rocksdb/KeyMayExistTest.java | 22 +- .../java/org/rocksdb/MergeCFVariantsTest.java | 126 +++ java/src/test/java/org/rocksdb/MergeTest.java | 4 +- .../java/org/rocksdb/MergeVariantsTest.java | 95 ++ .../rocksdb/OptimisticTransactionTest.java | 11 +- .../java/org/rocksdb/PutCFVariantsTest.java | 126 +++ .../java/org/rocksdb/PutVariantsTest.java | 92 ++ .../test/java/org/rocksdb/RocksDBTest.java | 75 +- .../java/org/rocksdb/RocksIteratorTest.java | 117 ++- .../java/org/rocksdb/TransactionTest.java | 11 +- .../java_api_consistency.md | 16 + 28 files changed, 3304 insertions(+), 570 deletions(-) rename java/{GetBenchmarks.md => GetPutBenchmarks.md} (66%) create mode 100644 java/rocksjni/kv_helper.h create mode 100644 java/src/main/java/org/rocksdb/GetStatus.java create mode 100644 java/src/main/java/org/rocksdb/util/BufferUtil.java create 
mode 100644 java/src/test/java/org/rocksdb/MergeCFVariantsTest.java create mode 100644 java/src/test/java/org/rocksdb/MergeVariantsTest.java create mode 100644 java/src/test/java/org/rocksdb/PutCFVariantsTest.java create mode 100644 java/src/test/java/org/rocksdb/PutVariantsTest.java create mode 100644 unreleased_history/performance_improvements/java_api_consistency.md diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 0fc503e69..cf859ae95 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -168,6 +168,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/FlushJobInfo.java src/main/java/org/rocksdb/FlushReason.java src/main/java/org/rocksdb/FlushOptions.java + src/main/java/org/rocksdb/GetStatus.java src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java src/main/java/org/rocksdb/HashSkipListMemTableConfig.java src/main/java/org/rocksdb/HistogramData.java @@ -285,6 +286,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/WriteBufferManager.java src/main/java/org/rocksdb/WriteStallCondition.java src/main/java/org/rocksdb/WriteStallInfo.java + src/main/java/org/rocksdb/util/BufferUtil.java src/main/java/org/rocksdb/util/ByteUtil.java src/main/java/org/rocksdb/util/BytewiseComparator.java src/main/java/org/rocksdb/util/Environment.java diff --git a/java/GetBenchmarks.md b/java/GetPutBenchmarks.md similarity index 66% rename from java/GetBenchmarks.md rename to java/GetPutBenchmarks.md index b66a897e2..600b6377c 100644 --- a/java/GetBenchmarks.md +++ b/java/GetPutBenchmarks.md @@ -8,16 +8,16 @@ Mac ``` make clean jclean DEBUG_LEVEL=0 make -j12 rocksdbjava -(cd java/target; cp rocksdbjni-7.9.0-osx.jar rocksdbjni-7.9.0-SNAPSHOT-osx.jar) -mvn install:install-file -Dfile=./java/target/rocksdbjni-7.9.0-SNAPSHOT-osx.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.9.0-SNAPSHOT -Dpackaging=jar +(cd java/target; cp rocksdbjni-7.10.0-osx.jar rocksdbjni-7.10.0-SNAPSHOT-osx.jar) +mvn install:install-file -Dfile=./java/target/rocksdbjni-7.10.0-SNAPSHOT-osx.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.10.0-SNAPSHOT -Dpackaging=jar ``` Linux ``` make clean jclean DEBUG_LEVEL=0 make -j12 rocksdbjava -(cd java/target; cp rocksdbjni-7.9.0-linux64.jar rocksdbjni-7.9.0-SNAPSHOT-linux64.jar) -mvn install:install-file -Dfile=./java/target/rocksdbjni-7.9.0-SNAPSHOT-linux64.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.9.0-SNAPSHOT -Dpackaging=jar +(cd java/target; cp rocksdbjni-7.10.0-linux64.jar rocksdbjni-7.10.0-SNAPSHOT-linux64.jar) +mvn install:install-file -Dfile=./java/target/rocksdbjni-7.10.0-SNAPSHOT-linux64.jar -DgroupId=org.rocksdb -DartifactId=rocksdbjni -Dversion=7.10.0-SNAPSHOT -Dpackaging=jar ``` Build jmh test package, on either platform @@ -35,31 +35,10 @@ The long performance run (as big as we can make it on our Ubuntu box without fil java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,16384 -p columnFamilyTestType="1_column_family","20_column_families" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet ``` -## Results (small runs, Mac) - -These are run on a 10-core M1 with 64GB of memory and 2TB of SSD. -They probably reflect the absolute best case for this optimization, hitting in-memory buffers and completely eliminating a buffer copy. 
- -### Before -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 43496.578 ± 5743.090 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 70765.578 ± 697.548 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 69883.554 ± 944.184 ops/s - -### After fixing byte[] (.get and .preallocatedGet) - -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 149207.681 ± 2261.671 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 68920.489 ± 1574.664 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 177399.022 ± 2107.375 ops/s +## Results (Ubuntu, big runs) -### After fixing ByteBuffer (.preallocatedByteBufferGet) +NB - we have removed some test results we initially observed on Mac which were not later reproducible. -Benchmark (columnFamilyTestType) (keyCount) (keySize) (multiGetSize) (valueSize) Mode Cnt Score Error Units -GetBenchmarks.get no_column_family 1000 128 N/A 32768 thrpt 25 150389.259 ± 1371.473 ops/s -GetBenchmarks.preallocatedByteBufferGet no_column_family 1000 128 N/A 32768 thrpt 25 179919.468 ± 1670.714 ops/s -GetBenchmarks.preallocatedGet no_column_family 1000 128 N/A 32768 thrpt 25 178261.938 ± 2630.571 ops/s -## Results (Ubuntu, big runs) These take 3-4 hours ``` java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,16384 -p columnFamilyTestType="1_column_family","20_column_families" GetBenchmarks.get GetBenchmarks.preallocatedByteBufferGet GetBenchmarks.preallocatedGet @@ -67,6 +46,13 @@ java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,500 It's clear that all `get()` variants have noticeably improved performance, though not the spectacular gains of the M1. ### With fixes for all of the `get()` instances +The tests which use methods which have had performance improvements applied are: +```java +get() +preallocatedGet() +preallocatedByteBufferGet() +``` + Benchmark (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units GetBenchmarks.get 1_column_family 1000 128 1024 thrpt 25 935648.793 ± 22879.910 ops/s GetBenchmarks.get 1_column_family 1000 128 16384 thrpt 25 204366.301 ± 1326.570 ops/s @@ -159,3 +145,60 @@ GetBenchmarks.preallocatedGet no_column_families 1000 The performance improvement is real. +# Put Performance Benchmarks + +Results associated with [Java API consistency between RocksDB.put() , .merge() and Transaction.put() , .merge()](https://github.com/facebook/rocksdb/pull/11019) + +This work was not designed specifically as a performance optimization, but we want to confirm that it has not regressed what it has changed, and to provide +a baseline for future possible performance work. + +## Build/Run + +Building is as above. Running is a different invocation of the same JMH jar. +``` +java -jar target/rocksdbjni-jmh-1.0-SNAPSHOT-benchmarks.jar -p keyCount=1000,50000 -p keySize=128 -p valueSize=1024,32768 -p columnFamilyTestType="no_column_family" PutBenchmarks +``` + +## Before Changes + +These results were generated in a private branch with the `PutBenchmarks` from the PR backported onto the current *main*. 
+ +Benchmark (bufferListSize) (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units +PutBenchmarks.put 16 no_column_family 1000 128 1024 thrpt 25 76670.200 ± 2555.248 ops/s +PutBenchmarks.put 16 no_column_family 1000 128 32768 thrpt 25 3913.692 ± 225.690 ops/s +PutBenchmarks.put 16 no_column_family 50000 128 1024 thrpt 25 74479.589 ± 988.361 ops/s +PutBenchmarks.put 16 no_column_family 50000 128 32768 thrpt 25 4070.800 ± 194.838 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 1000 128 1024 thrpt 25 72150.853 ± 1744.216 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 1000 128 32768 thrpt 25 3896.646 ± 188.629 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 50000 128 1024 thrpt 25 71753.287 ± 1053.904 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 50000 128 32768 thrpt 25 3928.503 ± 264.443 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 1000 128 1024 thrpt 25 72595.105 ± 1027.258 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 1000 128 32768 thrpt 25 3890.100 ± 199.131 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 50000 128 1024 thrpt 25 70878.133 ± 1181.601 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 50000 128 32768 thrpt 25 3863.181 ± 215.888 ops/s + +## After Changes + +These results were generated on the PR branch. + +Benchmark (bufferListSize) (columnFamilyTestType) (keyCount) (keySize) (valueSize) Mode Cnt Score Error Units +PutBenchmarks.put 16 no_column_family 1000 128 1024 thrpt 25 75178.751 ± 2644.775 ops/s +PutBenchmarks.put 16 no_column_family 1000 128 32768 thrpt 25 3937.175 ± 257.039 ops/s +PutBenchmarks.put 16 no_column_family 50000 128 1024 thrpt 25 74375.519 ± 1776.654 ops/s +PutBenchmarks.put 16 no_column_family 50000 128 32768 thrpt 25 4013.413 ± 257.706 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 1000 128 1024 thrpt 25 71418.303 ± 1610.977 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 1000 128 32768 thrpt 25 4027.581 ± 227.900 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 50000 128 1024 thrpt 25 71229.107 ± 2720.083 ops/s +PutBenchmarks.putByteArrays 16 no_column_family 50000 128 32768 thrpt 25 4022.635 ± 212.540 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 1000 128 1024 thrpt 25 71718.501 ± 787.537 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 1000 128 32768 thrpt 25 4078.050 ± 176.331 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 50000 128 1024 thrpt 25 72736.754 ± 828.971 ops/s +PutBenchmarks.putByteBuffers 16 no_column_family 50000 128 32768 thrpt 25 3987.232 ± 205.577 ops/s + +## Discussion + +The changes don't appear to have had a material effect on performance. We are happy with this. + + * We would obviously advise running future changes before and after to confirm they have no adverse effects. 
+ + diff --git a/java/Makefile b/java/Makefile index e71589e9e..a887a24b3 100644 --- a/java/Makefile +++ b/java/Makefile @@ -150,7 +150,9 @@ JAVA_TESTS = \ org.rocksdb.LRUCacheTest\ org.rocksdb.MemoryUtilTest\ org.rocksdb.MemTableTest\ + org.rocksdb.MergeCFVariantsTest\ org.rocksdb.MergeTest\ + org.rocksdb.MergeVariantsTest\ org.rocksdb.MultiColumnRegressionTest \ org.rocksdb.MultiGetManyKeysTest\ org.rocksdb.MultiGetTest\ @@ -167,6 +169,8 @@ JAVA_TESTS = \ org.rocksdb.OptionsTest\ org.rocksdb.PerfLevelTest \ org.rocksdb.PerfContextTest \ + org.rocksdb.PutCFVariantsTest\ + org.rocksdb.PutVariantsTest\ org.rocksdb.PlainTableConfigTest\ org.rocksdb.RateLimiterTest\ org.rocksdb.ReadOnlyTest\ diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java index 5aae21cb9..cf82401c1 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java @@ -6,18 +6,19 @@ */ package org.rocksdb.jmh; -import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; +import static org.rocksdb.util.KVUtils.ba; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; - -import static org.rocksdb.util.KVUtils.ba; +import org.openjdk.jmh.annotations.*; +import org.rocksdb.*; +import org.rocksdb.util.FileUtils; @State(Scope.Benchmark) public class PutBenchmarks { @@ -30,12 +31,24 @@ public class PutBenchmarks { }) String columnFamilyTestType; + @Param({"1000", "100000"}) int keyCount; + + @Param({"12", "64", "128"}) int keySize; + + @Param({"64", "1024", "65536"}) int valueSize; + + @Param({"16"}) int bufferListSize; + Path dbDir; DBOptions options; int cfs = 0; // number of column families private AtomicInteger cfHandlesIdx; ColumnFamilyHandle[] cfHandles; RocksDB db; + List keyBuffers = new ArrayList<>(bufferListSize); + List valueBuffers = new ArrayList<>(bufferListSize); + List keyBuffersBB = new ArrayList<>(bufferListSize); + List valueBuffersBB = new ArrayList<>(bufferListSize); @Setup(Level.Trial) public void setup() throws IOException, RocksDBException { @@ -68,6 +81,34 @@ public void setup() throws IOException, RocksDBException { final List cfHandlesList = new ArrayList<>(cfDescriptors.size()); db = RocksDB.open(options, dbDir.toAbsolutePath().toString(), cfDescriptors, cfHandlesList); cfHandles = cfHandlesList.toArray(new ColumnFamilyHandle[0]); + + for (int i = 0; i < bufferListSize; i++) { + final byte[] keyArr = new byte[keySize]; + Arrays.fill(keyArr, (byte) 0x30); + keyBuffers.add(keyArr); + } + + for (int i = 0; i < bufferListSize; i++) { + final byte[] valueArr = new byte[valueSize]; + Arrays.fill(valueArr, (byte) 0x30); + valueBuffers.add(valueArr); + } + + for (int i = 0; i < bufferListSize; i++) { + final ByteBuffer keyBB = ByteBuffer.allocateDirect(keySize); + byte[] keyArr = new byte[keySize]; + Arrays.fill(keyArr, (byte) 0x30); + keyBB.put(keyArr); + keyBuffersBB.add(keyBB); + } + + for (int i = 0; i < bufferListSize; i++) { + final ByteBuffer valueBB = ByteBuffer.allocateDirect(valueSize); + byte[] valueArr = new byte[valueSize]; + Arrays.fill(valueArr, (byte) 0x30); + valueBB.put(valueArr); + valueBuffersBB.add(valueBB); + } } @TearDown(Level.Trial) @@ -104,9 +145,79 @@ public int next() { } } + private T borrow(final List buffers) { + 
synchronized (buffers) { + while (true) { + if (buffers.isEmpty()) { + try { + Thread.sleep(1000); + } catch (InterruptedException ie) { + return null; + } + continue; + } + return buffers.remove(0); + } + } + } + + private void repay(final List buffers, final T buffer) { + synchronized (buffers) { + buffers.add(buffer); + } + } + @Benchmark - public void put(final ComparatorBenchmarks.Counter counter) throws RocksDBException { + public void put(final Counter counter) throws RocksDBException { + byte[] keyBuf = borrow(keyBuffers); + byte[] valueBuf = borrow(valueBuffers); + + final int i = counter.next(); + final byte[] keyPrefix = ba("key" + i); + final byte[] valuePrefix = ba("value" + i); + System.arraycopy(keyPrefix, 0, keyBuf, 0, keyPrefix.length); + System.arraycopy(valuePrefix, 0, valueBuf, 0, valuePrefix.length); + db.put(getColumnFamily(), keyBuf, valueBuf); + + repay(keyBuffers, keyBuf); + repay(valueBuffers, valueBuf); + } + + @Benchmark + public void putByteArrays(final Counter counter) throws RocksDBException { + byte[] keyBuf = borrow(keyBuffers); + byte[] valueBuf = borrow(valueBuffers); + + final int i = counter.next(); + final byte[] keyPrefix = ba("key" + i); + final byte[] valuePrefix = ba("value" + i); + System.arraycopy(keyPrefix, 0, keyBuf, 0, keyPrefix.length); + System.arraycopy(valuePrefix, 0, valueBuf, 0, valuePrefix.length); + db.put(getColumnFamily(), new WriteOptions(), keyBuf, valueBuf); + + repay(keyBuffers, keyBuf); + repay(valueBuffers, valueBuf); + } + + @Benchmark + public void putByteBuffers(final Counter counter) throws RocksDBException { + ByteBuffer keyBuf = borrow(keyBuffersBB); + keyBuf.clear(); + ByteBuffer valueBuf = borrow(valueBuffersBB); + valueBuf.clear(); + final int i = counter.next(); - db.put(getColumnFamily(), ba("key" + i), ba("value" + i)); + final byte[] keyPrefix = ba("key" + i); + final byte[] valuePrefix = ba("value" + i); + keyBuf.put(keyPrefix, 0, keyPrefix.length); + keyBuf.position(keySize); + keyBuf.flip(); + valueBuf.put(valuePrefix, 0, valuePrefix.length); + valueBuf.position(valueSize); + valueBuf.flip(); + db.put(getColumnFamily(), new WriteOptions(), keyBuf, valueBuf); + + repay(keyBuffersBB, keyBuf); + repay(valueBuffersBB, valueBuf); } } diff --git a/java/pmd-rules.xml b/java/pmd-rules.xml index b710277f1..97ce03629 100644 --- a/java/pmd-rules.xml +++ b/java/pmd-rules.xml @@ -21,6 +21,7 @@ + diff --git a/java/rocksjni/kv_helper.h b/java/rocksjni/kv_helper.h new file mode 100644 index 000000000..0eb2c6eb0 --- /dev/null +++ b/java/rocksjni/kv_helper.h @@ -0,0 +1,284 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). +// +// This file defines helper methods for Java API write methods +// + +#pragma once + +#include + +#include +#include +#include +#include + +#include "rocksdb/rocksdb_namespace.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksjni/portal.h" + +namespace ROCKSDB_NAMESPACE { + +/** + * @brief Exception class used to make the flow of key/value (Put(), Get(), + * Merge(), ...) calls clearer. + * + * This class is used by Java API JNI methods in try { save/fetch } catch { ... + * } style. 
+ * + */ +class KVException : public std::exception { + public: + // These values are expected on Java API calls to represent the result of a + // Get() which has failed; a negative length is returned to indicate an error. + static const int kNotFound = -1; // the key was not found in RocksDB + static const int kStatusError = + -2; // there was some other error fetching the value for the key + + /** + * @brief Throw a KVException (and potentially a Java exception) if the + * RocksDB status is "bad" + * + * @param env JNI environment needed to create a Java exception + * @param status RocksDB status to examine + */ + static void ThrowOnError(JNIEnv* env, const Status& status) { + if (status.ok()) { + return; + } + if (status.IsNotFound()) { + // IsNotFound does not generate a Java Exception, any other bad status + // does.. + throw KVException(kNotFound); + } + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + throw KVException(kStatusError); + } + + /** + * @brief Throw a KVException and a Java exception + * + * @param env JNI environment needed to create a Java exception + * @param message content of the exception we will throw + */ + static void ThrowNew(JNIEnv* env, const std::string& message) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, message); + throw KVException(kStatusError); + } + + /** + * @brief Throw a KVException if there is already a Java exception in the JNI + * enviroment + * + * @param env + */ + static void ThrowOnError(JNIEnv* env) { + if (env->ExceptionCheck()) { + throw KVException(kStatusError); + } + } + + KVException(jint code) : kCode_(code){}; + + virtual const char* what() const throw() { + return "Exception raised by JNI. There may be a Java exception in the " + "JNIEnv. Please check!"; + } + + jint Code() const { return kCode_; } + + private: + jint kCode_; +}; + +/** + * @brief Construct a slice with the contents of a Java byte array + * + * The slice refers to an array into which the Java byte array's whole region is + * copied + */ +class JByteArraySlice { + public: + JByteArraySlice(JNIEnv* env, const jbyteArray& jarr, const jint jarr_off, + const jint jarr_len) + : arr_(new jbyte[jarr_len]), + slice_(reinterpret_cast(arr_), jarr_len) { + env->GetByteArrayRegion(jarr, jarr_off, jarr_len, arr_); + KVException::ThrowOnError(env); + }; + + ~JByteArraySlice() { + slice_.clear(); + delete[] arr_; + }; + + Slice& slice() { return slice_; } + + private: + jbyte* arr_; + Slice slice_; +}; + +/** + * @brief Construct a slice with the contents of a direct Java ByterBuffer + * + * The slice refers directly to the contents of the buffer, no copy is made. 
+ * + */ +class JDirectBufferSlice { + public: + JDirectBufferSlice(JNIEnv* env, const jobject& jbuffer, + const jint jbuffer_off, const jint jbuffer_len) + : slice_(static_cast(env->GetDirectBufferAddress(jbuffer)) + + jbuffer_off, + jbuffer_len) { + KVException::ThrowOnError(env); + jlong capacity = env->GetDirectBufferCapacity(jbuffer); + if (capacity < jbuffer_off + jbuffer_len) { + auto message = "Direct buffer offset " + std::to_string(jbuffer_off) + + " + length " + std::to_string(jbuffer_len) + + " exceeds capacity " + std::to_string(capacity); + KVException::ThrowNew(env, message); + slice_.clear(); + } + } + + ~JDirectBufferSlice() { slice_.clear(); }; + + Slice& slice() { return slice_; } + + private: + Slice slice_; +}; + +/** + * @brief Wrap a pinnable slice with a method to retrieve the contents back into + * Java + * + * The Java Byte Array version sets the byte array's region from the slice + */ +class JByteArrayPinnableSlice { + public: + /** + * @brief Construct a new JByteArrayPinnableSlice object referring to an + * existing java byte buffer + * + * @param env + * @param jbuffer + * @param jbuffer_off + * @param jbuffer_len + */ + JByteArrayPinnableSlice(JNIEnv* env, const jbyteArray& jbuffer, + const jint jbuffer_off, const jint jbuffer_len) + : env_(env), + jbuffer_(jbuffer), + jbuffer_off_(jbuffer_off), + jbuffer_len_(jbuffer_len){}; + + /** + * @brief Construct an empty new JByteArrayPinnableSlice object + * + */ + JByteArrayPinnableSlice(JNIEnv* env) : env_(env){}; + + PinnableSlice& pinnable_slice() { return pinnable_slice_; } + + ~JByteArrayPinnableSlice() { pinnable_slice_.Reset(); }; + + /** + * @brief copy back contents of the pinnable slice into the Java ByteBuffer + * + * @return jint min of size of buffer and number of bytes in value for + * requested key + */ + jint Fetch() { + const jint pinnable_len = static_cast(pinnable_slice_.size()); + const jint result_len = std::min(jbuffer_len_, pinnable_len); + env_->SetByteArrayRegion( + jbuffer_, jbuffer_off_, result_len, + reinterpret_cast(pinnable_slice_.data())); + KVException::ThrowOnError( + env_); // exception thrown: ArrayIndexOutOfBoundsException + + return pinnable_len; + }; + + /** + * @brief create a new Java buffer and copy the result into it + * + * @return jbyteArray the java buffer holding the result + */ + jbyteArray NewByteArray() { + const jint pinnable_len = static_cast(pinnable_slice_.size()); + jbyteArray jbuffer = env_->NewByteArray(static_cast(pinnable_len)); + KVException::ThrowOnError(env_); // OutOfMemoryError + + env_->SetByteArrayRegion( + jbuffer, 0, pinnable_len, + reinterpret_cast(pinnable_slice_.data())); + KVException::ThrowOnError(env_); // ArrayIndexOutOfBoundsException + + return jbuffer; + } + + private: + JNIEnv* env_; + jbyteArray jbuffer_; + jint jbuffer_off_; + jint jbuffer_len_; + PinnableSlice pinnable_slice_; +}; + +/** + * @brief Wrap a pinnable slice with a method to retrieve the contents back into + * Java + * + * The Java Direct Buffer version copies the memory of the buffer from the slice + */ +class JDirectBufferPinnableSlice { + public: + JDirectBufferPinnableSlice(JNIEnv* env, const jobject& jbuffer, + const jint jbuffer_off, const jint jbuffer_len) + : buffer_(static_cast(env->GetDirectBufferAddress(jbuffer)) + + jbuffer_off), + jbuffer_len_(jbuffer_len) { + jlong capacity = env->GetDirectBufferCapacity(jbuffer); + if (capacity < jbuffer_off + jbuffer_len) { + auto message = + "Invalid value argument. Capacity is less than requested region. 
" + "offset " + + std::to_string(jbuffer_off) + " + length " + + std::to_string(jbuffer_len) + " exceeds capacity " + + std::to_string(capacity); + KVException::ThrowNew(env, message); + } + } + + PinnableSlice& pinnable_slice() { return pinnable_slice_; } + + ~JDirectBufferPinnableSlice() { pinnable_slice_.Reset(); }; + + /** + * @brief copy back contents of the pinnable slice into the Java DirectBuffer + * + * @return jint min of size of buffer and number of bytes in value for + * requested key + */ + jint Fetch() { + const jint pinnable_len = static_cast(pinnable_slice_.size()); + const jint result_len = std::min(jbuffer_len_, pinnable_len); + + memcpy(buffer_, pinnable_slice_.data(), result_len); + return pinnable_len; + }; + + private: + char* buffer_; + jint jbuffer_len_; + PinnableSlice pinnable_slice_; +}; + +} // namespace ROCKSDB_NAMESPACE diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc index 66eb2488b..97244dd5e 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/rocksjni/rocksjni.cc @@ -26,6 +26,7 @@ #include "rocksdb/types.h" #include "rocksdb/version.h" #include "rocksjni/cplusplus_to_java_convert.h" +#include "rocksjni/kv_helper.h" #include "rocksjni/portal.h" #ifdef min @@ -600,55 +601,6 @@ void Java_org_rocksdb_RocksDB_dropColumnFamilies( ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Put -/** - * @return true if the put succeeded, false if a Java Exception was thrown - */ -bool rocksdb_put_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, - jbyteArray jval, jint jval_off, jint jval_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - - jbyte* value = new jbyte[jval_len]; - env->GetByteArrayRegion(jval, jval_off, jval_len, value); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] value; - delete[] key; - return false; - } - - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { - s = db->Put(write_options, cf_handle, key_slice, value_slice); - } else { - // backwards compatibility - s = db->Put(write_options, key_slice, value_slice); - } - - // cleanup - delete[] value; - delete[] key; - - if (s.ok()) { - return true; - } else { - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; - } -} - /* * Class: org_rocksdb_RocksDB * Method: put @@ -662,8 +614,14 @@ void Java_org_rocksdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_put_helper(env, db, default_write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Put(default_write_options, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -682,13 +640,21 @@ void 
Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, ROCKSDB_NAMESPACE::WriteOptions(); auto* cf_handle = reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_put_helper(env, db, default_write_options, cf_handle, jkey, - jkey_off, jkey_len, jval, jval_off, jval_len); - } else { + if (cf_handle == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); + return; + } + + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + db->Put(default_write_options, cf_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; } } @@ -706,8 +672,15 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); - rocksdb_put_helper(env, db, *write_options, nullptr, jkey, jkey_off, jkey_len, - jval, jval_off, jval_len); + + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Put(*write_options, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -724,13 +697,19 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( reinterpret_cast(jwrite_options_handle); auto* cf_handle = reinterpret_cast(jcf_handle); - if (cf_handle != nullptr) { - rocksdb_put_helper(env, db, *write_options, cf_handle, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); - } else { + if (cf_handle == nullptr) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( "Invalid ColumnFamilyHandle.")); + return; + } + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Put(*write_options, cf_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; } } @@ -1309,53 +1288,6 @@ jint Java_org_rocksdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Merge -/** - * @return true if the merge succeeded, false if a Java Exception was thrown - */ -bool rocksdb_merge_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, - const ROCKSDB_NAMESPACE::WriteOptions& write_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle, - jbyteArray jkey, jint jkey_off, jint jkey_len, - jbyteArray jval, jint jval_off, jint jval_len) { - jbyte* key = new jbyte[jkey_len]; - env->GetByteArrayRegion(jkey, jkey_off, jkey_len, key); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), jkey_len); - - jbyte* value = new jbyte[jval_len]; - env->GetByteArrayRegion(jval, jval_off, jval_len, value); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - delete[] value; - delete[] key; - return false; - } - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s; - if (cf_handle != nullptr) { - s = 
db->Merge(write_options, cf_handle, key_slice, value_slice); - } else { - s = db->Merge(write_options, key_slice, value_slice); - } - - // cleanup - delete[] value; - delete[] key; - - if (s.ok()) { - return true; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return false; -} - /* * Class: org_rocksdb_RocksDB * Method: merge @@ -1369,8 +1301,14 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, auto* db = reinterpret_cast(jdb_handle); static const ROCKSDB_NAMESPACE::WriteOptions default_write_options = ROCKSDB_NAMESPACE::WriteOptions(); - rocksdb_merge_helper(env, db, default_write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Merge(default_write_options, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -1388,8 +1326,15 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { - rocksdb_merge_helper(env, db, default_write_options, cf_handle, jkey, - jkey_off, jkey_len, jval, jval_off, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Merge(default_write_options, cf_handle, key.slice(), + value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( @@ -1409,8 +1354,14 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( auto* db = reinterpret_cast(jdb_handle); auto* write_options = reinterpret_cast(jwrite_options_handle); - rocksdb_merge_helper(env, db, *write_options, nullptr, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, db->Merge(*write_options, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -1428,8 +1379,15 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( auto* cf_handle = reinterpret_cast(jcf_handle); if (cf_handle != nullptr) { - rocksdb_merge_helper(env, db, *write_options, cf_handle, jkey, jkey_off, - jkey_len, jval, jval_off, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + db->Merge(*write_options, cf_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } else { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::InvalidArgument( @@ -1437,16 +1395,37 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( } } -jlong rocksdb_iterator_helper( - ROCKSDB_NAMESPACE::DB* db, ROCKSDB_NAMESPACE::ReadOptions read_options, - ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle) { - ROCKSDB_NAMESPACE::Iterator* iterator = nullptr; - if (cf_handle != nullptr) { - iterator = db->NewIterator(read_options, cf_handle); - } else { - iterator = db->NewIterator(read_options); - } - return 
GET_CPLUSPLUS_POINTER(iterator); +/* + * Class: org_rocksdb_RocksDB + * Method: mergeDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +void Java_org_rocksdb_RocksDB_mergeDirect( + JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options_handle, + jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, + jint jval_len, jlong jcf_handle) { + auto* db = reinterpret_cast(jdb_handle); + auto* write_options = + reinterpret_cast(jwrite_options_handle); + auto* cf_handle = + reinterpret_cast(jcf_handle); + + auto merge = [&env, &db, &cf_handle, &write_options]( + ROCKSDB_NAMESPACE::Slice& key, + ROCKSDB_NAMESPACE::Slice& value) { + ROCKSDB_NAMESPACE::Status s; + if (cf_handle == nullptr) { + s = db->Merge(*write_options, key, value); + } else { + s = db->Merge(*write_options, cf_handle, key, value); + } + if (s.ok()) { + return; + } + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); + }; + ROCKSDB_NAMESPACE::JniUtil::kv_op_direct(merge, env, jkey, jkey_off, jkey_len, + jval, jval_off, jval_len); } /* @@ -2644,55 +2623,17 @@ jobjectArray Java_org_rocksdb_RocksDB_keyMayExistFoundValue( /* * Class: org_rocksdb_RocksDB * Method: iterator - * Signature: (J)J - */ -jlong Java_org_rocksdb_RocksDB_iterator__J(JNIEnv*, jobject, jlong db_handle) { - auto* db = reinterpret_cast(db_handle); - return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iterator - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_RocksDB_iterator__JJ(JNIEnv*, jobject, jlong db_handle, - jlong jread_options_handle) { - auto* db = reinterpret_cast(db_handle); - auto& read_options = - *reinterpret_cast(jread_options_handle); - return rocksdb_iterator_helper(db, read_options, nullptr); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iteratorCF - * Signature: (JJ)J - */ -jlong Java_org_rocksdb_RocksDB_iteratorCF__JJ(JNIEnv*, jobject, jlong db_handle, - jlong jcf_handle) { - auto* db = reinterpret_cast(db_handle); - auto* cf_handle = - reinterpret_cast(jcf_handle); - return rocksdb_iterator_helper(db, ROCKSDB_NAMESPACE::ReadOptions(), - cf_handle); -} - -/* - * Class: org_rocksdb_RocksDB - * Method: iteratorCF * Signature: (JJJ)J */ -jlong Java_org_rocksdb_RocksDB_iteratorCF__JJJ(JNIEnv*, jobject, - jlong db_handle, - jlong jcf_handle, - jlong jread_options_handle) { +jlong Java_org_rocksdb_RocksDB_iterator(JNIEnv*, jobject, jlong db_handle, + jlong jcf_handle, + jlong jread_options_handle) { auto* db = reinterpret_cast(db_handle); auto* cf_handle = reinterpret_cast(jcf_handle); auto& read_options = *reinterpret_cast(jread_options_handle); - return rocksdb_iterator_helper(db, read_options, cf_handle); + return GET_CPLUSPLUS_POINTER(db->NewIterator(read_options, cf_handle)); } /* diff --git a/java/rocksjni/transaction.cc b/java/rocksjni/transaction.cc index 1a0a64fc7..3e90db8bc 100644 --- a/java/rocksjni/transaction.cc +++ b/java/rocksjni/transaction.cc @@ -14,6 +14,7 @@ #include "include/org_rocksdb_Transaction.h" #include "rocksjni/cplusplus_to_java_convert.h" +#include "rocksjni/kv_helper.h" #include "rocksjni/portal.h" #if defined(_MSC_VER) @@ -158,90 +159,113 @@ typedef std::function FnGet; -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -jbyteArray txn_get_helper(JNIEnv* env, const FnGet& fn_get, - const jlong& jread_options_handle, - const jbyteArray& jkey, const jint& jkey_part_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == 
nullptr) { - // exception thrown: OutOfMemoryError +/* + * Class: org_rocksdb_Transaction + * Method: get + * Signature: (JJ[BIIJ)[B + */ +jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BIIJ( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, + jbyteArray jkey, jint jkey_off, jint jkey_part_len, + jlong jcolumn_family_handle) { + auto* txn = reinterpret_cast(jhandle); + auto* read_options = + reinterpret_cast(jread_options_handle); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArrayPinnableSlice value(env); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Get(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice())); + return value.NewByteArray(); + } catch (ROCKSDB_NAMESPACE::KVException&) { return nullptr; } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); +} +/* + * Class: org_rocksdb_Transaction + * Method: get + * Signature: (JJ[BII)[B + */ +jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BII( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, + jbyteArray jkey, jint jkey_off, jint jkey_part_len) { + auto* txn = reinterpret_cast(jhandle); auto* read_options = reinterpret_cast(jread_options_handle); - std::string value; - ROCKSDB_NAMESPACE::Status s = fn_get(*read_options, key_slice, &value); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - - if (s.IsNotFound()) { + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArrayPinnableSlice value(env); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Get(*read_options, key.slice(), &value.pinnable_slice())); + return value.NewByteArray(); + } catch (ROCKSDB_NAMESPACE::KVException&) { return nullptr; } - - if (s.ok()) { - jbyteArray jret_value = env->NewByteArray(static_cast(value.size())); - if (jret_value == nullptr) { - // exception thrown: OutOfMemoryError - return nullptr; - } - env->SetByteArrayRegion( - jret_value, 0, static_cast(value.size()), - const_cast(reinterpret_cast(value.c_str()))); - if (env->ExceptionCheck()) { - // exception thrown: ArrayIndexOutOfBoundsException - return nullptr; - } - return jret_value; - } - - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); - return nullptr; } /* * Class: org_rocksdb_Transaction * Method: get - * Signature: (JJ[BIJ)[B + * Signature: (JJ[BII[BIIJ)I */ -jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BIJ( +jint Java_org_rocksdb_Transaction_get__JJ_3BII_3BIIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { + jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, + jint jval_off, jint jval_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); + auto* read_options = + reinterpret_cast(jread_options_handle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); - FnGet fn_get = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Get, txn, std::placeholders::_1, - column_family_handle, std::placeholders::_2, std::placeholders::_3); - return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); + try { + 
ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArrayPinnableSlice value(env, jval, jval_off, + jval_part_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Get(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice())); + return value.Fetch(); + } catch (const ROCKSDB_NAMESPACE::KVException& e) { + return e.Code(); + } } /* * Class: org_rocksdb_Transaction - * Method: get - * Signature: (JJ[BI)[B + * Method: getDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I */ -jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len) { +jint Java_org_rocksdb_Transaction_getDirect(JNIEnv* env, jobject, jlong jhandle, + jlong jread_options_handle, + jobject jkey_bb, jint jkey_off, + jint jkey_part_len, jobject jval_bb, + jint jval_off, jint jval_part_len, + jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); - FnGet fn_get = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Get, txn, std::placeholders::_1, - std::placeholders::_2, std::placeholders::_3); - return txn_get_helper(env, fn_get, jread_options_handle, jkey, jkey_part_len); + auto* read_options = + reinterpret_cast(jread_options_handle); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, + jkey_part_len); + ROCKSDB_NAMESPACE::JDirectBufferPinnableSlice value(env, jval_bb, jval_off, + jval_part_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Get(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice())); + return value.Fetch(); + } catch (const ROCKSDB_NAMESPACE::KVException& e) { + return e.Code(); + } } // TODO(AR) consider refactoring to share this between here and rocksjni.cc @@ -428,47 +452,91 @@ jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B( /* * Class: org_rocksdb_Transaction * Method: getForUpdate - * Signature: (JJ[BIJZZ)[B + * Signature: (JJ[BIIJZZ)[B */ -jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIJZZ( +jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIIJZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, - jboolean jexclusive, jboolean jdo_validate) { + jbyteArray jkey, jint jkey_off, jint jkey_part_len, + jlong jcolumn_family_handle, jboolean jexclusive, jboolean jdo_validate) { + auto* read_options = + reinterpret_cast(jread_options_handle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); auto* txn = reinterpret_cast(jhandle); - FnGet fn_get_for_update = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, - std::placeholders::_1, column_family_handle, std::placeholders::_2, - std::placeholders::_3, jexclusive, jdo_validate); - return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, - jkey_part_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArrayPinnableSlice value(env); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + txn->GetForUpdate(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice(), jexclusive, jdo_validate)); + return value.NewByteArray(); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return nullptr; + } } /* * Class: org_rocksdb_Transaction * Method: 
getForUpdate - * Signature: (JJ[BIZZ)[B + * Signature: (JJ[BII[BIIJZZ)I */ -jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIZZ( +jint Java_org_rocksdb_Transaction_getForUpdate__JJ_3BII_3BIIJZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, - jbyteArray jkey, jint jkey_part_len, jboolean jexclusive, - jboolean jdo_validate) { + jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, + jint jval_off, jint jval_len, jlong jcolumn_family_handle, + jboolean jexclusive, jboolean jdo_validate) { auto* txn = reinterpret_cast(jhandle); - FnGet fn_get_for_update = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::GetForUpdate, txn, - std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, - jexclusive, jdo_validate); - return txn_get_helper(env, fn_get_for_update, jread_options_handle, jkey, - jkey_part_len); + auto* read_options = + reinterpret_cast(jread_options_handle); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArrayPinnableSlice value(env, jval, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + txn->GetForUpdate(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice(), jexclusive, jdo_validate)); + return value.Fetch(); + } catch (const ROCKSDB_NAMESPACE::KVException& e) { + return e.Code(); + } +} + +/* + * Class: org_rocksdb_Transaction + * Method: getDirectForUpdate + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZZ)I + */ +jint Java_org_rocksdb_Transaction_getDirectForUpdate( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, + jobject jkey_bb, jint jkey_off, jint jkey_part_len, jobject jval_bb, + jint jval_off, jint jval_len, jlong jcolumn_family_handle, + jboolean jexclusive, jboolean jdo_validate) { + auto* txn = reinterpret_cast(jhandle); + auto* read_options = + reinterpret_cast(jread_options_handle); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, + jkey_part_len); + ROCKSDB_NAMESPACE::JDirectBufferPinnableSlice value(env, jval_bb, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + txn->GetForUpdate(*read_options, column_family_handle, key.slice(), + &value.pinnable_slice(), jexclusive, jdo_validate)); + return value.Fetch(); + } catch (const ROCKSDB_NAMESPACE::KVException& e) { + return e.Code(); + } } /* @@ -522,114 +590,109 @@ jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B( /* * Class: org_rocksdb_Transaction * Method: getIterator - * Signature: (JJ)J + * Signature: (JJJ)J */ -jlong Java_org_rocksdb_Transaction_getIterator__JJ(JNIEnv* /*env*/, - jobject /*jobj*/, - jlong jhandle, - jlong jread_options_handle) { +jlong Java_org_rocksdb_Transaction_getIterator(JNIEnv* /*env*/, + jobject /*jobj*/, jlong jhandle, + jlong jread_options_handle, + jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* read_options = reinterpret_cast(jread_options_handle); - return GET_CPLUSPLUS_POINTER(txn->GetIterator(*read_options)); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + return GET_CPLUSPLUS_POINTER( + txn->GetIterator(*read_options, column_family_handle)); } /* * Class: org_rocksdb_Transaction - * Method: getIterator - * Signature: (JJJ)J + * Method: put + * Signature: (J[BII[BIIJZ)V */ 
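For context: the jint-returning Get/GetForUpdate entry points above copy the result into a caller-supplied buffer and report the full value size, and they back the new GetStatus-returning overloads added to org.rocksdb.Transaction later in this patch. A minimal usage sketch from the Java side, assuming a transaction obtained from an already-open TransactionDB or OptimisticTransactionDB; the class name, helper name, key literal and buffer are illustrative only, not part of the patch:

    import java.nio.charset.StandardCharsets;

    import org.rocksdb.GetStatus;
    import org.rocksdb.ReadOptions;
    import org.rocksdb.RocksDBException;
    import org.rocksdb.Status;
    import org.rocksdb.Transaction;

    public class TransactionGetSketch {
      // Reads "some-key" into a caller-owned buffer and reports whether the
      // buffer was large enough; "txn" is assumed to come from
      // beginTransaction(...) on an already-open database.
      static int readInto(final Transaction txn, final byte[] valueBuf)
          throws RocksDBException {
        final byte[] key = "some-key".getBytes(StandardCharsets.UTF_8);
        try (final ReadOptions readOptions = new ReadOptions()) {
          final GetStatus result = txn.get(readOptions, key, valueBuf);
          if (result.status.getCode() == Status.Code.NotFound) {
            return -1;  // key not present
          }
          if (result.requiredSize > valueBuf.length) {
            // valueBuf holds only a partial value; retry with a buffer of at
            // least result.requiredSize bytes.
          }
          return result.requiredSize;
        }
      }
    }

As the Java wrappers added later in this patch suggest, a negative size from the native layer is surfaced as Status.Code.NotFound in the returned GetStatus, while other failures are thrown as RocksDBException.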
-jlong Java_org_rocksdb_Transaction_getIterator__JJJ( - JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, - jlong jread_options_handle, jlong jcolumn_family_handle) { +void Java_org_rocksdb_Transaction_put__J_3BII_3BIIJZ( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, + jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, + jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); - auto* read_options = - reinterpret_cast(jread_options_handle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); - return GET_CPLUSPLUS_POINTER( - txn->GetIterator(*read_options, column_family_handle)); -} - -typedef std::function - FnWriteKV; - -// TODO(AR) consider refactoring to share this between here and rocksjni.cc -void txn_write_kv_helper(JNIEnv* env, const FnWriteKV& fn_write_kv, - const jbyteArray& jkey, const jint& jkey_part_len, - const jbyteArray& jval, const jint& jval_len) { - jbyte* key = env->GetByteArrayElements(jkey, nullptr); - if (key == nullptr) { - // exception thrown: OutOfMemoryError + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Put(column_family_handle, key.slice(), value.slice(), + jassume_tracked)); + } catch (ROCKSDB_NAMESPACE::KVException&) { return; } - jbyte* value = env->GetByteArrayElements(jval, nullptr); - if (value == nullptr) { - // exception thrown: OutOfMemoryError - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); - return; - } - ROCKSDB_NAMESPACE::Slice key_slice(reinterpret_cast(key), - jkey_part_len); - ROCKSDB_NAMESPACE::Slice value_slice(reinterpret_cast(value), - jval_len); - - ROCKSDB_NAMESPACE::Status s = fn_write_kv(key_slice, value_slice); - - // trigger java unref on key. - // by passing JNI_ABORT, it will simply release the reference without - // copying the result back to the java byte array. 
- env->ReleaseByteArrayElements(jval, value, JNI_ABORT); - env->ReleaseByteArrayElements(jkey, key, JNI_ABORT); +} - if (s.ok()) { +/* + * Class: org_rocksdb_Transaction + * Method: put + * Signature: (J[BII[BII)V + */ +void Java_org_rocksdb_Transaction_put__J_3BII_3BII( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, + jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, + jint jval_len) { + auto* txn = reinterpret_cast(jhandle); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Put(key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { return; } - ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); } /* * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[BI[BIJZ)V + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V */ -void Java_org_rocksdb_Transaction_put__J_3BI_3BIJZ( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, +void Java_org_rocksdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( + JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, + jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); - FnWriteKV fn_put = - std::bind(&ROCKSDB_NAMESPACE::Transaction::Put, txn, - column_family_handle, std::placeholders::_1, - std::placeholders::_2, jassume_tracked); - txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JDirectBufferSlice value(env, jval_bb, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Put(column_family_handle, key.slice(), value.slice(), + jassume_tracked)); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* * Class: org_rocksdb_Transaction - * Method: put - * Signature: (J[BI[BI)V + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_Transaction_put__J_3BI_3BI(JNIEnv* env, jobject /*jobj*/, - jlong jhandle, jbyteArray jkey, - jint jkey_part_len, - jbyteArray jval, - jint jval_len) { +void Java_org_rocksdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( + JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, + jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len) { auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_put = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Put, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_helper(env, fn_put, jkey, jkey_part_len, jval, jval_len); + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JDirectBufferSlice value(env, jval_bb, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Put(key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } typedef std::function(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); - FnWriteKV fn_merge = - std::bind(&ROCKSDB_NAMESPACE::Transaction::Merge, txn, - column_family_handle, 
std::placeholders::_1, - std::placeholders::_2, jassume_tracked); - txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Merge(column_family_handle, key.slice(), value.slice(), + jassume_tracked)); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* * Class: org_rocksdb_Transaction * Method: merge - * Signature: (J[BI[BI)V + * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_Transaction_merge__J_3BI_3BI( +void Java_org_rocksdb_Transaction_merge__J_3BII_3BII( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len) { + jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, + jint jval_len) { + auto* txn = reinterpret_cast(jhandle); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Merge(key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } +} + +/* + * Class: org_rocksdb_Transaction + * Method: mergeDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V + */ +JNIEXPORT void JNICALL +Java_org_rocksdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( + JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, + jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len, + jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_merge = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::Merge, txn, std::placeholders::_1, - std::placeholders::_2); - txn_write_kv_helper(env, fn_merge, jkey, jkey_part_len, jval, jval_len); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JDirectBufferSlice value(env, jval_bb, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Merge(column_family_handle, key.slice(), value.slice(), + jassume_tracked)); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } +} + +/* + * Class: org_rocksdb_Transaction + * Method: mergeDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL +Java_org_rocksdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( + JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, + jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len) { + auto* txn = reinterpret_cast(jhandle); + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey_bb, jkey_off, jkey_len); + ROCKSDB_NAMESPACE::JDirectBufferSlice value(env, jval_bb, jval_off, + jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->Merge(key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } typedef std::function( jcolumn_family_handle); - FnWriteKV fn_put_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, - column_family_handle, std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, - jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, 0, 
jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, 0, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + txn->PutUntracked(column_family_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -1084,13 +1199,14 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_put_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::PutUntracked, txn, - std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_put_untracked, jkey, jkey_part_len, jval, - jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, 0, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, 0, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, txn->PutUntracked(key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* @@ -1137,42 +1253,50 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BI( /* * Class: org_rocksdb_Transaction * Method: mergeUntracked - * Signature: (J[BI[BIJ)V + * Signature: (J[BII[BIIJ)V */ -void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BIJ( +void Java_org_rocksdb_Transaction_mergeUntracked( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len, - jlong jcolumn_family_handle) { + jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, + jint jval_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); auto* column_family_handle = reinterpret_cast( jcolumn_family_handle); - FnWriteKV fn_merge_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, - column_family_handle, std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, - jval_len); + try { + ROCKSDB_NAMESPACE::JByteArraySlice key(env, jkey, jkey_off, jkey_part_len); + ROCKSDB_NAMESPACE::JByteArraySlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + env, + txn->MergeUntracked(column_family_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* * Class: org_rocksdb_Transaction - * Method: mergeUntracked - * Signature: (J[BI[BI)V + * Method: mergeUntrackedDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_Transaction_mergeUntracked__J_3BI_3BI( - JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, - jint jkey_part_len, jbyteArray jval, jint jval_len) { +void Java_org_rocksdb_Transaction_mergeUntrackedDirect( + JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobject jkey, jint jkey_off, + jint jkey_part_len, jobject jval, jint jval_off, jint jval_len, + jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); - FnWriteKV fn_merge_untracked = - std::bind( - &ROCKSDB_NAMESPACE::Transaction::MergeUntracked, txn, - std::placeholders::_1, std::placeholders::_2); - txn_write_kv_helper(env, fn_merge_untracked, jkey, jkey_part_len, jval, - jval_len); + auto* column_family_handle = + reinterpret_cast( + jcolumn_family_handle); + try { + ROCKSDB_NAMESPACE::JDirectBufferSlice key(env, jkey, jkey_off, + jkey_part_len); + ROCKSDB_NAMESPACE::JDirectBufferSlice value(env, jval, jval_off, jval_len); + ROCKSDB_NAMESPACE::KVException::ThrowOnError( + 
env, + txn->MergeUntracked(column_family_handle, key.slice(), value.slice())); + } catch (ROCKSDB_NAMESPACE::KVException&) { + return; + } } /* diff --git a/java/src/main/java/org/rocksdb/GetStatus.java b/java/src/main/java/org/rocksdb/GetStatus.java new file mode 100644 index 000000000..a2afafe39 --- /dev/null +++ b/java/src/main/java/org/rocksdb/GetStatus.java @@ -0,0 +1,32 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * The result for a fetch + * and the total size of the object fetched. + * If the target of the fetch is not big enough, this may be bigger than the contents of the target. + */ +public class GetStatus { + public final Status status; + public final int requiredSize; + + /** + * Constructor used for success status, when the value is contained in the buffer + * + * @param status the status of the request to fetch into the buffer + * @param requiredSize the size of the data, which may be bigger than the buffer + */ + GetStatus(final Status status, final int requiredSize) { + this.status = status; + this.requiredSize = requiredSize; + } + + static GetStatus fromStatusCode(final Status.Code code, final int requiredSize) { + return new GetStatus(new Status(code, Status.SubCode.getSubCode((byte) 0), null), requiredSize); + } +} diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java b/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java index 80d3c720b..283f19a31 100644 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java +++ b/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java @@ -86,6 +86,7 @@ public static OptimisticTransactionDB open(final DBOptions dbOptions, // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. otdb.storeOptionsInstance(dbOptions); + otdb.storeDefaultColumnFamilyHandle(otdb.makeDefaultColumnFamilyHandle()); for (int i = 1; i < handles.length; i++) { columnFamilyHandles.add(new ColumnFamilyHandle(otdb, handles[i])); diff --git a/java/src/main/java/org/rocksdb/ReadOptions.java b/java/src/main/java/org/rocksdb/ReadOptions.java index 481101fc9..c444ae167 100644 --- a/java/src/main/java/org/rocksdb/ReadOptions.java +++ b/java/src/main/java/org/rocksdb/ReadOptions.java @@ -573,7 +573,6 @@ public ReadOptions setAutoPrefixMode(final boolean mode) { * @see #iterStartTs() * @return Reference to timestamp or null if there is no timestamp defined. */ - @SuppressWarnings("PMD.ConfusingTernary") public Slice timestamp() { assert (isOwningHandle()); final long timestampSliceHandle = timestamp(nativeHandle_); @@ -623,7 +622,6 @@ public ReadOptions setTimestamp(final AbstractSlice timestamp) { * @return Reference to lower bound timestamp or null if there is no lower bound timestamp * defined. 
*/ - @SuppressWarnings("PMD.ConfusingTernary") public Slice iterStartTs() { assert (isOwningHandle()); final long iterStartTsHandle = iterStartTs(nativeHandle_); diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 54e95e6e8..120323e54 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -6,6 +6,7 @@ package org.rocksdb; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.rocksdb.util.BufferUtil.CheckBounds; import java.io.IOException; import java.nio.ByteBuffer; @@ -39,6 +40,11 @@ private enum LibraryState { static final String PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD = "Performance optimization for a very specific workload"; + private static final String BB_ALL_DIRECT_OR_INDIRECT = + "ByteBuffer parameters must all be direct, or must all be indirect"; + private ColumnFamilyHandle defaultColumnFamilyHandle_; + private final ReadOptions defaultReadOptions_ = new ReadOptions(); + private final List ownedColumnFamilyHandles = new ArrayList<>(); /** @@ -256,6 +262,7 @@ public static RocksDB open(final Options options, final String path) // the currently-created RocksDB. final RocksDB db = new RocksDB(open(options.nativeHandle_, path)); db.storeOptionsInstance(options); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -321,6 +328,7 @@ public static RocksDB open(final DBOptions options, final String path, } db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -395,6 +403,7 @@ public static RocksDB openReadOnly(final Options options, final String path, // the currently-created RocksDB. final RocksDB db = new RocksDB(openROnly(options.nativeHandle_, path, errorIfWalFileExists)); db.storeOptionsInstance(options); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -505,6 +514,7 @@ public static RocksDB openReadOnly(final DBOptions options, final String path, } db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -542,6 +552,7 @@ public static RocksDB openAsSecondary(final Options options, final String path, // the currently-created RocksDB. 
final RocksDB db = new RocksDB(openAsSecondary(options.nativeHandle_, path, secondaryPath)); db.storeOptionsInstance(options); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -602,6 +613,7 @@ public static RocksDB openAsSecondary(final DBOptions options, final String path } db.ownedColumnFamilyHandles.addAll(columnFamilyHandles); + db.storeDefaultColumnFamilyHandle(db.makeDefaultColumnFamilyHandle()); return db; } @@ -887,8 +899,8 @@ public void put(final byte[] key, final byte[] value) public void put(final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); put(nativeHandle_, key, offset, len, value, vOffset, vLen); } @@ -937,8 +949,8 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); put(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } @@ -982,8 +994,8 @@ public void put(final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen); } @@ -1031,9 +1043,18 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, */ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, - value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); + if (key.isDirect() && value.isDirect()) { + putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, + value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + put(nativeHandle_, writeOpts.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } key.position(key.limit()); value.position(value.limit()); } @@ -1055,9 +1076,18 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions */ public void put(final WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, - value.position(), value.remaining(), 0); + if (key.isDirect() && value.isDirect()) { + putDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), value, + value.position(), 
value.remaining(), 0); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + put(nativeHandle_, writeOpts.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), + value.remaining()); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } key.position(key.limit()); value.position(value.limit()); } @@ -1089,8 +1119,8 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); put(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } @@ -1268,9 +1298,18 @@ public void delete(final ColumnFamilyHandle columnFamilyHandle, */ public int get(final ReadOptions opt, final ByteBuffer key, final ByteBuffer value) throws RocksDBException { - assert key.isDirect() && value.isDirect(); - final int result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), - key.remaining(), value, value.position(), value.remaining(), 0); + final int result; + if (key.isDirect() && value.isDirect()) { + result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), + value, value.position(), value.remaining(), 0); + } else if (!key.isDirect() && !value.isDirect()) { + result = + get(nativeHandle_, opt.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), + value.remaining(), defaultColumnFamilyHandle_.nativeHandle_); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } if (result != NOT_FOUND) { value.limit(Math.min(value.limit(), value.position() + result)); } @@ -1556,8 +1595,8 @@ public void merge(final byte[] key, final byte[] value) */ public void merge(final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); merge(nativeHandle_, key, offset, len, value, vOffset, vLen); } @@ -1601,8 +1640,8 @@ public void merge(final ColumnFamilyHandle columnFamilyHandle, public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); merge(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } @@ -1648,12 +1687,48 @@ public void merge(final WriteOptions writeOpts, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); merge(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen); } + public void merge(final WriteOptions writeOpts, final ByteBuffer 
key, final ByteBuffer value) + throws RocksDBException { + if (key.isDirect() && value.isDirect()) { + mergeDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), + value, value.position(), value.remaining(), 0); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + merge(nativeHandle_, writeOpts.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), + value.remaining()); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + + public void merge(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions writeOpts, + final ByteBuffer key, final ByteBuffer value) throws RocksDBException { + if (key.isDirect() && value.isDirect()) { + mergeDirect(nativeHandle_, writeOpts.nativeHandle_, key, key.position(), key.remaining(), + value, value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + merge(nativeHandle_, writeOpts.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + /** * Delete the database entry (if any) for "key". Returns OK on * success, and a non-OK status on error. It is not an error if "key" @@ -1740,8 +1815,8 @@ public void merge( final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); merge(nativeHandle_, writeOpts.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); @@ -1825,8 +1900,8 @@ public int get(final byte[] key, final byte[] value) throws RocksDBException { public int get(final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); return get(nativeHandle_, key, offset, len, value, vOffset, vLen); } @@ -1882,8 +1957,8 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException, IllegalArgumentException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); return get(nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } @@ -1937,8 +2012,8 @@ public int get(final ReadOptions opt, final byte[] key, public int get(final ReadOptions opt, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + 
CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, vOffset, vLen); } @@ -1998,8 +2073,8 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, final byte[] key, final int offset, final int len, final byte[] value, final int vOffset, final int vLen) throws RocksDBException { - checkBounds(offset, len, key.length); - checkBounds(vOffset, vLen, value.length); + CheckBounds(offset, len, key.length); + CheckBounds(vOffset, vLen, value.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len, value, vOffset, vLen, columnFamilyHandle.nativeHandle_); } @@ -2038,7 +2113,7 @@ public byte[] get(final byte[] key) throws RocksDBException { */ public byte[] get(final byte[] key, final int offset, final int len) throws RocksDBException { - checkBounds(offset, len, key.length); + CheckBounds(offset, len, key.length); return get(nativeHandle_, key, offset, len); } @@ -2083,7 +2158,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final int offset, final int len) throws RocksDBException { - checkBounds(offset, len, key.length); + CheckBounds(offset, len, key.length); return get(nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); } @@ -2125,7 +2200,7 @@ public byte[] get(final ReadOptions opt, final byte[] key) */ public byte[] get(final ReadOptions opt, final byte[] key, final int offset, final int len) throws RocksDBException { - checkBounds(offset, len, key.length); + CheckBounds(offset, len, key.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len); } @@ -2172,7 +2247,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions opt, final byte[] key, final int offset, final int len) throws RocksDBException { - checkBounds(offset, len, key.length); + CheckBounds(offset, len, key.length); return get(nativeHandle_, opt.nativeHandle_, key, offset, len, columnFamilyHandle.nativeHandle_); } @@ -2973,7 +3048,7 @@ public boolean keyMayExist( final ReadOptions readOptions, final byte[] key, final int offset, final int len, /* @Nullable */ final Holder valueHolder) { - checkBounds(offset, len, key.length); + CheckBounds(offset, len, key.length); if (valueHolder == null) { return keyMayExist(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, @@ -3113,9 +3188,11 @@ public boolean keyMayExist(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, final ByteBuffer key) { assert key != null : "key ByteBuffer parameter cannot be null"; assert key.isDirect() : "key parameter must be a direct ByteBuffer"; - return keyMayExistDirect(nativeHandle_, + final boolean result = keyMayExistDirect(nativeHandle_, columnFamilyHandle == null ? 0 : columnFamilyHandle.nativeHandle_, readOptions == null ? 
0 : readOptions.nativeHandle_, key, key.position(), key.limit()); + key.position(key.limit()); + return result; } /** @@ -3148,6 +3225,7 @@ public KeyMayExist keyMayExist(final ColumnFamilyHandle columnFamilyHandle, value, value.position(), value.remaining()); final int valueLength = result[1]; value.limit(value.position() + Math.min(valueLength, value.remaining())); + key.position(key.limit()); return new KeyMayExist(KeyMayExist.KeyMayExistEnum.values()[result[0]], valueLength); } @@ -3164,7 +3242,9 @@ public KeyMayExist keyMayExist(final ColumnFamilyHandle columnFamilyHandle, * @return instance of iterator object. */ public RocksIterator newIterator() { - return new RocksIterator(this, iterator(nativeHandle_)); + return new RocksIterator(this, + iterator(nativeHandle_, defaultColumnFamilyHandle_.nativeHandle_, + defaultReadOptions_.nativeHandle_)); } /** @@ -3181,8 +3261,9 @@ public RocksIterator newIterator() { * @return instance of iterator object. */ public RocksIterator newIterator(final ReadOptions readOptions) { - return new RocksIterator(this, iterator(nativeHandle_, - readOptions.nativeHandle_)); + return new RocksIterator(this, + iterator( + nativeHandle_, defaultColumnFamilyHandle_.nativeHandle_, readOptions.nativeHandle_)); } /** @@ -3201,8 +3282,9 @@ public RocksIterator newIterator(final ReadOptions readOptions) { */ public RocksIterator newIterator( final ColumnFamilyHandle columnFamilyHandle) { - return new RocksIterator(this, iteratorCF(nativeHandle_, - columnFamilyHandle.nativeHandle_)); + return new RocksIterator(this, + iterator( + nativeHandle_, columnFamilyHandle.nativeHandle_, defaultReadOptions_.nativeHandle_)); } /** @@ -3222,8 +3304,8 @@ public RocksIterator newIterator( */ public RocksIterator newIterator(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions) { - return new RocksIterator(this, iteratorCF(nativeHandle_, - columnFamilyHandle.nativeHandle_, readOptions.nativeHandle_)); + return new RocksIterator( + this, iterator(nativeHandle_, columnFamilyHandle.nativeHandle_, readOptions.nativeHandle_)); } /** @@ -4438,8 +4520,17 @@ public void verifyChecksum() throws RocksDBException { * @return The handle of the default column family */ public ColumnFamilyHandle getDefaultColumnFamily() { - final ColumnFamilyHandle cfHandle = new ColumnFamilyHandle(this, - getDefaultColumnFamily(nativeHandle_)); + return defaultColumnFamilyHandle_; + } + + /** + * Create a handle for the default CF on open + * + * @return the default family handle + */ + protected ColumnFamilyHandle makeDefaultColumnFamilyHandle() { + final ColumnFamilyHandle cfHandle = + new ColumnFamilyHandle(this, getDefaultColumnFamily(nativeHandle_)); cfHandle.disOwnNativeHandle(); return cfHandle; } @@ -4688,7 +4779,11 @@ protected void storeOptionsInstance(final DBOptionsInterface options) { options_ = options; } - private static void checkBounds(final int offset, final int len, final int size) { + protected void storeDefaultColumnFamilyHandle(ColumnFamilyHandle columnFamilyHandle) { + defaultColumnFamilyHandle_ = columnFamilyHandle; + } + + private static void checkBounds(int offset, int len, int size) { if ((offset | len | (offset + len) | (size - (offset + len))) < 0) { throw new IndexOutOfBoundsException(String.format("offset(%d), len(%d), size(%d)", offset, len, size)); } @@ -4833,6 +4928,10 @@ private native void merge(final long handle, final long writeOptHandle, final byte[] key, final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, final int 
valueLength, final long cfHandle) throws RocksDBException; + private native void mergeDirect(long handle, long writeOptHandle, ByteBuffer key, int keyOffset, + int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) + throws RocksDBException; + private native void write0(final long handle, final long writeOptHandle, final long wbHandle) throws RocksDBException; private native void write1(final long handle, final long writeOptHandle, @@ -4894,11 +4993,7 @@ private native byte[][] keyMayExistFoundValue( private native void putDirect(long handle, long writeOptHandle, ByteBuffer key, int keyOffset, int keyLength, ByteBuffer value, int valueOffset, int valueLength, long cfHandle) throws RocksDBException; - private native long iterator(final long handle); - private native long iterator(final long handle, final long readOptHandle); - private native long iteratorCF(final long handle, final long cfHandle); - private native long iteratorCF(final long handle, final long cfHandle, - final long readOptHandle); + private native long iterator(final long handle, final long cfHandle, final long readOptHandle); private native long[] iterators(final long handle, final long[] columnFamilyHandles, final long readOptHandle) throws RocksDBException; diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/rocksdb/RocksIterator.java index 20e56d2eb..b35dea2af 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/rocksdb/RocksIterator.java @@ -5,6 +5,8 @@ package org.rocksdb; +import static org.rocksdb.util.BufferUtil.CheckBounds; + import java.nio.ByteBuffer; /** @@ -39,6 +41,45 @@ public byte[] key() { return key0(nativeHandle_); } + /** + *

Return the key for the current entry. The underlying storage for + * the returned slice is valid only until the next modification of + * the iterator. + * + * REQUIRES: {@link #isValid()} + * + * @param key the out-value to receive the retrieved key. + * @return The size of the actual key. If the return key is greater than + * the length of the buffer {@code key}, then it indicates that the size of the + * input buffer {@code key} is insufficient and partial result will + * be returned. + */ + public int key(final byte[] key) { + assert isOwningHandle(); + return keyByteArray0(nativeHandle_, key, 0, key.length); + } + + /** + *
Return the key for the current entry. The underlying storage for + * the returned slice is valid only until the next modification of + * the iterator. + * + * REQUIRES: {@link #isValid()} + * + * @param key the out-value to receive the retrieved key. + * @param offset in {@code key} at which to place the retrieved key + * @param len limit to length of received key returned + * @return The size of the actual key. If the return key is greater than + * {@code len}, then it indicates that the size of the + * input buffer {@code key} is insufficient and partial result will + * be returned. + */ + public int key(final byte[] key, final int offset, final int len) { + assert isOwningHandle(); + CheckBounds(offset, len, key.length); + return keyByteArray0(nativeHandle_, key, offset, len); + } + /** *
Return the key for the current entry. The underlying storage for * the returned slice is valid only until the next modification of @@ -48,7 +89,6 @@ public byte[] key() { * * @param key the out-value to receive the retrieved key. * It is using position and limit. Limit is set according to key size. - * Supports direct buffer only. * @return The size of the actual key. If the return key is greater than the * length of {@code key}, then it indicates that the size of the * input buffer {@code key} is insufficient and partial result will * be returned. @@ -90,7 +130,6 @@ public byte[] value() { * * @param value the out-value to receive the retrieved value. * It is using position and limit. Limit is set according to value size. - * Supports direct buffer only. * @return The size of the actual value. If the return value is greater than the * length of {@code value}, then it indicates that the size of the * input buffer {@code value} is insufficient and partial result will * be returned. @@ -110,6 +149,45 @@ public int value(final ByteBuffer value) { return result; } + /** + *
Return the value for the current entry. The underlying storage for + * the returned slice is valid only until the next modification of + * the iterator. + * + * REQUIRES: {@link #isValid()} + * + * @param value the out-value to receive the retrieved value. + * @return The size of the actual value. If the return value is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. + */ + public int value(final byte[] value) { + assert isOwningHandle(); + return valueByteArray0(nativeHandle_, value, 0, value.length); + } + + /** + *
Return the value for the current entry. The underlying storage for + * the returned slice is valid only until the next modification of + * the iterator. + * + * REQUIRES: {@link #isValid()}
    + * + * @param value the out-value to receive the retrieved value. + * @param offset the offset within value at which to place the result + * @param len the length available in value after offset, for placing the result + * @return The size of the actual value. If the return value is greater than {@code len}, + * then it indicates that the size of the + * input buffer {@code value} is insufficient and partial result will + * be returned. + */ + public int value(final byte[] value, final int offset, final int len) { + assert isOwningHandle(); + CheckBounds(offset, len, value.length); + return valueByteArray0(nativeHandle_, value, offset, len); + } + @Override protected final native void disposeInternal(final long handle); @Override final native boolean isValid0(long handle); @Override final native void seekToFirst0(long handle); diff --git a/java/src/main/java/org/rocksdb/Transaction.java b/java/src/main/java/org/rocksdb/Transaction.java index 8ab968a3c..cab7ed287 100644 --- a/java/src/main/java/org/rocksdb/Transaction.java +++ b/java/src/main/java/org/rocksdb/Transaction.java @@ -7,6 +7,7 @@ import static org.rocksdb.RocksDB.PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -31,7 +32,10 @@ public class Transaction extends RocksObject { private static final String FOR_EACH_KEY_THERE_MUST_BE_A_COLUMNFAMILYHANDLE = "For each key there must be a ColumnFamilyHandle."; + private static final String BB_ALL_DIRECT_OR_INDIRECT = + "ByteBuffer parameters must all be direct, or must all be indirect"; private final RocksDB parent; + private final ColumnFamilyHandle defaultColumnFamilyHandle; /** * Intentionally package private @@ -47,6 +51,7 @@ public class Transaction extends RocksObject { Transaction(final RocksDB parent, final long transactionHandle) { super(transactionHandle); this.parent = parent; + this.defaultColumnFamilyHandle = parent.getDefaultColumnFamily(); } /** @@ -250,6 +255,9 @@ public void rollbackToSavePoint() throws RocksDBException { } /** + * This function has an inconsistent parameter order compared to other {@code get()} + * methods and is deprecated in favour of one with a consistent order. + * * This function is similar to * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])} except it will * also read pending changes in this transaction. @@ -275,10 +283,44 @@ public void rollbackToSavePoint() throws RocksDBException { * @throws RocksDBException thrown if error happens in underlying native * library. */ - public byte[] get(final ColumnFamilyHandle columnFamilyHandle, - final ReadOptions readOptions, final byte[] key) throws RocksDBException { - assert(isOwningHandle()); - return get(nativeHandle_, readOptions.nativeHandle_, key, key.length, + @Deprecated + public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, + final byte[] key) throws RocksDBException { + assert (isOwningHandle()); + return get(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, + columnFamilyHandle.nativeHandle_); + } + + /** + * This function is similar to + * {@link RocksDB#get(ColumnFamilyHandle, ReadOptions, byte[])} except it will + * also read pending changes in this transaction. + * Currently, this function will return Status::MergeInProgress if the most + * recent write to the queried key in this batch is a Merge. + * + * If {@link ReadOptions#snapshot()} is not set, the current version of the + * key will be read. 
Calling {@link #setSnapshot()} does not affect the + * version of the data returned. + * + * Note that setting {@link ReadOptions#setSnapshot(Snapshot)} will affect + * what is read from the DB but will NOT change which keys are read from this + * transaction (the keys in this transaction do not yet belong to any snapshot + * and will be fetched regardless). + * + * @param readOptions Read options. + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param key the key to retrieve the value for. + * + * @return a byte array storing the value associated with the input key if + * any. null if it does not find the specified key. + * + * @throws RocksDBException thrown if error happens in underlying native + * library. + */ + public byte[] get(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, + final byte[] key) throws RocksDBException { + assert (isOwningHandle()); + return get(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_); } @@ -310,7 +352,141 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, public byte[] get(final ReadOptions readOptions, final byte[] key) throws RocksDBException { assert(isOwningHandle()); - return get(nativeHandle_, readOptions.nativeHandle_, key, key.length); + return get(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, + defaultColumnFamilyHandle.nativeHandle_); + } + + /** + * Get the value associated with the specified key in the default column family + * + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param key the key to retrieve the value. + * @param value the out-value to receive the retrieved value. + * @return A {@link GetStatus} wrapping the result status and the return value size. + * If {@code GetStatus.status} is {@code Ok} then {@code GetStatus.requiredSize} contains + * the size of the actual value that matches the specified + * {@code key} in byte. If {@code GetStatus.requiredSize} is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and a partial result was + * returned. If {@code GetStatus.status} is {@code NotFound} this indicates that + * the value was not found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus get(final ReadOptions opt, final byte[] key, final byte[] value) + throws RocksDBException { + final int result = get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, 0, + value.length, defaultColumnFamilyHandle.nativeHandle_); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } + } + + /** + * Get the value associated with the specified key in a specified column family + * + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param columnFamilyHandle the column family to find the key in + * @param key the key to retrieve the value. + * @param value the out-value to receive the retrieved value. + * @return A {@link GetStatus} wrapping the result status and the return value size. + * If {@code GetStatus.status} is {@code Ok} then {@code GetStatus.requiredSize} contains + * the size of the actual value that matches the specified + * {@code key} in byte. 
If {@code GetStatus.requiredSize} is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and a partial result was + * returned. If {@code GetStatus.status} is {@code NotFound} this indicates that + * the value was not found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus get(final ReadOptions opt, final ColumnFamilyHandle columnFamilyHandle, + final byte[] key, final byte[] value) throws RocksDBException { + final int result = get(nativeHandle_, opt.nativeHandle_, key, 0, key.length, value, 0, + value.length, columnFamilyHandle.nativeHandle_); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } + } + + /** + * Get the value associated with the specified key within the specified column family. + * + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param columnFamilyHandle the column family in which to find the key. + * @param key the key to retrieve the value. It is using position and limit. + * Supports direct buffer only. + * @param value the out-value to receive the retrieved value. + * It is using position and limit. Limit is set according to value size. + * Supports direct buffer only. + * @return A {@link GetStatus} wrapping the result status and the return value size. + * If {@code GetStatus.status} is {@code Ok} then {@code GetStatus.requiredSize} contains + * the size of the actual value that matches the specified + * {@code key} in byte. If {@code GetStatus.requiredSize} is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and a partial result was + * returned. If {@code GetStatus.status} is {@code NotFound} this indicates that + * the value was not found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus get(final ReadOptions opt, final ColumnFamilyHandle columnFamilyHandle, + final ByteBuffer key, final ByteBuffer value) throws RocksDBException { + final int result; + if (key.isDirect() && value.isDirect()) { + result = getDirect(nativeHandle_, opt.nativeHandle_, key, key.position(), key.remaining(), + value, value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + result = + get(nativeHandle_, opt.nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), + value.remaining(), columnFamilyHandle.nativeHandle_); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + + key.position(key.limit()); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + value.position(Math.min(value.limit(), value.position() + result)); + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } + } + + /** + * Get the value associated with the specified key within the default column family. + * + * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param key the key to retrieve the value. It is using position and limit. + * Supports direct buffer only. + * @param value the out-value to receive the retrieved value. + * It is using position and limit. Limit is set according to value size. + * Supports direct buffer only. 
+ * @return A {@link GetStatus} wrapping the result status and the return value size. + * If {@code GetStatus.status} is {@code Ok} then {@code GetStatus.requiredSize} contains + * the size of the actual value that matches the specified + * {@code key} in byte. If {@code GetStatus.requiredSize} is greater than the + * length of {@code value}, then it indicates that the size of the + * input buffer {@code value} is insufficient and a partial result was + * returned. If {@code GetStatus.status} is {@code NotFound} this indicates that + * the value was not found. + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus get(final ReadOptions opt, final ByteBuffer key, final ByteBuffer value) + throws RocksDBException { + return get(opt, this.defaultColumnFamilyHandle, key, value); } /** @@ -536,7 +712,7 @@ public byte[] getForUpdate(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean exclusive, final boolean doValidate) throws RocksDBException { assert (isOwningHandle()); - return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, + return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_, exclusive, doValidate); } @@ -562,7 +738,7 @@ public byte[] getForUpdate(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final boolean exclusive) throws RocksDBException { assert(isOwningHandle()); - return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.length, + return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, columnFamilyHandle.nativeHandle_, exclusive, true /*doValidate*/); } @@ -613,8 +789,369 @@ public byte[] getForUpdate(final ReadOptions readOptions, public byte[] getForUpdate(final ReadOptions readOptions, final byte[] key, final boolean exclusive) throws RocksDBException { assert(isOwningHandle()); + return getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, + defaultColumnFamilyHandle.nativeHandle_, exclusive, true /*doValidate*/); + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *
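For the ByteBuffer variants above, a sketch using direct buffers (names and buffer sizes are illustrative only; txn and readOptions are assumed):

    final ByteBuffer key = ByteBuffer.allocateDirect(16);
    key.put("key1".getBytes(StandardCharsets.UTF_8));
    key.flip();
    final ByteBuffer value = ByteBuffer.allocateDirect(64);
    final GetStatus s = txn.get(readOptions, key, value);
    if (s.status.getCode() == Status.Code.Ok) {
      // value's position was advanced past the bytes actually written;
      // s.requiredSize is the full size of the stored value.
      value.flip(); // ready to read the (possibly partial) result
    }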

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus getForUpdate(final ReadOptions readOptions, final byte[] key, final byte[] value, + final boolean exclusive) throws RocksDBException { + final int result = getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, + value, 0, value.length, defaultColumnFamilyHandle.nativeHandle_, exclusive, + true /* doValidate */); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *
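A sketch of the GetStatus-returning getForUpdate described above, assuming txn, readOptions and key exist; the key is locked and tracked whether or not it is found:

    final byte[] buf = new byte[16];
    final GetStatus s = txn.getForUpdate(readOptions, key, buf, true /* exclusive */);
    if (s.status.getCode() == Status.Code.NotFound) {
      // The key does not exist, but the read is still registered, so a
      // conflicting write outside this transaction will fail the commit.
    }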

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus getForUpdate(final ReadOptions readOptions, final ByteBuffer key, + final ByteBuffer value, final boolean exclusive) throws RocksDBException { return getForUpdate( - nativeHandle_, readOptions.nativeHandle_, key, key.length, exclusive, true /*doValidate*/); + readOptions, defaultColumnFamilyHandle, key, value, exclusive, true /* doValidate */); + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param columnFamilyHandle in which to find the key/value + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus getForUpdate(final ReadOptions readOptions, + final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, + final boolean exclusive) throws RocksDBException { + return getForUpdate( + readOptions, columnFamilyHandle, key, value, exclusive, true /*doValidate*/); + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param columnFamilyHandle in which to find the key/value + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * @param doValidate true if the transaction should validate the snapshot before doing the read + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + + public GetStatus getForUpdate(final ReadOptions readOptions, + final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, + final boolean exclusive, final boolean doValidate) throws RocksDBException { + final int result = getForUpdate(nativeHandle_, readOptions.nativeHandle_, key, 0, key.length, + value, 0, value.length, columnFamilyHandle.nativeHandle_, exclusive, doValidate); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param columnFamilyHandle in which to find the key/value + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + + public GetStatus getForUpdate(final ReadOptions readOptions, + final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, final ByteBuffer value, + final boolean exclusive) throws RocksDBException { + return getForUpdate( + readOptions, columnFamilyHandle, key, value, exclusive, true /*doValidate*/); + } + + /** + * Read this key and ensure that this transaction will only + * be able to be committed if this key is not written outside this + * transaction after it has first been read (or after the snapshot if a + * snapshot is set in this transaction). The transaction behavior is the + * same regardless of whether the key exists or not. + *

    + * Note: Currently, this function will return Status::MergeInProgress + * if the most recent write to the queried key in this batch is a Merge. + *

+ * The values returned by this function are similar to
+ * {@link RocksDB#get(ReadOptions, byte[])}.
+ * If {@code value} is null, then this function will not read any data, but will
+ * still ensure that this key cannot be written to from outside this
+ * transaction.
+ *

    + * If this transaction was created on an {@link OptimisticTransactionDB}, + * {@link #getForUpdate(ReadOptions, ColumnFamilyHandle, byte[], boolean)} + * could cause {@link #commit()} to fail. Otherwise, it could return any error + * that could be returned by + * {@link RocksDB#get(ReadOptions, byte[])}. + *

    + * If this transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * {@link Status.Code#MergeInProgress} if merge operations cannot be + * resolved. + * + * @param readOptions Read options. + * @param columnFamilyHandle in which to find the key/value + * @param key the key to retrieve the value for. + * @param value the value associated with the input key if + * * any. The result is undefined in no value is associated with the key + * @param exclusive true if the transaction should have exclusive access to + * the key, otherwise false for shared access. + * @param doValidate true if the transaction should validate the snapshot before doing the read + * + * @return a status object containing + * Status.OK if the requested value was read + * Status.NotFound if the requested value does not exist + * + * @throws RocksDBException thrown if error happens in underlying + * native library. + */ + public GetStatus getForUpdate(final ReadOptions readOptions, + final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, final ByteBuffer value, + final boolean exclusive, final boolean doValidate) throws RocksDBException { + final int result; + if (key.isDirect() && value.isDirect()) { + result = getDirectForUpdate(nativeHandle_, readOptions.nativeHandle_, key, key.position(), + key.remaining(), value, value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_, exclusive, doValidate); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + result = getForUpdate(nativeHandle_, readOptions.nativeHandle_, key.array(), + key.arrayOffset() + key.position(), key.remaining(), value.array(), + value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_, exclusive, doValidate); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + if (result < 0) { + return GetStatus.fromStatusCode(Status.Code.NotFound, 0); + } else { + value.position(Math.min(value.limit(), value.position() + result)); + return GetStatus.fromStatusCode(Status.Code.Ok, result); + } } /** @@ -726,23 +1263,44 @@ public byte[][] multiGetForUpdate(final ReadOptions readOptions, final byte[][] * @throws RocksDBException thrown if error happens in underlying * native library. */ - public List multiGetForUpdateAsList( - final ReadOptions readOptions, final List keys) throws RocksDBException { + public List multiGetForUpdateAsList( + final ReadOptions readOptions, final List keys) throws RocksDBException { + assert (isOwningHandle()); + if (keys.isEmpty()) { + return new ArrayList<>(0); + } + + final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); + + return Arrays.asList(multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keysArray)); + } + + /** + * Returns an iterator that will iterate on all keys in the default + * column family including both keys in the DB and uncommitted keys in this + * transaction. + *
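For the ByteBuffer form of getForUpdate, a short sketch using direct buffers (txn, readOptions and cf are placeholders):

    final ByteBuffer key = ByteBuffer.allocateDirect(16);
    key.put("key1".getBytes(StandardCharsets.UTF_8));
    key.flip();
    final ByteBuffer value = ByteBuffer.allocateDirect(64);
    final GetStatus s = txn.getForUpdate(
        readOptions, cf, key, value, true /* exclusive */, true /* doValidate */);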

+ * Caller is responsible for calling {@link RocksIterator#close()} on
+ * the returned Iterator.
+ *

    + * The returned iterator is only valid until {@link #commit()}, + * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. + * + * @return instance of iterator object. + */ + public RocksIterator getIterator() { assert (isOwningHandle()); - if (keys.isEmpty()) { - return new ArrayList<>(0); + try (ReadOptions readOptions = new ReadOptions()) { + return new RocksIterator(parent, + getIterator( + nativeHandle_, readOptions.nativeHandle_, defaultColumnFamilyHandle.nativeHandle_)); } - - final byte[][] keysArray = keys.toArray(new byte[keys.size()][]); - - return Arrays.asList(multiGetForUpdate(nativeHandle_, readOptions.nativeHandle_, keysArray)); } /** * Returns an iterator that will iterate on all keys in the default * column family including both keys in the DB and uncommitted keys in this * transaction. - *
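A usage sketch of the new no-argument getIterator(), which iterates the default column family with internally managed ReadOptions (txn is assumed to exist):

    try (final RocksIterator it = txn.getIterator()) {
      for (it.seekToFirst(); it.isValid(); it.next()) {
        // it.key() / it.value() see committed data in the DB as well as
        // keys written earlier in this transaction.
      }
    }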

    + * * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read * from the DB but will NOT change which keys are read from this transaction * (the keys in this transaction do not yet belong to any snapshot and will be @@ -759,8 +1317,9 @@ public List multiGetForUpdateAsList( */ public RocksIterator getIterator(final ReadOptions readOptions) { assert(isOwningHandle()); - return new RocksIterator(parent, getIterator(nativeHandle_, - readOptions.nativeHandle_)); + return new RocksIterator(parent, + getIterator( + nativeHandle_, readOptions.nativeHandle_, defaultColumnFamilyHandle.nativeHandle_)); } /** @@ -792,6 +1351,35 @@ public RocksIterator getIterator(final ReadOptions readOptions, readOptions.nativeHandle_, columnFamilyHandle.nativeHandle_)); } + /** + * Returns an iterator that will iterate on all keys in the column family + * specified by {@code columnFamilyHandle} including both keys in the DB + * and uncommitted keys in this transaction. + *

    + * Setting {@link ReadOptions#setSnapshot(Snapshot)} will affect what is read + * from the DB but will NOT change which keys are read from this transaction + * (the keys in this transaction do not yet belong to any snapshot and will be + * fetched regardless). + *

    + * Caller is responsible for calling {@link RocksIterator#close()} on + * the returned Iterator. + *

    + * The returned iterator is only valid until {@link #commit()}, + * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. + * + * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * instance + * + * @return instance of iterator object. + */ + public RocksIterator getIterator(final ColumnFamilyHandle columnFamilyHandle) { + assert (isOwningHandle()); + try (ReadOptions readOptions = new ReadOptions()) { + return new RocksIterator(parent, + getIterator(nativeHandle_, readOptions.nativeHandle_, columnFamilyHandle.nativeHandle_)); + } + } + /** * Similar to {@link RocksDB#put(ColumnFamilyHandle, byte[], byte[])}, but * will also perform conflict checking on the keys be written. @@ -823,8 +1411,8 @@ public RocksIterator getIterator(final ReadOptions readOptions, public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, assumeTracked); + put(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_, + assumeTracked); } /** @@ -855,8 +1443,8 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length, - columnFamilyHandle.nativeHandle_, false); + put(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_, + false); } /** @@ -884,7 +1472,7 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, public void put(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); - put(nativeHandle_, key, key.length, value, value.length); + put(nativeHandle_, key, 0, key.length, value, 0, value.length); } //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future @@ -935,7 +1523,97 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, columnFamilyHandle.nativeHandle_, false); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + /** + * Similar to {@link RocksDB#put(byte[], byte[])}, but + * will also perform conflict checking on the keys be written. + * + * If this Transaction was created on an {@link OptimisticTransactionDB}, + * these functions should always succeed. + * + * If this Transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * + * @param key the specified key to be inserted. + * @param value the value associated with the specified key. 
+ * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void put(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { + assert (isOwningHandle()); + if (key.isDirect() && value.isDirect()) { + putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), + value.remaining()); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + put(nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining(), + value.array(), value.arrayOffset() + value.position(), value.remaining()); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + + /** + * Similar to {@link RocksDB#put(byte[], byte[])}, but + * will also perform conflict checking on the keys be written. + * + * If this Transaction was created on an {@link OptimisticTransactionDB}, + * these functions should always succeed. + * + * If this Transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * + * @param columnFamilyHandle The column family to put the key/value into + * @param key the specified key to be inserted. + * @param value the value associated with the specified key. + * @param assumeTracked true when it is expected that the key is already + * tracked. More specifically, it means the the key was previous tracked + * in the same savepoint, with the same exclusive flag, and at a lower + * sequence number. If valid then it skips ValidateSnapshot, + * throws an error otherwise. 
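A sketch of the ByteBuffer put shown above; both buffers are direct here, and after the call each buffer's position is at its limit:

    final ByteBuffer key = ByteBuffer.allocateDirect(16);
    final ByteBuffer value = ByteBuffer.allocateDirect(16);
    key.put("key1".getBytes(StandardCharsets.UTF_8));
    value.put("value1".getBytes(StandardCharsets.UTF_8));
    key.flip();
    value.flip();
    txn.put(key, value); // direct key + direct value takes the putDirect JNI path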
+ * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void put(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, + final ByteBuffer value, final boolean assumeTracked) throws RocksDBException { + assert (isOwningHandle()); + if (key.isDirect() && value.isDirect()) { + putDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), + value.remaining(), columnFamilyHandle.nativeHandle_, assumeTracked); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + put(nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining(), + value.array(), value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_, assumeTracked); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + public void put(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, + final ByteBuffer value) throws RocksDBException { + put(columnFamilyHandle, key, value, false); + } + + // TODO(AR) refactor if we implement org.rocksdb.SliceParts in future /** * Similar to {@link #put(byte[], byte[])} but allows * you to specify the key and value in several parts that will be @@ -986,7 +1664,7 @@ public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value, final boolean assumeTracked) throws RocksDBException { assert (isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length, + merge(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_, assumeTracked); } @@ -1018,7 +1696,7 @@ public void merge(final ColumnFamilyHandle columnFamilyHandle, public void merge(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length, + merge(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_, false); } @@ -1047,7 +1725,115 @@ public void merge(final ColumnFamilyHandle columnFamilyHandle, public void merge(final byte[] key, final byte[] value) throws RocksDBException { assert(isOwningHandle()); - merge(nativeHandle_, key, key.length, value, value.length); + merge(nativeHandle_, key, 0, key.length, value, 0, value.length); + } + + /** + * Similar to {@link RocksDB#merge(byte[], byte[])}, but + * will also perform conflict checking on the keys be written. + * + * If this Transaction was created on an {@link OptimisticTransactionDB}, + * these functions should always succeed. + * + * If this Transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * + * @param key the specified key to be merged. + * @param value the value associated with the specified key. 
+ * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void merge(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { + assert (isOwningHandle()); + if (key.isDirect() && value.isDirect()) { + mergeDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), + value.remaining()); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + merge(nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining(), + value.array(), value.arrayOffset() + value.position(), value.remaining()); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + } + + /** + * Similar to {@link RocksDB#merge(byte[], byte[])}, but + * will also perform conflict checking on the keys be written. + * + * If this Transaction was created on an {@link OptimisticTransactionDB}, + * these functions should always succeed. + * + * If this Transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * + * @param columnFamilyHandle in which to apply the merge + * @param key the specified key to be merged. + * @param value the value associated with the specified key. + * @param assumeTracked expects the key be already tracked. + * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void merge(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, + final ByteBuffer value, final boolean assumeTracked) throws RocksDBException { + assert (isOwningHandle()); + if (key.isDirect() && value.isDirect()) { + mergeDirect(nativeHandle_, key, key.position(), key.remaining(), value, value.position(), + value.remaining(), columnFamilyHandle.nativeHandle_, assumeTracked); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + merge(nativeHandle_, key.array(), key.arrayOffset() + key.position(), key.remaining(), + value.array(), value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_, assumeTracked); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + + /** + * Similar to {@link RocksDB#merge(byte[], byte[])}, but + * will also perform conflict checking on the keys be written. + * + * If this Transaction was created on an {@link OptimisticTransactionDB}, + * these functions should always succeed. + * + * If this Transaction was created on a {@link TransactionDB}, an + * {@link RocksDBException} may be thrown with an accompanying {@link Status} + * when: + * {@link Status.Code#Busy} if there is a write conflict, + * {@link Status.Code#TimedOut} if a lock could not be acquired, + * {@link Status.Code#TryAgain} if the memtable history size is not large + * enough. See + * {@link ColumnFamilyOptions#maxWriteBufferNumberToMaintain()} + * + * @param columnFamilyHandle in which to apply the merge + * @param key the specified key to be merged. 
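A sketch of the ByteBuffer merge, assuming the column family was opened with a merge operator (for example a StringAppendOperator) and that txn exists:

    final ByteBuffer key = ByteBuffer.allocateDirect(16);
    final ByteBuffer delta = ByteBuffer.allocateDirect(16);
    key.put("mergeKey".getBytes(StandardCharsets.UTF_8));
    delta.put("suffix".getBytes(StandardCharsets.UTF_8));
    key.flip();
    delta.flip();
    txn.merge(key, delta); // queued in the transaction's write batch until commit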
+ * @param value the value associated with the specified key. + * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void merge(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, + final ByteBuffer value) throws RocksDBException { + merge(columnFamilyHandle, key, value, false); } /** @@ -1473,10 +2259,50 @@ public void putUntracked(final byte[][] keyParts, final byte[][] valueParts) */ public void mergeUntracked(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, final byte[] value) throws RocksDBException { - mergeUntracked(nativeHandle_, key, key.length, value, value.length, + assert (isOwningHandle()); + mergeUntracked(nativeHandle_, key, 0, key.length, value, 0, value.length, columnFamilyHandle.nativeHandle_); } + /** + * Similar to {@link RocksDB#merge(ColumnFamilyHandle, byte[], byte[])}, + * but operates on the transactions write batch. This write will only happen + * if this transaction gets committed successfully. + * + * Unlike {@link #merge(ColumnFamilyHandle, byte[], byte[])} no conflict + * checking will be performed for this key. + * + * If this Transaction was created on a {@link TransactionDB}, this function + * will still acquire locks necessary to make sure this write doesn't cause + * conflicts in other transactions; This may cause a {@link RocksDBException} + * with associated {@link Status.Code#Busy}. + * + * @param columnFamilyHandle The column family to merge the key/value into + * @param key the specified key to be merged. + * @param value the value associated with the specified key. + * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void mergeUntracked(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer key, + final ByteBuffer value) throws RocksDBException { + assert (isOwningHandle()); + if (key.isDirect() && value.isDirect()) { + mergeUntrackedDirect(nativeHandle_, key, key.position(), key.remaining(), value, + value.position(), value.remaining(), columnFamilyHandle.nativeHandle_); + } else if (!key.isDirect() && !value.isDirect()) { + assert key.hasArray(); + assert value.hasArray(); + mergeUntracked(nativeHandle_, key.array(), key.arrayOffset() + key.position(), + key.remaining(), value.array(), value.arrayOffset() + value.position(), value.remaining(), + columnFamilyHandle.nativeHandle_); + } else { + throw new RocksDBException(BB_ALL_DIRECT_OR_INDIRECT); + } + key.position(key.limit()); + value.position(value.limit()); + } + /** * Similar to {@link RocksDB#merge(byte[], byte[])}, * but operates on the transactions write batch. This write will only happen @@ -1498,8 +2324,30 @@ public void mergeUntracked(final ColumnFamilyHandle columnFamilyHandle, */ public void mergeUntracked(final byte[] key, final byte[] value) throws RocksDBException { - assert(isOwningHandle()); - mergeUntracked(nativeHandle_, key, key.length, value, value.length); + mergeUntracked(defaultColumnFamilyHandle, key, value); + } + + /** + * Similar to {@link RocksDB#merge(byte[], byte[])}, + * but operates on the transactions write batch. This write will only happen + * if this transaction gets committed successfully. + * + * Unlike {@link #merge(byte[], byte[])} no conflict + * checking will be performed for this key. 
+ * + * If this Transaction was created on a {@link TransactionDB}, this function + * will still acquire locks necessary to make sure this write doesn't cause + * conflicts in other transactions; This may cause a {@link RocksDBException} + * with associated {@link Status.Code#Busy}. + * + * @param key the specified key to be merged. + * @param value the value associated with the specified key. + * + * @throws RocksDBException when one of the TransactionalDB conditions + * described above occurs, or in the case of an unexpected error + */ + public void mergeUntracked(final ByteBuffer key, final ByteBuffer value) throws RocksDBException { + mergeUntracked(defaultColumnFamilyHandle, key, value); } /** @@ -2041,48 +2889,68 @@ private native void setSnapshotOnNextOperation(final long handle, private native void rollbackToSavePoint(final long handle) throws RocksDBException; private native byte[] get(final long handle, final long readOptionsHandle, final byte[] key, - final int keyLength, final long columnFamilyHandle) throws RocksDBException; - private native byte[] get(final long handle, final long readOptionsHandle, final byte[] key, - final int keyLen) throws RocksDBException; - private native byte[][] multiGet(final long handle, - final long readOptionsHandle, final byte[][] keys, - final long[] columnFamilyHandles) throws RocksDBException; + final int keyOffset, final int keyLength, final long columnFamilyHandle) + throws RocksDBException; + private native int get(final long handle, final long readOptionsHandle, final byte[] key, + final int keyOffset, final int keyLen, final byte[] value, final int valueOffset, + final int valueLen, final long columnFamilyHandle) throws RocksDBException; + private native int getDirect(final long handle, final long readOptionsHandle, + final ByteBuffer key, final int keyOffset, final int keyLength, final ByteBuffer value, + final int valueOffset, final int valueLength, final long columnFamilyHandle) + throws RocksDBException; + + private native byte[][] multiGet(final long handle, final long readOptionsHandle, + final byte[][] keys, final long[] columnFamilyHandles) throws RocksDBException; private native byte[][] multiGet(final long handle, final long readOptionsHandle, final byte[][] keys) throws RocksDBException; private native byte[] getForUpdate(final long handle, final long readOptionsHandle, - final byte[] key, final int keyLength, final long columnFamilyHandle, final boolean exclusive, + final byte[] key, final int keyOffset, final int keyLength, final long columnFamilyHandle, + final boolean exclusive, final boolean doValidate) throws RocksDBException; + private native int getForUpdate(final long handle, final long readOptionsHandle, final byte[] key, + final int keyOffset, final int keyLength, final byte[] value, final int valueOffset, + final int valueLen, final long columnFamilyHandle, final boolean exclusive, final boolean doValidate) throws RocksDBException; - private native byte[] getForUpdate(final long handle, final long readOptionsHandle, - final byte[] key, final int keyLen, final boolean exclusive, final boolean doValidate) - throws RocksDBException; + private native int getDirectForUpdate(final long handle, final long readOptionsHandle, + final ByteBuffer key, final int keyOffset, final int keyLength, final ByteBuffer value, + final int valueOffset, final int valueLen, final long columnFamilyHandle, + final boolean exclusive, final boolean doValidate) throws RocksDBException; private native byte[][] multiGetForUpdate(final long handle, 
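The byte[] and ByteBuffer mergeUntracked overloads that take no column family now route through the default column family handle, as the diff above shows; a brief sketch (cf, key and delta are placeholders):

    txn.mergeUntracked(key, delta);      // default column family, no conflict checking
    txn.mergeUntracked(cf, key, delta);  // explicit column family, no conflict checking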
final long readOptionsHandle, final byte[][] keys, final long[] columnFamilyHandles) throws RocksDBException; - private native byte[][] multiGetForUpdate(final long handle, - final long readOptionsHandle, final byte[][] keys) - throws RocksDBException; - private native long getIterator(final long handle, - final long readOptionsHandle); + private native byte[][] multiGetForUpdate( + final long handle, final long readOptionsHandle, final byte[][] keys) throws RocksDBException; private native long getIterator(final long handle, final long readOptionsHandle, final long columnFamilyHandle); - private native void put(final long handle, final byte[] key, final int keyLength, - final byte[] value, final int valueLength, final long columnFamilyHandle, - final boolean assumeTracked) throws RocksDBException; - private native void put(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) + private native void put(final long handle, final byte[] key, final int keyOffset, + final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; + private native void put(final long handle, final byte[] key, final int keyOffset, + final int keyLength, final byte[] value, final int valueOffset, final int valueLength, + final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; private native void put(final long handle, final byte[][] keys, final int keysLength, final byte[][] values, final int valuesLength, final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; private native void put(final long handle, final byte[][] keys, final int keysLength, final byte[][] values, final int valuesLength) throws RocksDBException; - private native void merge(final long handle, final byte[] key, final int keyLength, - final byte[] value, final int valueLength, final long columnFamilyHandle, + private native void putDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, + ByteBuffer value, int valueOffset, int valueLength, long cfHandle, final boolean assumeTracked) throws RocksDBException; - private native void merge(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) + private native void putDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, + ByteBuffer value, int valueOffset, int valueLength) throws RocksDBException; + + private native void merge(final long handle, final byte[] key, final int keyOffset, + final int keyLength, final byte[] value, final int valueOffset, final int valueLength, + final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; + private native void mergeDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, + ByteBuffer value, int valueOffset, int valueLength, long cfHandle, boolean assumeTracked) + throws RocksDBException; + private native void mergeDirect(long handle, ByteBuffer key, int keyOffset, int keyLength, + ByteBuffer value, int valueOffset, int valueLength) throws RocksDBException; + + private native void merge(final long handle, final byte[] key, final int keyOffset, + final int keyLength, final byte[] value, final int valueOffset, final int valueLength) throws RocksDBException; private native void delete(final long handle, final byte[] key, final int keyLength, final long columnFamilyHandle, final boolean assumeTracked) throws RocksDBException; @@ -2112,15 +2980,14 @@ private native void putUntracked(final long handle, final 
byte[][] keys, private native void putUntracked(final long handle, final byte[][] keys, final int keysLength, final byte[][] values, final int valuesLength) throws RocksDBException; - private native void mergeUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength, + private native void mergeUntracked(final long handle, final byte[] key, final int keyOff, + final int keyLength, final byte[] value, final int valueOff, final int valueLength, + final long columnFamilyHandle) throws RocksDBException; + private native void mergeUntrackedDirect(final long handle, final ByteBuffer key, + final int keyOff, final int keyLength, final ByteBuffer value, final int valueOff, + final int valueLength, final long columnFamilyHandle) throws RocksDBException; + private native void deleteUntracked(final long handle, final byte[] key, final int keyLength, final long columnFamilyHandle) throws RocksDBException; - private native void mergeUntracked(final long handle, final byte[] key, - final int keyLength, final byte[] value, final int valueLength) - throws RocksDBException; - private native void deleteUntracked(final long handle, final byte[] key, - final int keyLength, final long columnFamilyHandle) - throws RocksDBException; private native void deleteUntracked(final long handle, final byte[] key, final int keyLength) throws RocksDBException; private native void deleteUntracked(final long handle, final byte[][] keys, diff --git a/java/src/main/java/org/rocksdb/TransactionDB.java b/java/src/main/java/org/rocksdb/TransactionDB.java index 134a0c8a1..a4ee951dc 100644 --- a/java/src/main/java/org/rocksdb/TransactionDB.java +++ b/java/src/main/java/org/rocksdb/TransactionDB.java @@ -50,6 +50,7 @@ public static TransactionDB open(final Options options, // the currently-created RocksDB. tdb.storeOptionsInstance(options); tdb.storeTransactionDbOptions(transactionDbOptions); + tdb.storeDefaultColumnFamilyHandle(tdb.makeDefaultColumnFamilyHandle()); return tdb; } @@ -94,6 +95,7 @@ public static TransactionDB open(final DBOptions dbOptions, // in RocksDB can prevent Java to GC during the life-time of // the currently-created RocksDB. tdb.storeOptionsInstance(dbOptions); + tdb.storeDefaultColumnFamilyHandle(tdb.makeDefaultColumnFamilyHandle()); tdb.storeTransactionDbOptions(transactionDbOptions); for (int i = 1; i < handles.length; i++) { diff --git a/java/src/main/java/org/rocksdb/util/BufferUtil.java b/java/src/main/java/org/rocksdb/util/BufferUtil.java new file mode 100644 index 000000000..54be3e693 --- /dev/null +++ b/java/src/main/java/org/rocksdb/util/BufferUtil.java @@ -0,0 +1,16 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +package org.rocksdb.util; + +public class BufferUtil { + public static void CheckBounds(final int offset, final int len, final int size) { + if ((offset | len | (offset + len) | (size - (offset + len))) < 0) { + throw new IndexOutOfBoundsException( + String.format("offset(%d), len(%d), size(%d)", offset, len, size)); + } + } +} diff --git a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java b/java/src/test/java/org/rocksdb/AbstractTransactionTest.java index d57258009..2977d78fd 100644 --- a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java +++ b/java/src/test/java/org/rocksdb/AbstractTransactionTest.java @@ -8,10 +8,12 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; +import java.util.function.Function; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -181,8 +183,10 @@ public void getPut_cf() throws RocksDBException { final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + assertThat(txn.get(readOptions, testCf, k1)).isNull(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); txn.put(testCf, k1, v1); + assertThat(txn.get(readOptions, testCf, k1)).isEqualTo(v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); } } @@ -200,6 +204,135 @@ public void getPut() throws RocksDBException { } } + @Test + public void getPutTargetBuffer_cf() throws RocksDBException { + final byte[] k1 = "key1".getBytes(UTF_8); + final byte[] v1 = "value1".getBytes(UTF_8); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final byte[] target = "overwrite1".getBytes(UTF_8); + GetStatus status = txn.get(readOptions, testCf, k1, target); + assertThat(status.status.getCode()).isEqualTo(Status.Code.NotFound); + assertThat(status.requiredSize).isEqualTo(0); + txn.put(testCf, k1, v1); + status = txn.get(readOptions, testCf, k1, target); + assertThat(status.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(status.requiredSize).isEqualTo(v1.length); + assertThat(target).isEqualTo("value1ite1".getBytes()); + } + } + + @Test + public void getPutTargetBuffer() throws RocksDBException { + final byte[] k1 = "key1".getBytes(UTF_8); + final byte[] v1 = "value1".getBytes(UTF_8); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final byte[] target = "overwrite1".getBytes(UTF_8); + GetStatus status = txn.get(readOptions, k1, target); + assertThat(status.status.getCode()).isEqualTo(Status.Code.NotFound); + assertThat(status.requiredSize).isEqualTo(0); + txn.put(k1, v1); + status = txn.get(readOptions, k1, target); + assertThat(status.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(status.requiredSize).isEqualTo(v1.length); + assertThat(target).isEqualTo("value1ite1".getBytes()); + } + } + + public void getPutByteBuffer(final Function allocateBuffer) + throws RocksDBException { + final ByteBuffer k1 = allocateBuffer.apply(100).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = 
allocateBuffer.apply(100).put("value1".getBytes(UTF_8)); + v1.flip(); + final ByteBuffer vEmpty = allocateBuffer.apply(0); + + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ByteBuffer vGet = allocateBuffer.apply(100); + assertThat(txn.get(readOptions, k1, vGet).status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(k1, v1); + + final GetStatus getStatusError = txn.get(readOptions, k1, vEmpty); + assertThat(getStatusError.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatusError.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + assertThat(vEmpty.position()).isEqualTo(0); + assertThat(vEmpty.remaining()).isEqualTo(0); + + vGet.put("12345".getBytes(UTF_8)); + + final GetStatus getStatus = txn.get(readOptions, k1, vGet); + assertThat(getStatusError.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatusError.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + + vGet.put("67890".getBytes(UTF_8)); + vGet.flip(); + final byte[] bytes = new byte[vGet.limit()]; + vGet.get(bytes); + assertThat(new String(bytes, UTF_8)).isEqualTo("12345value167890"); + } + } + + @Test + public void getPutDirectByteBuffer() throws RocksDBException { + getPutByteBuffer(ByteBuffer::allocateDirect); + } + + @Test + public void getPutIndirectByteBuffer() throws RocksDBException { + getPutByteBuffer(ByteBuffer::allocate); + } + + public void getPutByteBuffer_cf(final Function allocateBuffer) + throws RocksDBException { + final ByteBuffer k1 = allocateBuffer.apply(100).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = allocateBuffer.apply(100).put("value1".getBytes(UTF_8)); + v1.flip(); + final ByteBuffer vEmpty = allocateBuffer.apply(0); + + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final ByteBuffer vGet = allocateBuffer.apply(100); + assertThat(txn.get(readOptions, testCf, k1, vGet).status.getCode()) + .isEqualTo(Status.Code.NotFound); + txn.put(testCf, k1, v1); + + final GetStatus getStatusError = txn.get(readOptions, testCf, k1, vEmpty); + assertThat(getStatusError.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatusError.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + assertThat(vEmpty.position()).isEqualTo(0); + assertThat(vEmpty.remaining()).isEqualTo(0); + + vGet.put("12345".getBytes(UTF_8)); + final GetStatus getStatus = txn.get(readOptions, testCf, k1, vGet); + assertThat(getStatus.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatus.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + vGet.put("67890".getBytes(UTF_8)); + vGet.flip(); + final byte[] bytes = new byte[vGet.limit()]; + vGet.get(bytes); + assertThat(new String(bytes, UTF_8)).isEqualTo("12345value167890"); + } + } + + @Test + public void getPutDirectByteBuffer_cf() throws RocksDBException { + getPutByteBuffer_cf(ByteBuffer::allocateDirect); + } + + @Test + public void getPutIndirectByteBuffer_cf() throws RocksDBException { + getPutByteBuffer_cf(ByteBuffer::allocate); + } + @Test public void multiGetPut_cf() throws RocksDBException { final byte[][] keys = new byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; @@ -300,6 +433,162 @@ public void getForUpdate() throws RocksDBException { } } + @Test + public void 
getForUpdateByteArray_cf_doValidate() throws RocksDBException { + final byte[] k1 = "key1".getBytes(UTF_8); + final byte[] v1 = "value1".getBytes(UTF_8); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final byte[] vNonExistent = new byte[1]; + final GetStatus sNonExistent = + txn.getForUpdate(readOptions, testCf, k1, vNonExistent, true, true); + assertThat(sNonExistent.status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(testCf, k1, v1); + final byte[] vPartial = new byte[4]; + final GetStatus sPartial = txn.getForUpdate(readOptions, testCf, k1, vPartial, true, true); + assertThat(sPartial.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sPartial.requiredSize).isEqualTo(v1.length); + assertThat(vPartial).isEqualTo(Arrays.copyOfRange(v1, 0, vPartial.length)); + + final byte[] vTotal = new byte[sPartial.requiredSize]; + final GetStatus sTotal = txn.getForUpdate(readOptions, testCf, k1, vTotal, true, true); + assertThat(sTotal.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sTotal.requiredSize).isEqualTo(v1.length); + assertThat(vTotal).isEqualTo(v1); + } + } + + public void getForUpdateByteArray_cf() throws RocksDBException { + final byte[] k1 = "key1".getBytes(UTF_8); + final byte[] v1 = "value1".getBytes(UTF_8); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final byte[] vNonExistent = new byte[1]; + final GetStatus sNonExistent = txn.getForUpdate(readOptions, testCf, k1, vNonExistent, true); + assertThat(sNonExistent.status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(testCf, k1, v1); + final byte[] vPartial = new byte[4]; + final GetStatus sPartial = txn.getForUpdate(readOptions, testCf, k1, vPartial, true); + assertThat(sPartial.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sPartial.requiredSize).isEqualTo(v1.length); + assertThat(vPartial).isEqualTo(Arrays.copyOfRange(v1, 0, vPartial.length)); + + final byte[] vTotal = new byte[sPartial.requiredSize]; + final GetStatus sTotal = txn.getForUpdate(readOptions, testCf, k1, vTotal, true); + assertThat(sTotal.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sTotal.requiredSize).isEqualTo(v1.length); + assertThat(vTotal).isEqualTo(v1); + } + } + + @Test + public void getForUpdateByteArray() throws RocksDBException { + final byte[] k1 = "key1".getBytes(UTF_8); + final byte[] v1 = "value1".getBytes(UTF_8); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final byte[] vNonExistent = new byte[1]; + final GetStatus sNonExistent = txn.getForUpdate(readOptions, k1, vNonExistent, true); + assertThat(sNonExistent.status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(k1, v1); + final byte[] vPartial = new byte[4]; + final GetStatus sPartial = txn.getForUpdate(readOptions, k1, vPartial, true); + assertThat(sPartial.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sPartial.requiredSize).isEqualTo(v1.length); + assertThat(vPartial).isEqualTo(Arrays.copyOfRange(v1, 0, vPartial.length)); + + final byte[] vTotal = new byte[sPartial.requiredSize]; + final GetStatus sTotal = txn.getForUpdate(readOptions, 
k1, vTotal, true); + assertThat(sTotal.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(sTotal.requiredSize).isEqualTo(v1.length); + assertThat(vTotal).isEqualTo(v1); + } + } + + @Test + public void getForUpdateDirectByteBuffer() throws Exception { + getForUpdateByteBuffer(ByteBuffer::allocateDirect); + } + + @Test + public void getForUpdateIndirectByteBuffer() throws Exception { + getForUpdateByteBuffer(ByteBuffer::allocate); + } + + public void getForUpdateByteBuffer(final Function allocateBuffer) + throws Exception { + final ByteBuffer k1 = allocateBuffer.apply(20).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = allocateBuffer.apply(20).put("value1".getBytes(UTF_8)); + v1.flip(); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ByteBuffer v1Read1 = allocateBuffer.apply(20); + final GetStatus getStatus1 = txn.getForUpdate(readOptions, k1, v1Read1, true); + assertThat(getStatus1.status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(k1, v1); + final ByteBuffer v1Read2 = allocateBuffer.apply(20); + final GetStatus getStatus2 = txn.getForUpdate(readOptions, k1, v1Read2, true); + assertThat(getStatus2.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatus2.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + assertThat(v1Read2).isEqualTo(allocateBuffer.apply(20).put("value1".getBytes(UTF_8))); + } + } + + @Test + public void getForUpdateDirectByteBuffer_cf() throws Exception { + getForUpdateByteBuffer_cf(ByteBuffer::allocateDirect); + } + + @Test + public void getForUpdateIndirectByteBuffer_cf() throws Exception { + getForUpdateByteBuffer_cf(ByteBuffer::allocate); + } + + public void getForUpdateByteBuffer_cf(final Function allocateBuffer) + throws Exception { + final ByteBuffer k1 = allocateBuffer.apply(20).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = allocateBuffer.apply(20).put("value1".getBytes(UTF_8)); + v1.flip(); + final ByteBuffer k2 = allocateBuffer.apply(20).put("key2".getBytes(UTF_8)); + k2.flip(); + final ByteBuffer v2 = allocateBuffer.apply(20).put("value2".getBytes(UTF_8)); + v2.flip(); + try (final DBContainer dbContainer = startDb(); + final ReadOptions readOptions = new ReadOptions(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final ByteBuffer v1Read1 = allocateBuffer.apply(20); + final GetStatus getStatus1 = txn.getForUpdate(readOptions, testCf, k1, v1Read1, true); + assertThat(getStatus1.status.getCode()).isEqualTo(Status.Code.NotFound); + txn.put(k1, v1); + k1.flip(); + v1.flip(); + txn.put(testCf, k2, v2); + k2.flip(); + v2.flip(); + final ByteBuffer v1Read2 = allocateBuffer.apply(20); + final GetStatus getStatus2 = txn.getForUpdate(readOptions, testCf, k1, v1Read2, true); + assertThat(getStatus2.status.getCode()).isEqualTo(Status.Code.NotFound); + k1.flip(); + txn.put(testCf, k1, v1); + k1.flip(); + v1.flip(); + final ByteBuffer v1Read3 = allocateBuffer.apply(20); + final GetStatus getStatus3 = txn.getForUpdate(readOptions, testCf, k1, v1Read3, true); + assertThat(getStatus3.status.getCode()).isEqualTo(Status.Code.Ok); + assertThat(getStatus3.requiredSize).isEqualTo("value1".getBytes(UTF_8).length); + assertThat(v1Read3).isEqualTo(allocateBuffer.apply(20).put("value1".getBytes(UTF_8))); + } + } + @Test public void multiGetForUpdate_cf() throws RocksDBException { final byte[][] keys = new 
byte[][] {"key1".getBytes(UTF_8), "key2".getBytes(UTF_8)}; @@ -401,6 +690,13 @@ public void getIterator() throws RocksDBException { assertThat(iterator.key()).isEqualTo(k1); assertThat(iterator.value()).isEqualTo(v1); } + + try (final RocksIterator iterator = txn.getIterator()) { + iterator.seek(k1); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo(k1); + assertThat(iterator.value()).isEqualTo(v1); + } } } @@ -422,6 +718,13 @@ public void getIterator_cf() throws RocksDBException { assertThat(iterator.key()).isEqualTo(k1); assertThat(iterator.value()).isEqualTo(v1); } + + try (final RocksIterator iterator = txn.getIterator(testCf)) { + iterator.seek(k1); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo(k1); + assertThat(iterator.value()).isEqualTo(v1); + } } } @@ -429,11 +732,15 @@ public void getIterator_cf() throws RocksDBException { public void merge_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); + final byte[] v2 = "value2".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); - txn.merge(testCf, k1, v1); + txn.put(testCf, k1, v1); + txn.merge(testCf, k1, v2); + assertThat(txn.get(new ReadOptions(), testCf, k1)).isEqualTo("value1**value2".getBytes()); + assertThat(txn.get(testCf, new ReadOptions(), k1)).isEqualTo("value1**value2".getBytes()); } } @@ -441,13 +748,94 @@ public void merge_cf() throws RocksDBException { public void merge() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); + final byte[] v2 = "value2".getBytes(UTF_8); + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + txn.put(k1, v1); + txn.merge(k1, v2); + assertThat(txn.get(new ReadOptions(), k1)).isEqualTo("value1++value2".getBytes()); + } + } + + @Test + public void mergeDirectByteBuffer() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocateDirect(100).put("key1".getBytes(UTF_8)); + final ByteBuffer v1 = ByteBuffer.allocateDirect(100).put("value1".getBytes(UTF_8)); + final ByteBuffer v2 = ByteBuffer.allocateDirect(100).put("value2".getBytes(UTF_8)); + k1.flip(); + v1.flip(); + v2.flip(); + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + txn.put(k1, v1); + k1.flip(); + v1.flip(); + txn.merge(k1, v2); + assertThat(txn.get(new ReadOptions(), "key1".getBytes(UTF_8))) + .isEqualTo("value1++value2".getBytes()); + } + } + + public void mergeIndirectByteBuffer() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocate(100).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = ByteBuffer.allocate(100).put("value1".getBytes(UTF_8)); + v1.flip(); + final ByteBuffer v2 = ByteBuffer.allocate(100).put("value2".getBytes(UTF_8)); + v2.flip(); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { - txn.merge(k1, v1); + txn.put(k1, v1); + txn.merge(k1, v2); + assertThat(txn.get(new ReadOptions(), "key1".getBytes(UTF_8))) + .isEqualTo("value1++value2".getBytes()); + } + } + + @Test + public void mergeDirectByteBuffer_cf() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocateDirect(100).put("key1".getBytes(UTF_8)); + final ByteBuffer v1 = 
ByteBuffer.allocateDirect(100).put("value1".getBytes(UTF_8)); + final ByteBuffer v2 = ByteBuffer.allocateDirect(100).put("value2".getBytes(UTF_8)); + k1.flip(); + v1.flip(); + v2.flip(); + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + txn.put(testCf, k1, v1); + k1.flip(); + v1.flip(); + txn.merge(testCf, k1, v2); + assertThat(txn.get(new ReadOptions(), testCf, "key1".getBytes(UTF_8))) + .isEqualTo("value1**value2".getBytes()); + assertThat(txn.get(testCf, new ReadOptions(), "key1".getBytes(UTF_8))) + .isEqualTo("value1**value2".getBytes()); } } + public void mergeIndirectByteBuffer_cf() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocate(100).put("key1".getBytes(UTF_8)); + k1.flip(); + final ByteBuffer v1 = ByteBuffer.allocate(100).put("value1".getBytes(UTF_8)); + v1.flip(); + final ByteBuffer v2 = ByteBuffer.allocate(100).put("value2".getBytes(UTF_8)); + v2.flip(); + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + txn.put(testCf, k1, v1); + txn.merge(testCf, k1, v2); + assertThat(txn.get(new ReadOptions(), testCf, "key1".getBytes(UTF_8))) + .isEqualTo("value1**value2".getBytes()); + assertThat(txn.get(testCf, new ReadOptions(), "key1".getBytes(UTF_8))) + .isEqualTo("value1**value2".getBytes()); + } + } @Test public void delete_cf() throws RocksDBException { @@ -459,9 +847,11 @@ public void delete_cf() throws RocksDBException { final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, k1, v1); + assertThat(txn.get(readOptions, testCf, k1)).isEqualTo(v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); txn.delete(testCf, k1); + assertThat(txn.get(readOptions, testCf, k1)).isNull(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); } } @@ -495,11 +885,12 @@ public void delete_parts_cf() throws RocksDBException { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, keyParts, valueParts); assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); + assertThat(txn.get(readOptions, testCf, key)).isEqualTo(value); txn.delete(testCf, keyParts); - assertThat(txn.get(testCf, readOptions, key)) - .isNull(); + assertThat(txn.get(readOptions, testCf, key)).isNull(); + assertThat(txn.get(testCf, readOptions, key)).isNull(); } } @@ -532,8 +923,10 @@ public void getPutUntracked_cf() throws RocksDBException { final ReadOptions readOptions = new ReadOptions(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + assertThat(txn.get(readOptions, testCf, k1)).isNull(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); txn.putUntracked(testCf, k1, v1); + assertThat(txn.get(readOptions, testCf, k1)).isEqualTo(v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); } } @@ -628,11 +1021,19 @@ public void multiGetPutAsListUntracked() throws RocksDBException { public void mergeUntracked_cf() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); + final byte[] v2 = "value2".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = 
dbContainer.getTestColumnFamily(); txn.mergeUntracked(testCf, k1, v1); + txn.mergeUntracked(testCf, k1, v2); + txn.commit(); + } + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + assertThat(txn.get(new ReadOptions(), testCf, k1)).isEqualTo("value1**value2".getBytes()); } } @@ -640,10 +1041,89 @@ public void mergeUntracked_cf() throws RocksDBException { public void mergeUntracked() throws RocksDBException { final byte[] k1 = "key1".getBytes(UTF_8); final byte[] v1 = "value1".getBytes(UTF_8); + final byte[] v2 = "value2".getBytes(UTF_8); try(final DBContainer dbContainer = startDb(); final Transaction txn = dbContainer.beginTransaction()) { txn.mergeUntracked(k1, v1); + txn.mergeUntracked(k1, v2); + txn.commit(); + } + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + assertThat(txn.get(new ReadOptions(), k1)).isEqualTo("value1++value2".getBytes()); + } + } + + @Test + public void mergeUntrackedByteBuffer() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocateDirect(20).put("key1".getBytes(UTF_8)); + final ByteBuffer v1 = ByteBuffer.allocateDirect(20).put("value1".getBytes(UTF_8)); + final ByteBuffer v2 = ByteBuffer.allocateDirect(20).put("value2".getBytes(UTF_8)); + k1.flip(); + v1.flip(); + v2.flip(); + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + txn.mergeUntracked(k1, v1); + k1.flip(); + v1.flip(); + txn.mergeUntracked(k1, v2); + k1.flip(); + v2.flip(); + txn.commit(); + } + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ByteBuffer v = ByteBuffer.allocateDirect(20); + final GetStatus status = txn.get(new ReadOptions(), k1, v); + assertThat(status.status.getCode()).isEqualTo(Status.Code.Ok); + k1.flip(); + v.flip(); + final int expectedLength = "value1++value2".length(); + assertThat(v.remaining()).isEqualTo(expectedLength); + final byte[] vBytes = new byte[expectedLength]; + v.get(vBytes); + assertThat(vBytes).isEqualTo("value1++value2".getBytes()); + } + } + + @Test + public void mergeUntrackedByteBuffer_cf() throws RocksDBException { + final ByteBuffer k1 = ByteBuffer.allocateDirect(20).put("key1".getBytes(UTF_8)); + final ByteBuffer v1 = ByteBuffer.allocateDirect(20).put("value1".getBytes(UTF_8)); + final ByteBuffer v2 = ByteBuffer.allocateDirect(20).put("value2".getBytes(UTF_8)); + k1.flip(); + v1.flip(); + v2.flip(); + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + txn.mergeUntracked(testCf, k1, v1); + k1.flip(); + v1.flip(); + txn.mergeUntracked(testCf, k1, v2); + k1.flip(); + v2.flip(); + txn.commit(); + } + + try (final DBContainer dbContainer = startDb(); + final Transaction txn = dbContainer.beginTransaction()) { + final ByteBuffer v = ByteBuffer.allocateDirect(20); + final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); + final GetStatus status = txn.get(new ReadOptions(), testCf, k1, v); + assertThat(status.status.getCode()).isEqualTo(Status.Code.Ok); + k1.flip(); + v.flip(); + final int expectedLength = "value1++value2".length(); + assertThat(v.remaining()).isEqualTo(expectedLength); + final byte[] vBytes = new byte[expectedLength]; + v.get(vBytes); + 
assertThat(vBytes).isEqualTo("value1**value2".getBytes()); } } @@ -657,9 +1137,11 @@ public void deleteUntracked_cf() throws RocksDBException { final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, k1, v1); + assertThat(txn.get(readOptions, testCf, k1)).isEqualTo(v1); assertThat(txn.get(testCf, readOptions, k1)).isEqualTo(v1); txn.deleteUntracked(testCf, k1); + assertThat(txn.get(readOptions, testCf, k1)).isNull(); assertThat(txn.get(testCf, readOptions, k1)).isNull(); } } @@ -692,9 +1174,11 @@ public void deleteUntracked_parts_cf() throws RocksDBException { final Transaction txn = dbContainer.beginTransaction()) { final ColumnFamilyHandle testCf = dbContainer.getTestColumnFamily(); txn.put(testCf, keyParts, valueParts); + assertThat(txn.get(readOptions, testCf, key)).isEqualTo(value); assertThat(txn.get(testCf, readOptions, key)).isEqualTo(value); txn.deleteUntracked(testCf, keyParts); + assertThat(txn.get(readOptions, testCf, key)).isNull(); assertThat(txn.get(testCf, readOptions, key)).isNull(); } } diff --git a/java/src/test/java/org/rocksdb/KeyMayExistTest.java b/java/src/test/java/org/rocksdb/KeyMayExistTest.java index 3f3bec6ba..5a9ffd6eb 100644 --- a/java/src/test/java/org/rocksdb/KeyMayExistTest.java +++ b/java/src/test/java/org/rocksdb/KeyMayExistTest.java @@ -261,10 +261,12 @@ public void keyMayExistBB() throws RocksDBException { keyBuffer.flip(); assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); + keyBuffer.flip(); final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); valueBuffer.position(12); KeyMayExist keyMayExist = db.keyMayExist(keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(12); @@ -303,10 +305,11 @@ public void keyMayExistBBReadOptions() throws RocksDBException { try (final ReadOptions readOptions = new ReadOptions()) { assertThat(db.keyMayExist(readOptions, keyBuffer)).isEqualTo(true); - + keyBuffer.flip(); final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); valueBuffer.position(12); KeyMayExist keyMayExist = db.keyMayExist(readOptions, keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(12); @@ -318,6 +321,7 @@ public void keyMayExistBBReadOptions() throws RocksDBException { valueBuffer.limit(value.length + 24); valueBuffer.position(25); keyMayExist = db.keyMayExist(readOptions, keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(25); @@ -362,7 +366,9 @@ public void keyMayExistBBCF() throws RocksDBException { keyBuffer.flip(); assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(false); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(0), keyBuffer)).isEqualTo(true); // 1 is just a CF @@ -372,8 +378,11 @@ public void keyMayExistBBCF() throws RocksDBException { keyBuffer.flip(); assertThat(db.keyMayExist(keyBuffer)).isEqualTo(false); + keyBuffer.flip(); 
assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(true); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(0), keyBuffer)).isEqualTo(false); + keyBuffer.flip(); exceptionRule.expect(AssertionError.class); exceptionRule.expectMessage( @@ -395,8 +404,10 @@ public void keyMayExistBBCFReadOptions() throws RocksDBException { try (final ReadOptions readOptions = new ReadOptions()) { assertThat(db.keyMayExist(keyBuffer)).isEqualTo(true); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) .isEqualTo(false); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(0), readOptions, keyBuffer)) .isEqualTo(true); @@ -407,8 +418,10 @@ public void keyMayExistBBCFReadOptions() throws RocksDBException { keyBuffer.flip(); assertThat(db.keyMayExist(readOptions, keyBuffer)).isEqualTo(false); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) .isEqualTo(true); + keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(0), readOptions, keyBuffer)) .isEqualTo(false); @@ -432,10 +445,11 @@ public void keyMayExistBBCFOffset() throws RocksDBException { keyBuffer.flip(); assertThat(db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer)).isEqualTo(true); - + keyBuffer.flip(); final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); valueBuffer.position(12); KeyMayExist keyMayExist = db.keyMayExist(columnFamilyHandleList.get(1), keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(12); @@ -474,11 +488,12 @@ public void keyMayExistBBCFOffsetReadOptions() throws RocksDBException { try (final ReadOptions readOptions = new ReadOptions()) { assertThat(db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer)) .isEqualTo(true); - + keyBuffer.flip(); final ByteBuffer valueBuffer = ByteBuffer.allocateDirect(value.length + 24); valueBuffer.position(12); KeyMayExist keyMayExist = db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(12); @@ -491,6 +506,7 @@ public void keyMayExistBBCFOffsetReadOptions() throws RocksDBException { valueBuffer.position(25); keyMayExist = db.keyMayExist(columnFamilyHandleList.get(1), readOptions, keyBuffer, valueBuffer); + keyBuffer.flip(); assertThat(keyMayExist.exists).isEqualTo(KeyMayExist.KeyMayExistEnum.kExistsWithValue); assertThat(keyMayExist.valueLength).isEqualTo(value.length); assertThat(valueBuffer.position()).isEqualTo(25); diff --git a/java/src/test/java/org/rocksdb/MergeCFVariantsTest.java b/java/src/test/java/org/rocksdb/MergeCFVariantsTest.java new file mode 100644 index 000000000..6c4f07ddc --- /dev/null +++ b/java/src/test/java/org/rocksdb/MergeCFVariantsTest.java @@ -0,0 +1,126 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.rocksdb.MergeTest.longFromByteArray; +import static org.rocksdb.MergeTest.longToByteArray; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class MergeCFVariantsTest { + @FunctionalInterface + interface FunctionMerge { + public void apply(PDatabase db, PColumnFamilyHandle one, PLeft two, PRight three) + throws RocksDBException; + } + + @Parameterized.Parameters + public static List> data() { + return Arrays.asList(RocksDB::merge, + (db, cfh, left, right) + -> db.merge(cfh, new WriteOptions(), left, right), + (db, cfh, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.merge(cfh, left0, 7, left.length, right0, 4, right.length); + }, + (db, cfh, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.merge(cfh, new WriteOptions(), left0, 7, left.length, right0, 4, right.length); + }, + (db, cfh, left, right) + -> { + final ByteBuffer bbLeft = ByteBuffer.allocateDirect(100); + final ByteBuffer bbRight = ByteBuffer.allocateDirect(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.merge(cfh, new WriteOptions(), bbLeft, bbRight); + }, + (db, cfh, left, right) -> { + final ByteBuffer bbLeft = ByteBuffer.allocate(100); + final ByteBuffer bbRight = ByteBuffer.allocate(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.merge(cfh, new WriteOptions(), bbLeft, bbRight); + }); + } + + @Parameterized.Parameter + public FunctionMerge mergeFunction; + + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Test + public void cFUInt64AddOperatorOption() throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final ColumnFamilyOptions cfOpt1 = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator); + final ColumnFamilyOptions cfOpt2 = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator)) { + final List cfDescriptors = + Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), + new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2)); + final List columnFamilyHandleList = new ArrayList<>(); + try (final DBOptions opt = + new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open( + opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { + try { + // writing (long)100 under key + db.put(columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(100)); + // merge (long)1 under key + mergeFunction.apply( + db, columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(1)); + byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); + 
long longValue = longFromByteArray(value); + + // Test also with createColumnFamily + try (final ColumnFamilyOptions cfHandleOpts = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator); + final ColumnFamilyHandle cfHandle = db.createColumnFamily( + new ColumnFamilyDescriptor("new_cf2".getBytes(), cfHandleOpts))) { + // writing (long)200 under cfkey2 + db.put(cfHandle, "cfkey2".getBytes(), longToByteArray(200)); + // merge (long)50 under cfkey2 + db.merge(cfHandle, new WriteOptions(), "cfkey2".getBytes(), longToByteArray(50)); + value = db.get(cfHandle, "cfkey2".getBytes()); + long longValueTmpCf = longFromByteArray(value); + + assertThat(longValue).isEqualTo(101); + assertThat(longValueTmpCf).isEqualTo(250); + } + } finally { + for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { + columnFamilyHandle.close(); + } + } + } + } + } +} diff --git a/java/src/test/java/org/rocksdb/MergeTest.java b/java/src/test/java/org/rocksdb/MergeTest.java index f99ac49d3..10ffeb778 100644 --- a/java/src/test/java/org/rocksdb/MergeTest.java +++ b/java/src/test/java/org/rocksdb/MergeTest.java @@ -45,14 +45,14 @@ public void stringOption() } } - private byte[] longToByteArray(final long l) { + static byte[] longToByteArray(final long l) { final ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE).order(ByteOrder.LITTLE_ENDIAN); buf.putLong(l); return buf.array(); } - private long longFromByteArray(final byte[] a) { + static long longFromByteArray(final byte[] a) { final ByteBuffer buf = ByteBuffer.allocate(Long.SIZE / Byte.SIZE).order(ByteOrder.LITTLE_ENDIAN); buf.put(a); diff --git a/java/src/test/java/org/rocksdb/MergeVariantsTest.java b/java/src/test/java/org/rocksdb/MergeVariantsTest.java new file mode 100644 index 000000000..1acedc1e6 --- /dev/null +++ b/java/src/test/java/org/rocksdb/MergeVariantsTest.java @@ -0,0 +1,95 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.rocksdb.MergeTest.longFromByteArray; +import static org.rocksdb.MergeTest.longToByteArray; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class MergeVariantsTest { + @FunctionalInterface + interface FunctionMerge { + public void apply(PDatabase db, PLeft two, PRight three) throws RocksDBException; + } + + @Parameterized.Parameters + public static List> data() { + return Arrays.asList(RocksDB::merge, + (db, left, right) + -> db.merge(new WriteOptions(), left, right), + (db, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.merge(left0, 7, left.length, right0, 4, right.length); + }, + (db, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.merge(new WriteOptions(), left0, 7, left.length, right0, 4, right.length); + }, + (db, left, right) + -> { + final ByteBuffer bbLeft = ByteBuffer.allocateDirect(100); + final ByteBuffer bbRight = ByteBuffer.allocateDirect(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.merge(new WriteOptions(), bbLeft, bbRight); + }, + (db, left, right) -> { + final ByteBuffer bbLeft = ByteBuffer.allocate(100); + final ByteBuffer bbRight = ByteBuffer.allocate(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.merge(new WriteOptions(), bbLeft, bbRight); + }); + } + + @Parameterized.Parameter + public MergeVariantsTest.FunctionMerge mergeFunction; + + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Test + public void uint64AddOperatorOption() throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final Options opt = + new Options().setCreateIfMissing(true).setMergeOperator(uint64AddOperator); + final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { + // Writing (long)100 under key + db.put("key".getBytes(), longToByteArray(100)); + + // Writing (long)1 under key + mergeFunction.apply(db, "key".getBytes(), longToByteArray(1)); + + final byte[] value = db.get("key".getBytes()); + final long longValue = longFromByteArray(value); + + assertThat(longValue).isEqualTo(101); + } + } +} diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java b/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java index d2f92e1ff..4959d207b 100644 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java +++ b/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java @@ -373,12 +373,13 @@ public OptimisticTransactionDBContainer startDb() .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); + final ColumnFamilyOptions defaultColumnFamilyOptions = new ColumnFamilyOptions(); + 
defaultColumnFamilyOptions.setMergeOperator(new StringAppendOperator("++")); final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, - columnFamilyOptions)); + columnFamilyOptions.setMergeOperator(new StringAppendOperator("**")); + final List columnFamilyDescriptors = Arrays.asList( + new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, defaultColumnFamilyOptions), + new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, columnFamilyOptions)); final List columnFamilyHandles = new ArrayList<>(); final OptimisticTransactionDB optimisticTxnDb; diff --git a/java/src/test/java/org/rocksdb/PutCFVariantsTest.java b/java/src/test/java/org/rocksdb/PutCFVariantsTest.java new file mode 100644 index 000000000..977c74dc8 --- /dev/null +++ b/java/src/test/java/org/rocksdb/PutCFVariantsTest.java @@ -0,0 +1,126 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.rocksdb.MergeTest.longFromByteArray; +import static org.rocksdb.MergeTest.longToByteArray; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PutCFVariantsTest { + @FunctionalInterface + interface FunctionCFPut { + public void apply(PDatabase db, PColumnFamilyHandle cfh, PLeft two, PRight three) + throws RocksDBException; + } + + @Parameterized.Parameters + public static List> + data() { + return Arrays.asList(RocksDB::put, + (db, cfh, left, right) + -> db.put(cfh, new WriteOptions(), left, right), + (db, cfh, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.put(cfh, left0, 7, left.length, right0, 4, right.length); + }, + (db, cfh, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.put(cfh, new WriteOptions(), left0, 7, left.length, right0, 4, right.length); + }, + + (db, cfh, left, right) + -> { + final ByteBuffer bbLeft = ByteBuffer.allocateDirect(100); + final ByteBuffer bbRight = ByteBuffer.allocateDirect(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.put(cfh, new WriteOptions(), bbLeft, bbRight); + }, + (db, cfh, left, right) -> { + final ByteBuffer bbLeft = ByteBuffer.allocate(100); + final ByteBuffer bbRight = ByteBuffer.allocate(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.put(cfh, new WriteOptions(), bbLeft, bbRight); + }); + } + + @Parameterized.Parameter + public PutCFVariantsTest.FunctionCFPut putFunction; + + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new 
RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Test + public void writeAndRead() throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final ColumnFamilyOptions cfOpt1 = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator); + final ColumnFamilyOptions cfOpt2 = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator)) { + final List cfDescriptors = + Arrays.asList(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, cfOpt1), + new ColumnFamilyDescriptor("new_cf".getBytes(), cfOpt2)); + final List columnFamilyHandleList = new ArrayList<>(); + try (final DBOptions opt = + new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open( + opt, dbFolder.getRoot().getAbsolutePath(), cfDescriptors, columnFamilyHandleList)) { + try { + // writing (long)100 under key + putFunction.apply( + db, columnFamilyHandleList.get(1), "cfkey".getBytes(), longToByteArray(100)); + // merge (long)1 under key + byte[] value = db.get(columnFamilyHandleList.get(1), "cfkey".getBytes()); + final long longValue = longFromByteArray(value); + + // Test also with createColumnFamily + try (final ColumnFamilyOptions cfHandleOpts = + new ColumnFamilyOptions().setMergeOperator(uint64AddOperator); + final ColumnFamilyHandle cfHandle = db.createColumnFamily( + new ColumnFamilyDescriptor("new_cf2".getBytes(), cfHandleOpts))) { + // writing (long)200 under cfkey2 + db.put(cfHandle, "cfkey2".getBytes(), longToByteArray(200)); + // merge (long)50 under cfkey2 + value = db.get(cfHandle, "cfkey2".getBytes()); + final long longValueTmpCf = longFromByteArray(value); + + assertThat(longValue).isEqualTo(100); + assertThat(longValueTmpCf).isEqualTo(200); + } + } finally { + for (final ColumnFamilyHandle columnFamilyHandle : columnFamilyHandleList) { + columnFamilyHandle.close(); + } + } + } + } + } +} diff --git a/java/src/test/java/org/rocksdb/PutVariantsTest.java b/java/src/test/java/org/rocksdb/PutVariantsTest.java new file mode 100644 index 000000000..2e0e9b9e3 --- /dev/null +++ b/java/src/test/java/org/rocksdb/PutVariantsTest.java @@ -0,0 +1,92 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.rocksdb.MergeTest.longFromByteArray; +import static org.rocksdb.MergeTest.longToByteArray; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class PutVariantsTest { + @FunctionalInterface + interface FunctionPut { + public void apply(PDatabase db, PLeft two, PRight three) throws RocksDBException; + } + + @Parameterized.Parameters + public static List> data() { + return Arrays.asList(RocksDB::put, + (db, left, right) + -> db.put(new WriteOptions(), left, right), + (db, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.put(left0, 7, left.length, right0, 4, right.length); + }, + (db, left, right) + -> { + final byte[] left0 = + ("1234567" + new String(left, StandardCharsets.UTF_8) + "890").getBytes(); + final byte[] right0 = + ("1234" + new String(right, StandardCharsets.UTF_8) + "567890ab").getBytes(); + db.put(new WriteOptions(), left0, 7, left.length, right0, 4, right.length); + }, + (db, left, right) + -> { + final ByteBuffer bbLeft = ByteBuffer.allocateDirect(100); + final ByteBuffer bbRight = ByteBuffer.allocateDirect(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.put(new WriteOptions(), bbLeft, bbRight); + }, + (db, left, right) -> { + final ByteBuffer bbLeft = ByteBuffer.allocate(100); + final ByteBuffer bbRight = ByteBuffer.allocate(100); + bbLeft.put(left).flip(); + bbRight.put(right).flip(); + db.put(new WriteOptions(), bbLeft, bbRight); + }); + } + + @Parameterized.Parameter public PutVariantsTest.FunctionPut putFunction; + + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Test + public void writeAndRead() throws InterruptedException, RocksDBException { + try (final UInt64AddOperator uint64AddOperator = new UInt64AddOperator(); + final Options opt = + new Options().setCreateIfMissing(true).setMergeOperator(uint64AddOperator); + final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) { + // Writing (long)100 under key + putFunction.apply(db, "key".getBytes(), longToByteArray(100)); + + final byte[] value = db.get("key".getBytes()); + final long longValue = longFromByteArray(value); + + assertThat(longValue).isEqualTo(100); + } + } +} diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index ed6e989a8..625b8e089 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -184,10 +184,8 @@ public void put() throws RocksDBException { final WriteOptions opt = new WriteOptions(); final ReadOptions optr = new ReadOptions()) { db.put("key1".getBytes(), "value".getBytes()); db.put(opt, "key2".getBytes(), "12345678".getBytes()); - assertThat(db.get("key1".getBytes())).isEqualTo( - "value".getBytes()); - assertThat(db.get("key2".getBytes())).isEqualTo( - 
"12345678".getBytes()); + assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); + assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); final ByteBuffer key = ByteBuffer.allocateDirect(12); final ByteBuffer value = ByteBuffer.allocateDirect(12); @@ -245,8 +243,73 @@ public void put() throws RocksDBException { } } - private static Segment sliceSegment(final String key) { - final ByteBuffer rawKey = ByteBuffer.allocate(key.length() + 4); + @Test + public void putIndirectByteBuffers() throws RocksDBException { + try (final RocksDB db = RocksDB.open(dbFolder.getRoot().getAbsolutePath()); + final WriteOptions opt = new WriteOptions(); final ReadOptions optr = new ReadOptions()) { + db.put("key1".getBytes(), "value".getBytes()); + db.put(opt, "key2".getBytes(), "12345678".getBytes()); + assertThat(db.get("key1".getBytes())).isEqualTo("value".getBytes()); + assertThat(db.get("key2".getBytes())).isEqualTo("12345678".getBytes()); + + ByteBuffer key = ByteBuffer.allocate(12); + ByteBuffer value = ByteBuffer.allocate(12); + key.position(4); + key.put("key3".getBytes()); + key.position(4).limit(8); + value.position(4); + value.put("val3".getBytes()); + value.position(4).limit(8); + + db.put(opt, key, value); + + assertThat(key.position()).isEqualTo(8); + assertThat(key.limit()).isEqualTo(8); + + assertThat(value.position()).isEqualTo(8); + assertThat(value.limit()).isEqualTo(8); + + key.position(4); + + ByteBuffer result = ByteBuffer.allocate(12); + assertThat(db.get(optr, key, result)).isEqualTo(4); + assertThat(result.position()).isEqualTo(0); + assertThat(result.limit()).isEqualTo(4); + assertThat(key.position()).isEqualTo(8); + assertThat(key.limit()).isEqualTo(8); + + byte[] tmp = new byte[4]; + result.get(tmp); + assertThat(tmp).isEqualTo("val3".getBytes()); + + key.position(4); + + result.clear().position(9); + assertThat(db.get(optr, key, result)).isEqualTo(4); + assertThat(result.position()).isEqualTo(9); + assertThat(result.limit()).isEqualTo(12); + assertThat(key.position()).isEqualTo(8); + assertThat(key.limit()).isEqualTo(8); + byte[] tmp2 = new byte[3]; + result.get(tmp2); + assertThat(tmp2).isEqualTo("val".getBytes()); + + // put + Segment key3 = sliceSegment("key3"); + Segment key4 = sliceSegment("key4"); + Segment value0 = sliceSegment("value 0"); + Segment value1 = sliceSegment("value 1"); + db.put(key3.data, key3.offset, key3.len, value0.data, value0.offset, value0.len); + db.put(opt, key4.data, key4.offset, key4.len, value1.data, value1.offset, value1.len); + + // compare + Assert.assertTrue(value0.isSamePayload(db.get(key3.data, key3.offset, key3.len))); + Assert.assertTrue(value1.isSamePayload(db.get(key4.data, key4.offset, key4.len))); + } + } + + private static Segment sliceSegment(String key) { + ByteBuffer rawKey = ByteBuffer.allocate(key.length() + 4); rawKey.put((byte)0); rawKey.put((byte)0); rawKey.put(key.getBytes()); diff --git a/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/java/src/test/java/org/rocksdb/RocksIteratorTest.java index 2a13550b7..90c635f58 100644 --- a/java/src/test/java/org/rocksdb/RocksIteratorTest.java +++ b/java/src/test/java/org/rocksdb/RocksIteratorTest.java @@ -5,6 +5,7 @@ package org.rocksdb; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.fail; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; @@ -45,7 +46,7 @@ private void validateValue( } @Test - public void rocksIterator() throws RocksDBException { + public void rocksIteratorByteBuffers() 
throws RocksDBException { try (final Options options = new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { @@ -72,6 +73,103 @@ public void rocksIterator() throws RocksDBException { validateKey(iterator, ByteBuffer.allocate(5), "key1"); validateValue(iterator, ByteBuffer.allocate(2), "value1"); validateValue(iterator, ByteBuffer.allocate(8), "value1"); + } + } + } + + @Test + public void rocksIteratorByteArrayValues() throws RocksDBException { + try (final Options options = + new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key1".getBytes(), "value1".getBytes()); + db.put("key2".getBytes(), "value2".getBytes()); + + try (final RocksIterator iterator = db.newIterator()) { + iterator.seekToFirst(); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo("key1".getBytes()); + assertThat(iterator.value()).isEqualTo("value1".getBytes()); + + final byte[] valueArray0 = new byte[2]; + assertThat(iterator.value(valueArray0)).isEqualTo(6); + assertThat(valueArray0).isEqualTo("va".getBytes()); + final byte[] valueArray1 = new byte[8]; + assertThat(iterator.value(valueArray1)).isEqualTo(6); + assertThat(valueArray1).isEqualTo("value1\0\0".getBytes()); + final byte[] valueArray2 = new byte[10]; + assertThat(iterator.value(valueArray2, 2, 6)).isEqualTo(6); + assertThat(valueArray2).isEqualTo("\0\0value1\0\0".getBytes()); + final byte[] valueArray3 = new byte[10]; + assertThat(iterator.value(valueArray3, 5, 5)).isEqualTo(6); + assertThat(valueArray3).isEqualTo("\0\0\0\0\0value".getBytes()); + final byte[] valueArray4 = new byte[6]; + try { + iterator.value(valueArray4, 1, 6); + fail("Expected IndexOutOfBoundsException"); + } catch (final IndexOutOfBoundsException ignored) { + // we should arrive here + } + final byte[] valueArray5 = new byte[7]; + assertThat(iterator.value(valueArray5, 1, 6)).isEqualTo(6); + assertThat(valueArray5).isEqualTo("\0value1".getBytes()); + } + } + } + + @Test + public void rocksIteratorByteArrayKeys() throws RocksDBException { + try (final Options options = + new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key1".getBytes(), "value1".getBytes()); + db.put("key2".getBytes(), "value2".getBytes()); + + try (final RocksIterator iterator = db.newIterator()) { + iterator.seekToFirst(); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo("key1".getBytes()); + assertThat(iterator.value()).isEqualTo("value1".getBytes()); + + final byte[] keyArray0 = new byte[2]; + assertThat(iterator.key(keyArray0)).isEqualTo(4); + assertThat(keyArray0).isEqualTo("ke".getBytes()); + final byte[] keyArray1 = new byte[8]; + assertThat(iterator.key(keyArray1)).isEqualTo(4); + assertThat(keyArray1).isEqualTo("key1\0\0\0\0".getBytes()); + final byte[] keyArray2 = new byte[10]; + assertThat(iterator.key(keyArray2, 2, 6)).isEqualTo(4); + assertThat(keyArray2).isEqualTo("\0\0key1\0\0\0\0".getBytes()); + final byte[] keyArray3 = new byte[10]; + assertThat(iterator.key(keyArray3, 5, 3)).isEqualTo(4); + assertThat(keyArray3).isEqualTo("\0\0\0\0\0key\0\0".getBytes()); + final byte[] keyArray4 = new byte[4]; + try { + iterator.key(keyArray4, 1, 4); + fail("Expected IndexOutOfBoundsException"); + } catch (final 
IndexOutOfBoundsException ignored) { + // we should arrive here + } + final byte[] keyArray5 = new byte[5]; + assertThat(iterator.key(keyArray5, 1, 4)).isEqualTo(4); + assertThat(keyArray5).isEqualTo("\0key1".getBytes()); + } + } + } + + @Test + public void rocksIteratorSimple() throws RocksDBException { + try (final Options options = + new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key1".getBytes(), "value1".getBytes()); + db.put("key2".getBytes(), "value2".getBytes()); + + try (final RocksIterator iterator = db.newIterator()) { + iterator.seekToFirst(); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo("key1".getBytes()); + assertThat(iterator.value()).isEqualTo("value1".getBytes()); iterator.next(); assertThat(iterator.isValid()).isTrue(); @@ -90,6 +188,23 @@ public void rocksIterator() throws RocksDBException { assertThat(iterator.key()).isEqualTo("key2".getBytes()); assertThat(iterator.value()).isEqualTo("value2".getBytes()); iterator.status(); + } + } + } + + @Test + public void rocksIterator() throws RocksDBException { + try (final Options options = + new Options().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + db.put("key1".getBytes(), "value1".getBytes()); + db.put("key2".getBytes(), "value2".getBytes()); + + try (final RocksIterator iterator = db.newIterator()) { + iterator.seekToFirst(); + assertThat(iterator.isValid()).isTrue(); + assertThat(iterator.key()).isEqualTo("key1".getBytes()); + assertThat(iterator.value()).isEqualTo("value1".getBytes()); { final ByteBuffer key = ByteBuffer.allocate(12); diff --git a/java/src/test/java/org/rocksdb/TransactionTest.java b/java/src/test/java/org/rocksdb/TransactionTest.java index b80445c5c..03a6b4ff6 100644 --- a/java/src/test/java/org/rocksdb/TransactionTest.java +++ b/java/src/test/java/org/rocksdb/TransactionTest.java @@ -416,12 +416,13 @@ public TransactionDBContainer startDb() throws RocksDBException { .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true); final TransactionDBOptions txnDbOptions = new TransactionDBOptions(); + final ColumnFamilyOptions defaultColumnFamilyOptions = new ColumnFamilyOptions(); + defaultColumnFamilyOptions.setMergeOperator(new StringAppendOperator("++")); final ColumnFamilyOptions columnFamilyOptions = new ColumnFamilyOptions(); - final List columnFamilyDescriptors = - Arrays.asList( - new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY), - new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, - columnFamilyOptions)); + columnFamilyOptions.setMergeOperator(new StringAppendOperator("**")); + final List columnFamilyDescriptors = Arrays.asList( + new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY, defaultColumnFamilyOptions), + new ColumnFamilyDescriptor(TXN_TEST_COLUMN_FAMILY, columnFamilyOptions)); final List columnFamilyHandles = new ArrayList<>(); final TransactionDB txnDb; diff --git a/unreleased_history/performance_improvements/java_api_consistency.md b/unreleased_history/performance_improvements/java_api_consistency.md new file mode 100644 index 000000000..7e29b629c --- /dev/null +++ b/unreleased_history/performance_improvements/java_api_consistency.md @@ -0,0 +1,16 @@ +* Java API extensions to improve consistency and completeness of APIs + 1 Extended `RocksDB.get([ColumnFamilyHandle columnFamilyHandle,] ReadOptions opt, 
ByteBuffer key, ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters + 2 Extended `RocksDB.put( [ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters + 3 Added `RocksDB.merge([ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOptions, ByteBuffer key, ByteBuffer value)` methods with the same parameter options as `put(...)` - direct and indirect buffers are supported + 4 Added `RocksIterator.key( byte[] key [, int offset, int len])` methods which retrieve the iterator key into the supplied buffer + 5 Added `RocksIterator.value( byte[] value [, int offset, int len])` methods which retrieve the iterator value into the supplied buffer + 6 Deprecated `get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, byte[])` in favour of `get(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, byte[])` which has consistent parameter ordering with other methods in the same class + 7 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value)` methods which retrieve the requested value into the supplied buffer + 8 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which retrieve the requested value into the supplied buffer + 9 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer + 10 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer + 11 Added `Transaction.getIterator()` method as a convenience which defaults the `ReadOptions` value supplied to existing `Transaction.iterator()` methods. This mirrors the existing `RocksDB.iterator()` method. + 12 Added `Transaction.put([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written in a `ByteBuffer` parameter + 13 Added `Transaction.merge([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter + 14 Added `Transaction.mergeUntracked([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter + From 5c5e01894328ad86c4cede67e50315229a28867a Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Mon, 11 Dec 2023 11:21:52 -0800 Subject: [PATCH 323/386] Fix JNI lazy load regression. (#12133) Summary: A small regression that conflicted with PR https://github.com/facebook/rocksdb/pull/12133 was later merged in commit https://github.com/facebook/rocksdb/commit/2296c624fa0fd72f61eb706c56bb4fc53ddf7ce6#diff-26d3ab8a3d764183d0ea3aea834fe481eec2347c334b918ebd7bdce4bcabcc19R35 This PR addresses that regression. 
Closes https://github.com/facebook/rocksdb/issues/12132 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12133 Reviewed By: jowlyzhang Differential Revision: D52041736 Pulled By: ltamasi fbshipit-source-id: 33db57035154c833ae00b5d921b17b3be80c8dd7 --- java/src/main/java/org/rocksdb/RocksDB.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/rocksdb/RocksDB.java index 120323e54..839d01877 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/rocksdb/RocksDB.java @@ -33,10 +33,6 @@ private enum LibraryState { private static final AtomicReference libraryLoaded = new AtomicReference<>(LibraryState.NOT_LOADED); - static { - RocksDB.loadLibrary(); - } - static final String PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD = "Performance optimization for a very specific workload"; From c96d9a0fbbfd2215e7ae88af8a13b94109a349b5 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Mon, 11 Dec 2023 12:02:56 -0800 Subject: [PATCH 324/386] Allow TablePropertiesCollectorFactory to return null collector (#12129) Summary: As part of building another feature, I wanted this: * Custom implementations of `TablePropertiesCollectorFactory` may now return a `nullptr` collector to decline processing a file, reducing callback overheads in such cases. * Polished, clarified some related API comments. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12129 Test Plan: unit test added Reviewed By: ltamasi Differential Revision: D51966667 Pulled By: pdillinger fbshipit-source-id: 2991c08fe6ce3a8c9f14c68f1495f5a17bca2770 --- db/db_table_properties_test.cc | 66 +++++++++++++++++++ db/table_properties_collector.h | 9 ++- include/rocksdb/table_properties.h | 10 ++- .../block_based/block_based_table_builder.cc | 7 +- table/plain/plain_table_builder.cc | 7 +- .../public_api_changes/null_collector.md | 1 + 6 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 unreleased_history/public_api_changes/null_collector.md diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc index 61dcf3c1e..bfa67226e 100644 --- a/db/db_table_properties_test.cc +++ b/db/db_table_properties_test.cc @@ -22,6 +22,7 @@ #include "table/table_properties_internal.h" #include "test_util/testharness.h" #include "test_util/testutil.h" +#include "util/atomic.h" #include "util/random.h" @@ -417,6 +418,71 @@ TEST_F(DBTablePropertiesTest, GetDbIdentifiersProperty) { } } +TEST_F(DBTablePropertiesTest, FactoryReturnsNull) { + struct JunkTablePropertiesCollector : public TablePropertiesCollector { + const char* Name() const override { return "JunkTablePropertiesCollector"; } + Status Finish(UserCollectedProperties* properties) override { + properties->insert({"Junk", "Junk"}); + return Status::OK(); + } + UserCollectedProperties GetReadableProperties() const override { + return {}; + } + }; + + // Alternates between putting a "Junk" property and using `nullptr` to + // opt out. 
+ static RelaxedAtomic count{0}; + struct SometimesTablePropertiesCollectorFactory + : public TablePropertiesCollectorFactory { + const char* Name() const override { + return "SometimesTablePropertiesCollectorFactory"; + } + TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /*context*/) override { + if (count.FetchAddRelaxed(1) & 1) { + return nullptr; + } else { + return new JunkTablePropertiesCollector(); + } + } + }; + + Options options = CurrentOptions(); + options.table_properties_collector_factories.emplace_back( + std::make_shared()); + // For plain table + options.prefix_extractor.reset(NewFixedPrefixTransform(4)); + for (std::shared_ptr tf : + {options.table_factory, + std::shared_ptr(NewPlainTableFactory({}))}) { + SCOPED_TRACE("Table factory = " + std::string(tf->Name())); + options.table_factory = tf; + + DestroyAndReopen(options); + + ASSERT_OK(Put("key0", "value1")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key0", "value2")); + ASSERT_OK(Flush()); + + TablePropertiesCollection props; + ASSERT_OK(db_->GetPropertiesOfAllTables(&props)); + int no_junk_count = 0; + int junk_count = 0; + for (const auto& item : props) { + if (item.second->user_collected_properties.find("Junk") != + item.second->user_collected_properties.end()) { + junk_count++; + } else { + no_junk_count++; + } + } + EXPECT_EQ(1, no_junk_count); + EXPECT_EQ(1, junk_count); + } +} + class DBTableHostnamePropertyTest : public DBTestBase, public ::testing::WithParamInterface> { diff --git a/db/table_properties_collector.h b/db/table_properties_collector.h index 968115c3d..9bcec93dd 100644 --- a/db/table_properties_collector.h +++ b/db/table_properties_collector.h @@ -98,8 +98,13 @@ class UserKeyTablePropertiesCollectorFactory TablePropertiesCollectorFactory::Context context; context.column_family_id = column_family_id; context.level_at_creation = level_at_creation; - return new UserKeyTablePropertiesCollector( - user_collector_factory_->CreateTablePropertiesCollector(context)); + TablePropertiesCollector* collector = + user_collector_factory_->CreateTablePropertiesCollector(context); + if (collector) { + return new UserKeyTablePropertiesCollector(collector); + } else { + return nullptr; + } } virtual const char* Name() const override { diff --git a/include/rocksdb/table_properties.h b/include/rocksdb/table_properties.h index 052df3503..b87647a0d 100644 --- a/include/rocksdb/table_properties.h +++ b/include/rocksdb/table_properties.h @@ -140,8 +140,7 @@ class TablePropertiesCollector { virtual bool NeedCompact() const { return false; } }; -// Constructs TablePropertiesCollector. Internals create a new -// TablePropertiesCollector for each new table +// Constructs TablePropertiesCollector instances for each table file creation. // // Exceptions MUST NOT propagate out of overridden functions into RocksDB, // because RocksDB is not exception-safe. This could cause undefined behavior @@ -163,7 +162,12 @@ class TablePropertiesCollectorFactory : public Customizable { const ConfigOptions& options, const std::string& value, std::shared_ptr* result); - // has to be thread-safe + // To collect properties of a table with the given context, returns + // a new object inheriting from TablePropertiesCollector. The caller + // is responsible for deleting the object returned. Alternatively, + // nullptr may be returned to decline collecting properties for the + // file (and reduce callback overheads). + // MUST be thread-safe. 
virtual TablePropertiesCollector* CreateTablePropertiesCollector( TablePropertiesCollectorFactory::Context context) = 0; diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 5712354ac..7b8bd0275 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -577,9 +577,12 @@ struct BlockBasedTableBuilder::Rep { for (auto& factory : *tbo.int_tbl_prop_collector_factories) { assert(factory); - table_properties_collectors.emplace_back( + std::unique_ptr collector{ factory->CreateIntTblPropCollector(tbo.column_family_id, - tbo.level_at_creation)); + tbo.level_at_creation)}; + if (collector) { + table_properties_collectors.emplace_back(std::move(collector)); + } } table_properties_collectors.emplace_back( new BlockBasedTablePropertiesCollector( diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 784ef1471..1e61773d6 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -118,9 +118,12 @@ PlainTableBuilder::PlainTableBuilder( for (auto& factory : *int_tbl_prop_collector_factories) { assert(factory); - table_properties_collectors_.emplace_back( + std::unique_ptr collector{ factory->CreateIntTblPropCollector(column_family_id, - level_at_creation)); + level_at_creation)}; + if (collector) { + table_properties_collectors_.emplace_back(std::move(collector)); + } } } diff --git a/unreleased_history/public_api_changes/null_collector.md b/unreleased_history/public_api_changes/null_collector.md new file mode 100644 index 000000000..2af478c83 --- /dev/null +++ b/unreleased_history/public_api_changes/null_collector.md @@ -0,0 +1 @@ +Custom implementations of `TablePropertiesCollectorFactory` may now return a `nullptr` collector to decline processing a file, reducing callback overheads in such cases. From c1b84d04373928e1f9557cf84c30dd20a71ad9e5 Mon Sep 17 00:00:00 2001 From: anand76 Date: Mon, 11 Dec 2023 16:59:59 -0800 Subject: [PATCH 325/386] Fix false negative in TieredSecondaryCache nvm cache lookup (#12134) Summary: There is a bug in the `TieredSecondaryCache` that can result in a false negative. This can happen when a MultiGet does a cache lookup that gets a hit in the `TieredSecondaryCache` local nvm cache tier, and the result is available before MultiGet calls `WaitAll` (i.e the nvm cache `SecondaryCacheResultHandle` `IsReady` returns true). 
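For context, a minimal sketch (not part of this PR; helper name and setup are illustrative) of the kind of read that exercises this path, assuming the DB's block cache is a tiered cache with an nvm secondary tier:

```
// Illustrative sketch only: an async batched MultiGet, the read path that
// issues secondary cache lookups and later calls WaitAll() internally.
#include <vector>

#include "rocksdb/db.h"

void AsyncMultiGetSketch(ROCKSDB_NAMESPACE::DB* db,
                         const std::vector<ROCKSDB_NAMESPACE::Slice>& keys) {
  using namespace ROCKSDB_NAMESPACE;
  ReadOptions ro;
  ro.async_io = true;  // results may become ready before WaitAll is reached
  std::vector<PinnableSlice> values(keys.size());
  std::vector<Status> statuses(keys.size());
  db->MultiGet(ro, db->DefaultColumnFamily(), keys.size(), keys.data(),
               values.data(), statuses.data());
  // With the bug, a block whose nvm lookup completed this quickly could be
  // treated as a miss (false negative) instead of a hit.
}
```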
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12134 Test Plan: Add a new unit test in tiered_secondary_cache_test Reviewed By: akankshamahajan15 Differential Revision: D52023309 Pulled By: anand1976 fbshipit-source-id: e5ae681226a0f12753fecb2f6acc7e5f254ae72b --- cache/tiered_secondary_cache.cc | 6 +- cache/tiered_secondary_cache.h | 8 +- cache/tiered_secondary_cache_test.cc | 138 ++++++++++++++++-- ...tiered_cache_low_latency_false_negative.md | 1 + 4 files changed, 138 insertions(+), 15 deletions(-) create mode 100644 unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md diff --git a/cache/tiered_secondary_cache.cc b/cache/tiered_secondary_cache.cc index 493e69572..1a1201a4d 100644 --- a/cache/tiered_secondary_cache.cc +++ b/cache/tiered_secondary_cache.cc @@ -109,10 +109,8 @@ void TieredSecondaryCache::WaitAll( } nvm_sec_cache_->WaitAll(nvm_handles); for (auto handle : my_handles) { - assert(handle->IsReady()); - auto nvm_handle = handle->inner_handle(); - handle->SetSize(nvm_handle->Size()); - handle->SetValue(nvm_handle->Value()); + assert(handle->inner_handle()->IsReady()); + handle->Complete(); } } diff --git a/cache/tiered_secondary_cache.h b/cache/tiered_secondary_cache.h index 6e0536436..46e3eb084 100644 --- a/cache/tiered_secondary_cache.h +++ b/cache/tiered_secondary_cache.h @@ -79,7 +79,10 @@ class TieredSecondaryCache : public SecondaryCacheWrapper { ~ResultHandle() override {} bool IsReady() override { - return !inner_handle_ || inner_handle_->IsReady(); + if (inner_handle_ && inner_handle_->IsReady()) { + Complete(); + } + return ready_; } void Wait() override { @@ -92,10 +95,10 @@ class TieredSecondaryCache : public SecondaryCacheWrapper { Cache::ObjectPtr Value() override { return value_; } void Complete() { - assert(IsReady()); size_ = inner_handle_->Size(); value_ = inner_handle_->Value(); inner_handle_.reset(); + ready_ = true; } void SetInnerHandle(std::unique_ptr&& handle) { @@ -115,6 +118,7 @@ class TieredSecondaryCache : public SecondaryCacheWrapper { CreateContext ctx_; size_t size_; Cache::ObjectPtr value_; + bool ready_ = false; }; static void NoopDelete(Cache::ObjectPtr /*obj*/, diff --git a/cache/tiered_secondary_cache_test.cc b/cache/tiered_secondary_cache_test.cc index 9d8cdf7fb..d641254df 100644 --- a/cache/tiered_secondary_cache_test.cc +++ b/cache/tiered_secondary_cache_test.cc @@ -15,10 +15,11 @@ namespace ROCKSDB_NAMESPACE { class TestSecondaryCache : public SecondaryCache { public: - explicit TestSecondaryCache(size_t capacity) + explicit TestSecondaryCache(size_t capacity, bool ready_before_wait) : cache_(NewLRUCache(capacity, 0, false, 0.5 /* high_pri_pool_ratio */, nullptr, kDefaultToAdaptiveMutex, kDontChargeCacheMetadata)), + ready_before_wait_(ready_before_wait), num_insert_saved_(0), num_hits_(0), num_misses_(0) {} @@ -88,7 +89,8 @@ class TestSecondaryCache : public SecondaryCache { /*alloc*/ nullptr, &value, &charge); if (s.ok()) { secondary_handle.reset(new TestSecondaryCacheResultHandle( - cache_.get(), handle, value, charge, /*ready=*/wait)); + cache_.get(), handle, value, charge, + /*ready=*/wait || ready_before_wait_)); kept_in_sec_cache = true; } else { cache_.Release(handle); @@ -168,6 +170,7 @@ class TestSecondaryCache : public SecondaryCache { BasicTypedSharedCacheInterface; using TypedHandle = SharedCache::TypedHandle; SharedCache cache_; + bool ready_before_wait_; uint32_t num_insert_saved_; uint32_t num_hits_; uint32_t num_misses_; @@ -179,11 +182,10 @@ class DBTieredSecondaryCacheTest : public 
DBTestBase { DBTieredSecondaryCacheTest() : DBTestBase("db_tiered_secondary_cache_test", /*env_do_fsync=*/true) {} - std::shared_ptr NewCache(size_t pri_capacity, - size_t compressed_capacity, - size_t nvm_capacity, - TieredAdmissionPolicy adm_policy = - TieredAdmissionPolicy::kAdmPolicyAuto) { + std::shared_ptr NewCache( + size_t pri_capacity, size_t compressed_capacity, size_t nvm_capacity, + TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto, + bool ready_before_wait = false) { LRUCacheOptions lru_opts; TieredCacheOptions opts; lru_opts.capacity = 0; @@ -194,10 +196,11 @@ class DBTieredSecondaryCacheTest : public DBTestBase { opts.comp_cache_opts.capacity = 0; opts.comp_cache_opts.num_shard_bits = 0; opts.total_capacity = pri_capacity + compressed_capacity; - opts.compressed_secondary_ratio = + opts.compressed_secondary_ratio = compressed_secondary_ratio_ = (double)compressed_capacity / opts.total_capacity; if (nvm_capacity > 0) { - nvm_sec_cache_.reset(new TestSecondaryCache(nvm_capacity)); + nvm_sec_cache_.reset( + new TestSecondaryCache(nvm_capacity, ready_before_wait)); opts.nvm_sec_cache = nvm_sec_cache_; } opts.adm_policy = adm_policy; @@ -207,6 +210,12 @@ class DBTieredSecondaryCacheTest : public DBTestBase { return cache_; } + void ClearPrimaryCache() { + ASSERT_EQ(UpdateTieredCache(cache_, -1, 1.0), Status::OK()); + ASSERT_EQ(UpdateTieredCache(cache_, -1, compressed_secondary_ratio_), + Status::OK()); + } + TestSecondaryCache* nvm_sec_cache() { return nvm_sec_cache_.get(); } CompressedSecondaryCache* compressed_secondary_cache() { @@ -218,6 +227,7 @@ class DBTieredSecondaryCacheTest : public DBTestBase { private: std::shared_ptr cache_; std::shared_ptr nvm_sec_cache_; + double compressed_secondary_ratio_; }; // In this test, the block size is set to 4096. 
Each value is 1007 bytes, so @@ -582,6 +592,116 @@ TEST_F(DBTieredSecondaryCacheTest, WaitAllTest) { Destroy(options); } +TEST_F(DBTieredSecondaryCacheTest, ReadyBeforeWaitAllTest) { + if (!LZ4_Supported()) { + ROCKSDB_GTEST_SKIP("This test requires LZ4 support."); + return; + } + + BlockBasedTableOptions table_options; + table_options.block_cache = NewCache(250 * 1024, 20 * 1024, 256 * 1024, + TieredAdmissionPolicy::kAdmPolicyAuto, + /*ready_before_wait=*/true); + table_options.block_size = 4 * 1024; + table_options.cache_index_and_filter_blocks = false; + Options options = GetDefaultOptions(); + options.create_if_missing = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + options.statistics = CreateDBStatistics(); + + options.paranoid_file_checks = false; + DestroyAndReopen(options); + Random rnd(301); + const int N = 256; + for (int i = 0; i < N; i++) { + std::string p_v; + test::CompressibleString(&rnd, 0.5, 1007, &p_v); + ASSERT_OK(Put(Key(i), p_v)); + } + + ASSERT_OK(Flush()); + + std::vector keys; + std::vector values; + + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 3u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + ASSERT_EQ(options.statistics->getTickerCount(BLOCK_CACHE_MISS), 3u); + + keys.clear(); + values.clear(); + keys.push_back(Key(12)); + keys.push_back(Key(16)); + keys.push_back(Key(20)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 0u); + ASSERT_EQ(options.statistics->getTickerCount(BLOCK_CACHE_MISS), 6u); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(4)); + keys.push_back(Key(8)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 6u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 3u); + ASSERT_EQ(options.statistics->getTickerCount(BLOCK_CACHE_MISS), 6u); + + ClearPrimaryCache(); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(32)); + keys.push_back(Key(36)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 4u); + ASSERT_EQ(options.statistics->getTickerCount(BLOCK_CACHE_MISS), 8u); + + keys.clear(); + values.clear(); + keys.push_back(Key(0)); + keys.push_back(Key(32)); + keys.push_back(Key(36)); + values = MultiGet(keys, /*snapshot=*/nullptr, /*async=*/true); + ASSERT_EQ(values.size(), keys.size()); + for (auto value : values) { + ASSERT_EQ(1007, value.size()); + } + ASSERT_EQ(nvm_sec_cache()->num_insert_saved(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_misses(), 8u); + ASSERT_EQ(nvm_sec_cache()->num_hits(), 4u); + 
ASSERT_EQ(options.statistics->getTickerCount(BLOCK_CACHE_MISS), 8u); + + Destroy(options); +} + // This test is for iteration. It iterates through a set of keys in two // passes. First pass loads the compressed blocks into the nvm tier, and // the second pass should hit all of those blocks. diff --git a/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md b/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md new file mode 100644 index 000000000..c271724ad --- /dev/null +++ b/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md @@ -0,0 +1 @@ +A lookup by MultiGet in a TieredCache that goes to the local flash cache and finishes with very low latency, i.e. before the subsequent call to WaitAll, was ignored, resulting in a false negative and a memory leak. From c2ab4e754b86a95a7316feb0a313cc086459302c Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Tue, 12 Dec 2023 09:35:29 -0800 Subject: [PATCH 326/386] Add initial support to stress test persist_user_defined_timestamps (#12124) Summary: This PR adds initial stress testing for the user-defined-timestamps-in-memtable-only feature. Each flavor of the `*_ts` crash test gets a 1 in 3 chance to run with timestamps not persisted; this setting is initialized once and kept consistent across the following re-runs. Besides disabling incompatible feature combinations to make the test run more stably, this initial stress test includes the following: 1) It currently only runs test methods that validate db state against expected state, not the ones that validate db state by comparing the result of one API to another API, such as `TestMultiGet` (compared with `Get`), `TestMultiGetEntity`, and `TestIterate` (which compares a source iterator to a control iterator). Because timestamps can be removed, results from one API are not directly comparable to another's as things stand now. More test logic is needed to handle that and will be added in a follow-up. 2) Even when comparing db state to expected state, the db can sometimes return `InvalidArgument` due to timestamps getting flushed and removed. Some logic was added to handle that. 3) When timestamps are not persisted, we don't try to read with older timestamps, since that makes it easier to get `InvalidArgument`. This capability is not yet needed by our customers, so it is disabled for now.
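For reference, a minimal sketch of how the mode under test is enabled through the public API (standard options only; the exact compatibility constraints are the ones encoded in db_crashtest.py below):

```
#include <string>

#include "rocksdb/comparator.h"
#include "rocksdb/db.h"

// Sketch only: open a DB with 8-byte user-defined timestamps that live only
// in the memtable and are stripped at flush time (not persisted in SSTs).
ROCKSDB_NAMESPACE::Status OpenUdtMemtableOnlyDb(const std::string& path,
                                                ROCKSDB_NAMESPACE::DB** db) {
  using namespace ROCKSDB_NAMESPACE;
  Options options;
  options.create_if_missing = true;
  // Timestamp-aware comparator with a fixed 8-byte (uint64) timestamp.
  options.comparator = BytewiseComparatorWithU64Ts();
  // The feature under test: do not persist user-defined timestamps.
  options.persist_user_defined_timestamps = false;
  // Per the crash test constraints below, concurrent memtable writes are
  // disabled when timestamps are not persisted.
  options.allow_concurrent_memtable_write = false;
  return DB::Open(options, path, db);
}
```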
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12124 Test Plan: running multiple flavor of this test on continuous run for sometime before checkin Reviewed By: ltamasi Differential Revision: D51916267 Pulled By: jowlyzhang fbshipit-source-id: 3f3eb5f9618d05d296062820e0ef5cb8edc7c2b2 --- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 4 ++++ db_stress_tool/db_stress_test_base.cc | 21 +++++++++++++++++++++ tools/db_crashtest.py | 25 ++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 485400e05..adbc554ab 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -313,6 +313,7 @@ DECLARE_uint32(memtable_protection_bytes_per_key); DECLARE_uint32(block_protection_bytes_per_key); DECLARE_uint64(user_timestamp_size); +DECLARE_bool(persist_user_defined_timestamps); DECLARE_string(secondary_cache_uri); DECLARE_int32(secondary_cache_fault_one_in); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index cd1c978b8..c6ffbc93e 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -1025,6 +1025,10 @@ DEFINE_uint64(user_timestamp_size, 0, "Number of bytes for a user-defined timestamp. Currently, only " "8-byte is supported"); +DEFINE_bool(persist_user_defined_timestamps, true, + "Flag to indicate whether user-defined timestamps will be persisted" + " during Flush"); + DEFINE_int32(open_metadata_write_fault_one_in, 0, "On non-zero, enables fault injection on file metadata write " "during DB reopen."); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index f2b4f50b8..e79a71127 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -430,6 +430,13 @@ Status StressTest::AssertSame(DB* db, ColumnFamilyHandle* cf, PinnableSlice v; s = db->Get(ropt, cf, snap_state.key, &v); if (!s.ok() && !s.IsNotFound()) { + // When `persist_user_defined_timestamps` is false, a repeated read with + // both a read timestamp and an explicitly taken snapshot cannot guarantee + // consistent result all the time. When it cannot return consistent result, + // it will return an `InvalidArgument` status. + if (s.IsInvalidArgument() && !FLAGS_persist_user_defined_timestamps) { + return Status::OK(); + } return s; } if (snap_state.status != s) { @@ -2668,6 +2675,8 @@ void StressTest::PrintEnv() const { static_cast(FLAGS_fail_if_options_file_error)); fprintf(stdout, "User timestamp size bytes : %d\n", static_cast(FLAGS_user_timestamp_size)); + fprintf(stdout, "Persist user defined timestamps : %d\n", + FLAGS_persist_user_defined_timestamps); fprintf(stdout, "WAL compression : %s\n", FLAGS_wal_compression.c_str()); fprintf(stdout, "Try verify sst unique id : %d\n", @@ -3087,6 +3096,11 @@ bool StressTest::MaybeUseOlderTimestampForPointLookup(ThreadState* thread, return false; } + if (!FLAGS_persist_user_defined_timestamps) { + // Not read with older timestamps to avoid get InvalidArgument. + return false; + } + assert(thread); if (!thread->rand.OneInOpt(3)) { return false; @@ -3116,6 +3130,11 @@ void StressTest::MaybeUseOlderTimestampForRangeScan(ThreadState* thread, return; } + if (!FLAGS_persist_user_defined_timestamps) { + // Not read with older timestamps to avoid get InvalidArgument. 
+ return; + } + assert(thread); if (!thread->rand.OneInOpt(3)) { return; @@ -3175,6 +3194,8 @@ void CheckAndSetOptionsForUserTimestamp(Options& options) { exit(1); } options.comparator = cmp; + options.persist_user_defined_timestamps = + FLAGS_persist_user_defined_timestamps; } bool InitializeOptionsFromFile(Options& options) { diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 2f8b0d0fc..93f1f2427 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -458,6 +458,8 @@ def is_direct_io_supported(dbname): "test_cf_consistency": 0, "test_batches_snapshots": 0, "user_timestamp_size": 8, + # Below flag is randomly picked once and kept consistent in following runs. + "persist_user_defined_timestamps": random.choice([0, 1, 1]), "use_merge": 0, "use_full_merge_v1": 0, "use_txn": 0, @@ -551,7 +553,6 @@ def is_direct_io_supported(dbname): "create_timestamped_snapshot_one_in": 0, } - def finalize_and_sanitize(src_params): dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()} if is_release_mode(): @@ -712,6 +713,28 @@ def finalize_and_sanitize(src_params): if (dest_params["cache_size"] <= 0 or dest_params["db_write_buffer_size"] <= 0): dest_params["use_write_buffer_manager"] = 0 + if dest_params["user_timestamp_size"] > 0 and dest_params["persist_user_defined_timestamps"] == 0: + # Features that are not compatible with UDT in memtable only feature. + dest_params["delpercent"] += dest_params["delrangepercent"] + dest_params["delrangepercent"] = 0 + dest_params["enable_blob_files"] = 0 + dest_params["atomic_flush"] = 0 + dest_params["allow_concurrent_memtable_write"] = 0 + dest_params["block_protection_bytes_per_key"] = 0 + # TODO(yuzhangyu): make stress test logic handle this and enable testing + # these APIs. + # These operations need to compare side to side one operation with another. + # It's hard to guarantee their consistency because when timestamps can be + # collapsed, only operations using the same SuperVersion can be consistent + # with each other. There is no external APIs to ensure that. + dest_params["use_multiget"] = 0 + dest_params["use_multi_get_entity"] = 0 + dest_params["readpercent"] += dest_params.get("iterpercent", 10); + dest_params["iterpercent"] = 0 + # Only best efforts recovery test support disabling wal and + # disable atomic flush. + if dest_params["test_best_efforts_recovery"] == 0: + dest_params["disable_wal"] = 0 return dest_params From ebb5242d559bd48c90e7a38aa9594c4b593a7031 Mon Sep 17 00:00:00 2001 From: anand76 Date: Tue, 12 Dec 2023 10:58:00 -0800 Subject: [PATCH 327/386] Sanitize the secondary_cache option in TieredCacheOptions (#12137) Summary: Sanitize the `secondary_cache` field in the `cache_opts` option of `TieredCacheOptions` to `nullptr` if set by the user. The nvm secondary cache should be directly set in `TieredCacheOptions`. 
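For illustration, a hedged sketch of the intended construction, mirroring the test helper earlier in this series; `my_nvm_sec_cache` stands in for whatever `SecondaryCache` implementation the user supplies, and the capacity and ratio values are arbitrary:

```
#include <memory>

#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"

// Sketch: the nvm secondary cache goes in TieredCacheOptions::nvm_sec_cache.
// Any capacity or secondary_cache set on the primary cache options would be
// ignored/sanitized by NewTieredCache after this change.
std::shared_ptr<ROCKSDB_NAMESPACE::Cache> MakeTieredCache(
    std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> my_nvm_sec_cache) {
  using namespace ROCKSDB_NAMESPACE;
  LRUCacheOptions lru_opts;  // primary tier options; leave capacity and
                             // secondary_cache unset here
  TieredCacheOptions opts;
  opts.cache_opts = &lru_opts;
  opts.cache_type = PrimaryCacheType::kCacheTypeLRU;
  opts.total_capacity = 256 << 20;  // primary + compressed secondary
  opts.compressed_secondary_ratio = 0.3;
  opts.adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto;
  opts.nvm_sec_cache = my_nvm_sec_cache;  // set the nvm tier here
  return NewTieredCache(opts);
}
```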
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12137 Reviewed By: akankshamahajan15 Differential Revision: D52063817 Pulled By: anand1976 fbshipit-source-id: 255116c665a9b908c8f44109a2d331d4b73e7591 --- cache/secondary_cache_adapter.cc | 2 ++ include/rocksdb/cache.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index b36f3a381..dce18390e 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -683,12 +683,14 @@ std::shared_ptr NewTieredCache(const TieredCacheOptions& _opts) { *(static_cast_with_check( opts.cache_opts)); cache_opts.capacity = opts.total_capacity; + cache_opts.secondary_cache = nullptr; cache = cache_opts.MakeSharedCache(); } else if (opts.cache_type == PrimaryCacheType::kCacheTypeHCC) { HyperClockCacheOptions cache_opts = *(static_cast_with_check( opts.cache_opts)); cache_opts.capacity = opts.total_capacity; + cache_opts.secondary_cache = nullptr; cache = cache_opts.MakeSharedCache(); } else { return nullptr; diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index d3762b4a2..43a422049 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -499,6 +499,10 @@ enum TieredAdmissionPolicy { // allocations costed to the block cache, will be distributed // proportionally across both the primary and secondary. struct TieredCacheOptions { + // This should point to an instance of either LRUCacheOptions or + // HyperClockCacheOptions, depending on the cache_type. In either + // case, the capacity and secondary_cache fields in those options + // should not be set. If set, they will be ignored by NewTieredCache. ShardedCacheOptions* cache_opts = nullptr; PrimaryCacheType cache_type = PrimaryCacheType::kCacheTypeLRU; TieredAdmissionPolicy adm_policy = TieredAdmissionPolicy::kAdmPolicyAuto; From d8e47620d7e0aa6ecc939eb55e2f2518a7f030b8 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 13 Dec 2023 10:37:27 -0800 Subject: [PATCH 328/386] Speedup based on pending compaction bytes relative to data size (#12130) Summary: RocksDB self throttles per-DB compaction parallelism until it detects compaction pressure. The pressure detection based on pending compaction bytes was only comparing against the slowdown trigger (`soft_pending_compaction_bytes_limit`). Online services tend to set that extremely high to avoid stalling at all costs. Perhaps they should have set it to zero, but we never documented that zero disables stalling so I have been telling everyone to increase it for years. This PR adds pressure detection based on pending compaction bytes relative to the size of bottommost data. 
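As a rough worked example using the constants introduced below (bottommost-size multiplier of 8, slowdown-trigger divisor of 4): with `soft_pending_compaction_bytes_limit` set to a very high 1 TB, the old rule only raised compaction parallelism once estimated compaction debt reached 1 TB / 4 = 256 GB; with 10 GB of bottommost data, the new rule raises it at min(8 * 10 GB, 256 GB) = 80 GB of debt, i.e. much earlier for a typical oversized slowdown setting.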
The size of bottommost data should be fairly stable and proportional to the logical data size Pull Request resolved: https://github.com/facebook/rocksdb/pull/12130 Reviewed By: hx235 Differential Revision: D52000746 Pulled By: ajkr fbshipit-source-id: 7e1fd170901a74c2d4a69266285e3edf6e7631c7 --- db/column_family.cc | 60 ++++++++++++++----- db/column_family_test.cc | 37 ++++++++++++ db/version_set.cc | 4 +- db/version_set.h | 6 ++ db/version_set_test.cc | 49 +++++++++++++++ .../behavior_changes/debt_based_speedup.md | 1 + 6 files changed, 139 insertions(+), 18 deletions(-) create mode 100644 unreleased_history/behavior_changes/debt_based_speedup.md diff --git a/db/column_family.cc b/db/column_family.cc index 280533993..9782cd31a 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -833,8 +833,8 @@ std::unique_ptr SetupDelay( return write_controller->GetDelayToken(write_rate); } -int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger, - int level0_slowdown_writes_trigger) { +int GetL0FileCountForCompactionSpeedup(int level0_file_num_compaction_trigger, + int level0_slowdown_writes_trigger) { // SanitizeOptions() ensures it. assert(level0_file_num_compaction_trigger <= level0_slowdown_writes_trigger); @@ -864,6 +864,36 @@ int GetL0ThresholdSpeedupCompaction(int level0_file_num_compaction_trigger, return static_cast(res); } } + +uint64_t GetPendingCompactionBytesForCompactionSpeedup( + const MutableCFOptions& mutable_cf_options, + const VersionStorageInfo* vstorage) { + // Compaction debt relatively large compared to the stable (bottommost) data + // size indicates compaction fell behind. + const uint64_t kBottommostSizeMultiplier = 8; + // Meaningful progress toward the slowdown trigger is another good indication. + const uint64_t kSlowdownTriggerDivisor = 4; + + uint64_t bottommost_files_size = 0; + for (const auto& level_and_file : vstorage->BottommostFiles()) { + bottommost_files_size += level_and_file.second->fd.GetFileSize(); + } + + // Slowdown trigger might be zero but that means compaction speedup should + // always happen (undocumented/historical), so no special treatment is needed. + uint64_t slowdown_threshold = + mutable_cf_options.soft_pending_compaction_bytes_limit / + kSlowdownTriggerDivisor; + + // Size of zero, however, should not be used to decide to speedup compaction. + if (bottommost_files_size == 0) { + return slowdown_threshold; + } + + uint64_t size_threshold = + MultiplyCheckOverflow(bottommost_files_size, kBottommostSizeMultiplier); + return std::min(size_threshold, slowdown_threshold); +} } // anonymous namespace std::pair @@ -1019,7 +1049,7 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions( } else { assert(write_stall_condition == WriteStallCondition::kNormal); if (vstorage->l0_delay_trigger_count() >= - GetL0ThresholdSpeedupCompaction( + GetL0FileCountForCompactionSpeedup( mutable_cf_options.level0_file_num_compaction_trigger, mutable_cf_options.level0_slowdown_writes_trigger)) { write_controller_token_ = @@ -1029,22 +1059,22 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions( "[%s] Increasing compaction threads because we have %d level-0 " "files ", name_.c_str(), vstorage->l0_delay_trigger_count()); - } else if (vstorage->estimated_compaction_needed_bytes() >= - mutable_cf_options.soft_pending_compaction_bytes_limit / 4) { - // Increase compaction threads if bytes needed for compaction exceeds - // 1/4 of threshold for slowing down. 
+ } else if (mutable_cf_options.soft_pending_compaction_bytes_limit == 0) { // If soft pending compaction byte limit is not set, always speed up // compaction. write_controller_token_ = write_controller->GetCompactionPressureToken(); - if (mutable_cf_options.soft_pending_compaction_bytes_limit > 0) { - ROCKS_LOG_INFO( - ioptions_.logger, - "[%s] Increasing compaction threads because of estimated pending " - "compaction " - "bytes %" PRIu64, - name_.c_str(), vstorage->estimated_compaction_needed_bytes()); - } + } else if (vstorage->estimated_compaction_needed_bytes() >= + GetPendingCompactionBytesForCompactionSpeedup( + mutable_cf_options, vstorage)) { + write_controller_token_ = + write_controller->GetCompactionPressureToken(); + ROCKS_LOG_INFO( + ioptions_.logger, + "[%s] Increasing compaction threads because of estimated pending " + "compaction " + "bytes %" PRIu64, + name_.c_str(), vstorage->estimated_compaction_needed_bytes()); } else { write_controller_token_.reset(); } diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 25bc0b36f..edadfebf1 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -2971,6 +2971,43 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupTwoColumnFamilies) { ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); } +TEST_P(ColumnFamilyTest, CompactionSpeedupForCompactionDebt) { + db_options_.max_background_compactions = 6; + Open(); + ColumnFamilyData* cfd = + static_cast(db_->DefaultColumnFamily())->cfd(); + MutableCFOptions mutable_cf_options(column_family_options_); + mutable_cf_options.soft_pending_compaction_bytes_limit = + std::numeric_limits::max(); + + { + // No bottommost data, so debt ratio cannot trigger speedup. + VersionStorageInfo* vstorage = cfd->current()->storage_info(); + vstorage->TEST_set_estimated_compaction_needed_bytes(1048576 /* 1MB */); + RecalculateWriteStallConditions(cfd, mutable_cf_options); + ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); + } + + // Add a tiny amount of bottommost data. + ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Flush(FlushOptions())); + + { + // 1MB debt is way bigger than bottommost data so definitely triggers + // speedup. + VersionStorageInfo* vstorage = cfd->current()->storage_info(); + vstorage->TEST_set_estimated_compaction_needed_bytes(1048576 /* 1MB */); + RecalculateWriteStallConditions(cfd, mutable_cf_options); + ASSERT_EQ(6, dbfull()->TEST_BGCompactionsAllowed()); + + // Eight bytes is way smaller than bottommost data so definitely does not + // trigger speedup. 
+ vstorage->TEST_set_estimated_compaction_needed_bytes(8); + RecalculateWriteStallConditions(cfd, mutable_cf_options); + ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); + } +} + TEST_P(ColumnFamilyTest, CreateAndDestroyOptions) { std::unique_ptr cfo(new ColumnFamilyOptions()); ColumnFamilyHandle* cfh; diff --git a/db/version_set.cc b/db/version_set.cc index 926c768ca..335d492bc 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3067,9 +3067,7 @@ void VersionStorageInfo::PrepareForVersionAppend( GenerateFileIndexer(); GenerateLevelFilesBrief(); GenerateLevel0NonOverlapping(); - if (!immutable_options.allow_ingest_behind) { - GenerateBottommostFiles(); - } + GenerateBottommostFiles(); GenerateFileLocationIndex(); } diff --git a/db/version_set.h b/db/version_set.h index 6a6522ec9..5ccb69771 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -490,6 +490,12 @@ class VersionStorageInfo { files_marked_for_periodic_compaction_.emplace_back(level, f); } + // REQUIRES: PrepareForVersionAppend has been called + const autovector>& BottommostFiles() const { + assert(finalized_); + return bottommost_files_; + } + // REQUIRES: ComputeCompactionScore has been called // REQUIRES: DB mutex held during access const autovector>& diff --git a/db/version_set_test.cc b/db/version_set_test.cc index f925ec36e..5def22925 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -520,6 +520,55 @@ TEST_F(VersionStorageInfoTest, EstimateLiveDataSize2) { ASSERT_EQ(4U, vstorage_.EstimateLiveDataSize()); } +TEST_F(VersionStorageInfoTest, SingleLevelBottommostData) { + // In case of a single level, the oldest L0 file is bottommost. This could be + // improved in case the L0 files cover disjoint key-ranges. + Add(0 /* level */, 1U /* file_number */, "A" /* smallest */, + "Z" /* largest */, 1U /* file_size */); + Add(0 /* level */, 2U /* file_number */, "A" /* smallest */, + "Z" /* largest */, 1U /* file_size */); + Add(0 /* level */, 3U /* file_number */, "0" /* smallest */, + "9" /* largest */, 1U /* file_size */); + + UpdateVersionStorageInfo(); + + ASSERT_EQ(1, vstorage_.BottommostFiles().size()); + ASSERT_EQ(0, vstorage_.BottommostFiles()[0].first); + ASSERT_EQ(3U, vstorage_.BottommostFiles()[0].second->fd.GetNumber()); +} + +TEST_F(VersionStorageInfoTest, MultiLevelBottommostData) { + // In case of multiple levels, the oldest file for a key-range from each L1+ + // level is bottommost. This could be improved in case an L0 file contains the + // oldest data for some range of keys. 
+ Add(0 /* level */, 1U /* file_number */, "A" /* smallest */, + "Z" /* largest */, 1U /* file_size */); + Add(0 /* level */, 2U /* file_number */, "0" /* smallest */, + "9" /* largest */, 1U /* file_size */); + Add(1 /* level */, 3U /* file_number */, "A" /* smallest */, + "D" /* largest */, 1U /* file_size */); + Add(2 /* level */, 4U /* file_number */, "E" /* smallest */, + "H" /* largest */, 1U /* file_size */); + Add(2 /* level */, 5U /* file_number */, "I" /* smallest */, + "L" /* largest */, 1U /* file_size */); + + UpdateVersionStorageInfo(); + + autovector> bottommost_files = + vstorage_.BottommostFiles(); + std::sort(bottommost_files.begin(), bottommost_files.end(), + [](const std::pair& lhs, + const std::pair& rhs) { + assert(lhs.second); + assert(rhs.second); + return lhs.second->fd.GetNumber() < rhs.second->fd.GetNumber(); + }); + ASSERT_EQ(3, bottommost_files.size()); + ASSERT_EQ(3U, bottommost_files[0].second->fd.GetNumber()); + ASSERT_EQ(4U, bottommost_files[1].second->fd.GetNumber()); + ASSERT_EQ(5U, bottommost_files[2].second->fd.GetNumber()); +} + TEST_F(VersionStorageInfoTest, GetOverlappingInputs) { // Two files that overlap at the range deletion tombstone sentinel. Add(1, 1U, {"a", 0, kTypeValue}, diff --git a/unreleased_history/behavior_changes/debt_based_speedup.md b/unreleased_history/behavior_changes/debt_based_speedup.md new file mode 100644 index 000000000..8db8b6688 --- /dev/null +++ b/unreleased_history/behavior_changes/debt_based_speedup.md @@ -0,0 +1 @@ +Compactions can be scheduled in parallel in an additional scenario: high compaction debt relative to the data size. From d926593df527bd5e93aca8b85b9c9d4ca6268386 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Wed, 13 Dec 2023 12:15:04 -0800 Subject: [PATCH 329/386] Fix stress tests failure for auto_readahead_size (#12131) Summary: When auto_readahead_size is enabled, the Prev operation calls SeekForPrev in db_iter so that - BlockBasedTableIterator can point index_iter_ to the right block, and - readahead_cache_lookup can be disabled. However, there can be cases where SeekForPrev might not go through Version_set and call BlockBasedTableIterator::SeekForPrev. In that case, when BlockBasedTableIterator::Prev is called, it returns a NotSupported error. This is more of a corner case. So to handle it, the SeekForPrev call was removed from db_iter and index_iter_ is reseeked in the Prev operation instead. block_iter_'s key already points to the right block, so reseeking index_iter_ solves the issue.
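For reference, a minimal sketch (illustrative keys, not taken from the failing test) of the user-visible pattern involved, i.e. a forward scan followed by a direction change with auto_readahead_size enabled:

```
#include <memory>

#include "rocksdb/db.h"

// Sketch: forward scan, then a direction change to backward, with
// auto_readahead_size enabled. Before this fix, certain direction changes
// could surface a NotSupported error from BlockBasedTableIterator::Prev.
void DirectionChangeSketch(ROCKSDB_NAMESPACE::DB* db) {
  using namespace ROCKSDB_NAMESPACE;
  ReadOptions ro;
  ro.auto_readahead_size = true;
  Slice upper_bound("key9");
  ro.iterate_upper_bound = &upper_bound;  // required for this feature
  std::unique_ptr<Iterator> it(db->NewIterator(ro));
  for (it->Seek("key1"); it->Valid(); it->Next()) {
    // forward scan warms up the readahead/index state
  }
  it->SeekForPrev("key5");
  it->Prev();  // direction change, now handled by reseeking index_iter_
  // Check it->status() as usual.
}
```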
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12131 Test Plan: - Tested on db_stress command that was failing - `./db_stress --acquire_snapshot_one_in=10000 --adaptive_readahead=1 --allow_data_in_errors=True --async_io=0 --atomic_flush=0 --auto_readahead_size=1 --avoid_flush_during_recovery=0 --avoid_unnecessary_blocking_io=1 --backup_max_size=104857600 --backup_one_in=100000 --batch_protection_bytes_per_key=0 --best_efforts_recovery=1 --block_protection_bytes_per_key=1 --block_size=16384 --bloom_before_level=2147483646 --bloom_bits=12 --bottommost_compression_type=none --bottommost_file_compaction_delay=0 --bytes_per_sync=262144 --cache_index_and_filter_blocks=0 --cache_size=33554432 --cache_type=lru_cache --charge_compression_dictionary_building_buffer=1 --charge_file_metadata=0 --charge_filter_construction=1 --charge_table_reader=1 --checkpoint_one_in=1000000 --checksum_type=kxxHash64 --clear_column_family_one_in=0 --column_families=1 --compact_files_one_in=1000000 --compact_range_one_in=1000000 --compaction_pri=4 --compaction_readahead_size=1048576 --compaction_ttl=10 --compressed_secondary_cache_size=16777216 --compression_checksum=0 --compression_max_dict_buffer_bytes=0 --compression_max_dict_bytes=0 --compression_parallel_threads=1 --compression_type=zlib --compression_use_zstd_dict_trainer=0 --compression_zstd_max_train_bytes=0 --continuous_verification_interval=0 --data_block_index_type=1 --db=/home/akankshamahajan/rocksdb_auto_tune/dev/shm/rocksdb_test/rocksdb_crashtest_blackbox --db_write_buffer_size=134217728 --delpercent=4 --delrangepercent=1 --destroy_db_initially=0 --detect_filter_construct_corruption=1 --disable_wal=1 --enable_compaction_filter=0 --enable_pipelined_write=0 --enable_thread_tracking=1 --expected_values_dir=/home/akankshamahajan/rocksdb_auto_tune/dev/shm/rocksdb_test/rocksdb_crashtest_expected --fail_if_options_file_error=1 --fifo_allow_compaction=1 --file_checksum_impl=big --flush_one_in=1000000 --format_version=6 --get_current_wal_file_one_in=0 --get_live_files_one_in=1000000 --get_property_one_in=1000000 --get_sorted_wal_files_one_in=0 --index_block_restart_interval=10 --index_type=0 --ingest_external_file_one_in=0 --initial_auto_readahead_size=0 --iterpercent=10 --key_len_percent_dist=1,30,69 --level_compaction_dynamic_level_bytes=1 --lock_wal_one_in=1000000 --long_running_snapshots=1 --manual_wal_flush_one_in=0 --mark_for_compaction_one_file_in=0 --max_auto_readahead_size=524288 --max_background_compactions=20 --max_bytes_for_level_base=10485760 --max_key=25000000 --max_key_len=3 --max_manifest_file_size=1073741824 --max_write_batch_group_size_bytes=16 --max_write_buffer_number=3 --max_write_buffer_size_to_maintain=4194304 --memtable_max_range_deletions=1000 --memtable_prefix_bloom_size_ratio=0 --memtable_protection_bytes_per_key=2 --memtable_whole_key_filtering=0 --memtablerep=skip_list --min_write_buffer_number_to_merge=1 --mmap_read=1 --mock_direct_io=False --nooverwritepercent=1 --num_file_reads_for_auto_readahead=1 --open_files=-1 --open_metadata_write_fault_one_in=0 --open_read_fault_one_in=0 --open_write_fault_one_in=0 --ops_per_thread=100000000 --optimize_filters_for_memory=1 --paranoid_file_checks=1 --partition_filters=0 --partition_pinning=1 --pause_background_one_in=1000000 --periodic_compaction_seconds=10 --prefix_size=-1 --prefixpercent=0 --prepopulate_block_cache=0 --preserve_internal_time_seconds=0 --progress_reports=0 --read_fault_one_in=1000 --readahead_size=524288 --readpercent=50 --recycle_log_file_num=0 --reopen=0 
--secondary_cache_fault_one_in=0 --secondary_cache_uri= --set_options_one_in=10000 --skip_verifydb=1 --snapshot_hold_ops=100000 --sst_file_manager_bytes_per_sec=0 --sst_file_manager_bytes_per_truncate=0 --stats_dump_period_sec=0 --subcompactions=2 --sync=0 --sync_fault_injection=0 --target_file_size_base=2097152 --target_file_size_multiplier=2 --test_batches_snapshots=0 --top_level_index_pinning=3 --unpartitioned_pinning=3 --use_direct_io_for_flush_and_compaction=0 --use_direct_reads=0 --use_full_merge_v1=0 --use_get_entity=1 --use_merge=1 --use_multi_get_entity=0 --use_multiget=1 --use_put_entity_one_in=10 --use_write_buffer_manager=0 --user_timestamp_size=0 --value_size_mult=32 --verification_only=0 --verify_checksum=1 --verify_checksum_one_in=1000000 --verify_db_one_in=0 --verify_file_checksums_one_in=1000000 --verify_iterator_with_expected_state_one_in=5 --verify_sst_unique_id_in_manifest=1 --wal_bytes_per_sync=0 --wal_compression=zstd --write_buffer_size=4194304 --write_dbid_to_manifest=0 --write_fault_one_in=0 --writepercent=35` - make crash_test -j32 Reviewed By: anand1976 Differential Revision: D51986326 Pulled By: akankshamahajan15 fbshipit-source-id: 90e11e63d1f1894770b457a44d8b213ae5512df9 --- db/db_iter.cc | 16 +++------- db/db_iter.h | 1 - include/rocksdb/options.h | 8 ++--- .../block_based/block_based_table_iterator.cc | 29 +++++++++++++++---- .../block_based/block_based_table_iterator.h | 1 - .../bug_fixes/auto_readahead_size.md | 1 + 6 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 unreleased_history/bug_fixes/auto_readahead_size.md diff --git a/db/db_iter.cc b/db/db_iter.cc index 991ec8fc4..507bb2577 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -83,8 +83,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options, cfd_(cfd), timestamp_ub_(read_options.timestamp), timestamp_lb_(read_options.iter_start_ts), - timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0), - auto_readahead_size_(read_options.auto_readahead_size) { + timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) { RecordTick(statistics_, NO_ITERATOR_CREATED); if (pin_thru_lifetime_) { pinned_iters_mgr_.StartPinning(); @@ -747,22 +746,15 @@ bool DBIter::ReverseToBackward() { // When current_entry_is_merged_ is true, iter_ may be positioned on the next // key, which may not exist or may have prefix different from current. // If that's the case, seek to saved_key_. - // - // In case of auto_readahead_size enabled, index_iter moves forward during - // forward scan for block cache lookup and points to different block. If Prev - // op is called, it needs to call SeekForPrev to point to right index_iter_ in - // BlockBasedTableIterator. This only happens when direction is changed from - // forward to backward. - if ((current_entry_is_merged_ && - (!expect_total_order_inner_iter() || !iter_.Valid())) || - auto_readahead_size_) { + if (current_entry_is_merged_ && + (!expect_total_order_inner_iter() || !iter_.Valid())) { IterKey last_key; // Using kMaxSequenceNumber and kValueTypeForSeek // (not kValueTypeForSeekForPrev) to seek to a key strictly smaller // than saved_key_. 
last_key.SetInternalKey(ParsedInternalKey( saved_key_.GetUserKey(), kMaxSequenceNumber, kValueTypeForSeek)); - if (!expect_total_order_inner_iter() || auto_readahead_size_) { + if (!expect_total_order_inner_iter()) { iter_.SeekForPrev(last_key.GetInternalKey()); } else { // Some iterators may not support SeekForPrev(), so we avoid using it diff --git a/db/db_iter.h b/db/db_iter.h index ac6487802..5022405c3 100644 --- a/db/db_iter.h +++ b/db/db_iter.h @@ -402,7 +402,6 @@ class DBIter final : public Iterator { const Slice* const timestamp_lb_; const size_t timestamp_size_; std::string saved_timestamp_; - bool auto_readahead_size_; }; // Return a new iterator that converts internal keys (yielded by diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index be4eb8fba..415f5a70d 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1744,13 +1744,9 @@ struct ReadOptions { // For this feature to enabled, iterate_upper_bound must also be specified. // // NOTE: - Recommended for forward Scans only. - // - In case of backward scans like Prev or SeekForPrev, the - // cost of these backward operations might increase and affect the - // performace. So this option should not be enabled if workload - // contains backward scans. // - If there is a backward scans, this option will be - // disabled internally and won't be reset if forward scan is done - // again. + // disabled internally and won't be enabled again if the forward scan + // is issued again. // // Default: false bool auto_readahead_size = false; diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index f1b95fb35..93fdd1d61 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -303,12 +303,29 @@ bool BlockBasedTableIterator::NextAndGetResult(IterateResult* result) { } void BlockBasedTableIterator::Prev() { - // Return Error. - if (readahead_cache_lookup_) { - block_iter_.Invalidate( - Status::NotSupported("auto tuning of readahead_size in is not " - "supported with Prev operation.")); - return; + if (readahead_cache_lookup_ && !IsIndexAtCurr()) { + // In case of readahead_cache_lookup_, index_iter_ has moved forward. So we + // need to reseek the index_iter_ to point to current block by using + // block_iter_'s key. + if (Valid()) { + ResetBlockCacheLookupVar(); + direction_ = IterDirection::kBackward; + Slice last_key = key(); + + index_iter_->Seek(last_key); + is_index_at_curr_block_ = true; + + // Check for IO error. 
+ if (!index_iter_->Valid()) { + ResetDataIter(); + return; + } + } + + if (!Valid()) { + ResetDataIter(); + return; + } } ResetBlockCacheLookupVar(); diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 77fe6aa6d..84c83014c 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -411,7 +411,6 @@ class BlockBasedTableIterator : public InternalIteratorBase { uint64_t& start_updated_offset, uint64_t& end_updated_offset, size_t& prev_handles_size); - // *** END APIs relevant to auto tuning of readahead_size *** }; } // namespace ROCKSDB_NAMESPACE diff --git a/unreleased_history/bug_fixes/auto_readahead_size.md b/unreleased_history/bug_fixes/auto_readahead_size.md new file mode 100644 index 000000000..e82973771 --- /dev/null +++ b/unreleased_history/bug_fixes/auto_readahead_size.md @@ -0,0 +1 @@ +Fix a corner case with auto_readahead_size where Prev Operation returns NOT SUPPORTED error when scans direction is changed from forward to backward. From c74531b1d291985d3c1c6d074a771d81cab48658 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 13 Dec 2023 15:58:46 -0800 Subject: [PATCH 330/386] Fix a nuisance compiler warning from clang (#12144) Summary: Example: ``` cache/clock_cache.cc:56:7: error: fallthrough annotation in unreachable code [-Werror,-Wimplicit-fallthrough] FALLTHROUGH_INTENDED; ^ ./port/lang.h:10:30: note: expanded from macro 'FALLTHROUGH_INTENDED' ^ ``` In clang < 14, this is annoyingly generated from -Wimplicit-fallthrough, but was changed to -Wunreachable-code-fallthrough (implied by -Wunreachable-code) in clang 14. See https://reviews.llvm.org/D107933 for how this nuisance pattern generated false positives similar to ours in the Linux kernel. Just to underscore the ridiculousness of this warning, here an error is reported on the annotation, not the call to do_something(), depending on the constexpr value (https://godbolt.org/z/EvxqdPTdr): ``` #include void do_something(); void test(int v) { switch (v) { case 1: if constexpr (std::atomic::is_always_lock_free) { return; } else { do_something(); [[fallthrough]]; } case 2: return; } } ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/12144 Test Plan: Added the warning to our Makefile for USE_CLANG, which reproduced the warning-as-error as shown above, but is now fixed. 
Reviewed By: jaykorean Differential Revision: D52139615 Pulled By: pdillinger fbshipit-source-id: ba967ae700c0916d1a478bc465cf917633e337d9 --- Makefile | 2 +- cache/clock_cache.cc | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 50dddc976..d9ebbb72b 100644 --- a/Makefile +++ b/Makefile @@ -539,7 +539,7 @@ endif ifdef USE_CLANG # Used by some teams in Facebook - WARNING_FLAGS += -Wshift-sign-overflow -Wambiguous-reversed-operator + WARNING_FLAGS += -Wshift-sign-overflow -Wambiguous-reversed-operator -Wimplicit-fallthrough endif ifeq ($(PLATFORM), OS_OPENBSD) diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index fd330d90d..e37c03fb5 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -51,14 +51,15 @@ inline uint64_t GetInitialCountdown(Cache::Priority priority) { switch (priority) { case Cache::Priority::HIGH: return ClockHandle::kHighCountdown; - default: - assert(false); - FALLTHROUGH_INTENDED; case Cache::Priority::LOW: return ClockHandle::kLowCountdown; case Cache::Priority::BOTTOM: return ClockHandle::kBottomCountdown; } + // Switch should have been exhaustive. + assert(false); + // For release build, fall back on something reasonable. + return ClockHandle::kLowCountdown; } inline void MarkEmpty(ClockHandle& h) { From cd21e4e69d76ec4ec3b080c8cdae016ac2309cc5 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 13 Dec 2023 17:34:18 -0800 Subject: [PATCH 331/386] Some further cleanup in WriteBatchWithIndex::MultiGetFromBatchAndDB (#12143) Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/12143 https://github.com/facebook/rocksdb/pull/11982 changed `WriteBatchWithIndex::MultiGetFromBatchDB` to preallocate space in the `autovector`s `key_contexts` and `merges` in order to prevent any reallocations, both as an optimization and in order to prevent pointers into the container from being invalidated during subsequent insertions. On second thought, this preallocation can actually be a pessimization in cases when only a small subset of keys require querying the underlying database. To prevent any memory regressions, the PR reverts this preallocation. In addition, it makes some small code hygiene improvements like incorporating the `PinnableWideColumns` object into `MergeTuple`. 
Reviewed By: jaykorean Differential Revision: D52136513 fbshipit-source-id: 21aa835084433feab27b501d9d1fc5434acea609 --- .../write_batch_with_index.cc | 74 ++++++++++++------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index f969f6067..d5a2f0351 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -669,27 +669,41 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( return; } - autovector existing; - existing.reserve(num_keys); + struct MergeTuple { + MergeTuple(const Slice& _key, Status* _s, MergeContext&& _merge_context, + PinnableSlice* _value) + : key(_key), + s(_s), + merge_context(std::move(_merge_context)), + value(_value) { + assert(s); + assert(value); + } - autovector key_contexts; - key_contexts.reserve(num_keys); + Slice key; + Status* s; + PinnableWideColumns existing; + MergeContext merge_context; + PinnableSlice* value; + }; - using MergeTuple = std::tuple; autovector merges; - merges.reserve(num_keys); + + autovector key_contexts; // Since the lifetime of the WriteBatch is the same as that of the transaction // we cannot pin it as otherwise the returned value will not be available // after the transaction finishes. for (size_t i = 0; i < num_keys; ++i) { + const Slice& key = keys[i]; MergeContext merge_context; std::string batch_value; - Status* s = &statuses[i]; - PinnableSlice* pinnable_val = &values[i]; - pinnable_val->Reset(); + Status* const s = &statuses[i]; auto result = WriteBatchWithIndexInternal::GetFromBatch( - this, column_family, keys[i], &merge_context, &batch_value, s); + this, column_family, key, &merge_context, &batch_value, s); + + PinnableSlice* const pinnable_val = &values[i]; + pinnable_val->Reset(); if (result == WBWIIteratorImpl::kFound) { *pinnable_val->GetSelf() = std::move(batch_value); @@ -708,27 +722,35 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( // Note: we have to retrieve all columns if we have to merge KVs from the // batch and the DB; otherwise, the default column is sufficient. + // The columns field will be populated by the loop below to prevent issues + // with dangling pointers. 
if (result == WBWIIteratorImpl::kMergeInProgress) { - existing.emplace_back(); - key_contexts.emplace_back(column_family, keys[i], /* value */ nullptr, - &existing.back(), /* timestamp */ nullptr, - &statuses[i]); - merges.emplace_back(&key_contexts.back(), std::move(merge_context), - pinnable_val); + merges.emplace_back(key, s, std::move(merge_context), pinnable_val); + key_contexts.emplace_back(column_family, key, /* value */ nullptr, + /* columns */ nullptr, /* timestamp */ nullptr, + s); continue; } assert(result == WBWIIteratorImpl::kNotFound); - key_contexts.emplace_back(column_family, keys[i], pinnable_val, + key_contexts.emplace_back(column_family, key, pinnable_val, /* columns */ nullptr, - /* timestamp */ nullptr, &statuses[i]); + /* timestamp */ nullptr, s); } autovector sorted_keys; sorted_keys.reserve(key_contexts.size()); - for (KeyContext& key : key_contexts) { - sorted_keys.emplace_back(&key); + size_t merges_idx = 0; + for (KeyContext& key_context : key_contexts) { + if (!key_context.value) { + assert(*key_context.key == merges[merges_idx].key); + + key_context.columns = &merges[merges_idx].existing; + ++merges_idx; + } + + sorted_keys.emplace_back(&key_context); } // Did not find key in batch OR could not resolve Merges. Try DB. @@ -738,14 +760,10 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( ->MultiGetWithCallback(read_options, column_family, callback, &sorted_keys); - for (auto iter = merges.begin(); iter != merges.end(); ++iter) { - auto& [key_context, merge_context, value] = *iter; - - if (key_context->s->ok() || - key_context->s->IsNotFound()) { // DB lookup succeeded - MergeAcrossBatchAndDB(column_family, *key_context->key, - *key_context->columns, merge_context, value, - key_context->s); + for (const auto& merge : merges) { + if (merge.s->ok() || merge.s->IsNotFound()) { // DB lookup succeeded + MergeAcrossBatchAndDB(column_family, merge.key, merge.existing, + merge.merge_context, merge.value, merge.s); } } } From e7c6259447df84c96f4dc555f8d4ccefcc0a54f2 Mon Sep 17 00:00:00 2001 From: akankshamahajan Date: Thu, 14 Dec 2023 11:25:51 -0800 Subject: [PATCH 332/386] Make auto_readahead_size default true (#12080) Summary: Make auto_readahead_size option default true Pull Request resolved: https://github.com/facebook/rocksdb/pull/12080 Test Plan: benchmarks and exisiting tests Reviewed By: anand1976 Differential Revision: D52152132 Pulled By: akankshamahajan15 fbshipit-source-id: f1515563564e77df457dff2e865e4ede8c3ddf44 --- file/file_prefetch_buffer.cc | 22 +- file/file_prefetch_buffer.h | 39 +- file/prefetch_test.cc | 386 +----------------- include/rocksdb/options.h | 6 +- .../block_based/block_based_table_iterator.cc | 49 --- .../block_based/block_based_table_iterator.h | 1 - table/block_based/block_based_table_reader.cc | 2 +- table/block_based/block_based_table_reader.h | 11 +- table/block_based/block_prefetcher.cc | 14 +- table/block_based/block_prefetcher.h | 11 - table/block_based/partitioned_filter_block.cc | 2 +- table/block_based/partitioned_index_reader.cc | 2 +- table/table_test.cc | 30 ++ .../behavior_changes/auto_readahead_size.md | 1 + 14 files changed, 69 insertions(+), 507 deletions(-) create mode 100644 unreleased_history/behavior_changes/auto_readahead_size.md diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index d392537b9..2bd8c4a85 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -356,6 +356,7 @@ void FilePrefetchBuffer::ReadAheadSizeTuning( uint64_t updated_end_offset = 
Roundup(start_offset + length + readahead_size, alignment); uint64_t initial_end_offset = updated_end_offset; + uint64_t initial_start_offset = updated_start_offset; // Callback to tune the start and end offsets. if (readaheadsize_cb_ != nullptr && readahead_size > 0) { @@ -365,6 +366,8 @@ void FilePrefetchBuffer::ReadAheadSizeTuning( // read_len will be 0 and there is nothing to read/prefetch. if (updated_start_offset == updated_end_offset) { + UpdateReadAheadTrimmedStat((initial_end_offset - initial_start_offset), + (updated_end_offset - updated_start_offset)); return; } @@ -377,6 +380,8 @@ void FilePrefetchBuffer::ReadAheadSizeTuning( // means data has been already prefetched. if (updated_end_offset <= prev_buf_end_offset) { start_offset = end_offset = prev_buf_end_offset; + UpdateReadAheadTrimmedStat((initial_end_offset - initial_start_offset), + (end_offset - start_offset)); return; } } @@ -404,6 +409,9 @@ void FilePrefetchBuffer::ReadAheadSizeTuning( // offset of next prefetch. bufs_[index].initial_end_offset_ = initial_end_offset; read_len = static_cast(roundup_len - chunk_len); + + UpdateReadAheadTrimmedStat((initial_end_offset - initial_start_offset), + (end_offset - start_offset)); } Status FilePrefetchBuffer::HandleOverlappingData( @@ -449,8 +457,7 @@ Status FilePrefetchBuffer::HandleOverlappingData( uint64_t start_offset = bufs_[second].initial_end_offset_; // Second buffer might be out of bound if first buffer already prefetched // that data. - if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size && - !IsOffsetOutOfBound(start_offset)) { + if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size) { size_t read_len = 0; uint64_t end_offset = start_offset, chunk_len = 0; @@ -635,9 +642,6 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, // prefetching. uint64_t start_offset2 = bufs_[curr_].initial_end_offset_; - // Second buffer might be out of bound if first buffer already prefetched - // that data. - if (!IsOffsetOutOfBound(start_offset2)) { // Find updated readahead size after tuning size_t read_len2 = 0; uint64_t end_offset2 = start_offset2, chunk_len2 = 0; @@ -653,7 +657,6 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(const IOOptions& opts, bufs_[second].ClearBuffer(); return s; } - } } } @@ -737,7 +740,6 @@ bool FilePrefetchBuffer::TryReadFromCacheUntracked( return false; } } - UpdateReadAheadSizeForUpperBound(offset, n); s = Prefetch(opts, reader, offset, n + readahead_size_); } if (!s.ok()) { @@ -837,8 +839,6 @@ bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked( } } - UpdateReadAheadSizeForUpperBound(offset, n); - // Prefetch n + readahead_size_/2 synchronously as remaining // readahead_size_/2 will be prefetched asynchronously. s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2, @@ -919,7 +919,6 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, explicit_prefetch_submitted_ = false; bool is_eligible_for_prefetching = false; - UpdateReadAheadSizeForUpperBound(offset, n); if (readahead_size_ > 0 && (!implicit_auto_readahead_ || num_file_reads_ >= num_file_reads_for_auto_readahead_)) { @@ -1014,14 +1013,13 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, start_offset2 = bufs_[curr_].initial_end_offset_; // Second buffer might be out of bound if first buffer already prefetched // that data. 
- if (!IsOffsetOutOfBound(start_offset2)) { + uint64_t end_offset2 = start_offset2, chunk_len2 = 0; ReadAheadSizeTuning(/*read_curr_block=*/false, /*refit_tail=*/false, /*prev_buf_end_offset=*/end_offset1, second, alignment, /*length=*/0, readahead_size, start_offset2, end_offset2, read_len2, chunk_len2); - } } if (read_len1) { diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index b0aa1f1c6..0c6ba7e66 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -105,8 +105,7 @@ class FilePrefetchBuffer { size_t readahead_size = 0, size_t max_readahead_size = 0, bool enable = true, bool track_min_offset = false, bool implicit_auto_readahead = false, uint64_t num_file_reads = 0, - uint64_t num_file_reads_for_auto_readahead = 0, - uint64_t upper_bound_offset = 0, FileSystem* fs = nullptr, + uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr, SystemClock* clock = nullptr, Statistics* stats = nullptr, const std::function& cb = nullptr, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) @@ -127,7 +126,6 @@ class FilePrefetchBuffer { clock_(clock), stats_(stats), usage_(usage), - upper_bound_offset_(upper_bound_offset), readaheadsize_cb_(cb) { assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) || (num_file_reads_ == 0)); @@ -296,11 +294,6 @@ class FilePrefetchBuffer { // Callback function passed to underlying FS in case of asynchronous reads. void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg); - void ResetUpperBoundOffset(uint64_t upper_bound_offset) { - upper_bound_offset_ = upper_bound_offset; - readahead_size_ = initial_auto_readahead_size_; - } - void TEST_GetBufferOffsetandSize(uint32_t index, uint64_t& offset, size_t& len) { offset = bufs_[index].offset_; @@ -452,25 +445,6 @@ class FilePrefetchBuffer { uint64_t offset, size_t n, Slice* result, Status* status); - void UpdateReadAheadSizeForUpperBound(uint64_t offset, size_t n) { - // Adjust readhahead_size till upper_bound if upper_bound_offset_ is - // set. - if (readahead_size_ > 0 && upper_bound_offset_ > 0 && - upper_bound_offset_ > offset) { - if (upper_bound_offset_ < offset + n + readahead_size_) { - readahead_size_ = (upper_bound_offset_ - offset) - n; - RecordTick(stats_, READAHEAD_TRIMMED); - } - } - } - - inline bool IsOffsetOutOfBound(uint64_t offset) { - if (upper_bound_offset_ > 0) { - return (offset >= upper_bound_offset_); - } - return false; - } - void ReadAheadSizeTuning(bool read_curr_block, bool refit_tail, uint64_t prev_buf_end_offset, uint32_t index, size_t alignment, size_t length, @@ -487,6 +461,13 @@ class FilePrefetchBuffer { } } + void UpdateReadAheadTrimmedStat(size_t initial_length, + size_t updated_length) { + if (initial_length != updated_length) { + RecordTick(stats_, READAHEAD_TRIMMED); + } + } + std::vector bufs_; // curr_ represents the index for bufs_ indicating which buffer is being // consumed currently. @@ -529,10 +510,6 @@ class FilePrefetchBuffer { FilePrefetchBufferUsage usage_; - // upper_bound_offset_ is set when ReadOptions.iterate_upper_bound and - // ReadOptions.auto_readahead_size are set to trim readahead_size upto - // upper_bound_offset_ during prefetching. 
- uint64_t upper_bound_offset_ = 0; std::function readaheadsize_cb_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 2c0e8817a..b97564129 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1355,10 +1355,6 @@ TEST_P(PrefetchTest, PrefetchWithBlockLookupAutoTuneTest) { cmp_iter->Next(); } - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_GT(readahead_trimmed, 0); - ASSERT_OK(cmp_iter->status()); ASSERT_OK(iter->status()); } @@ -1385,10 +1381,6 @@ TEST_P(PrefetchTest, PrefetchWithBlockLookupAutoTuneTest) { cmp_iter->Next(); } - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_GT(readahead_trimmed, 0); - ASSERT_OK(cmp_iter->status()); ASSERT_OK(iter->status()); } @@ -2357,314 +2349,6 @@ TEST_P(PrefetchTest1, SeekParallelizationTest) { Close(); } -// This test checks if readahead_size is trimmed when upper_bound is reached. -// It tests with different combinations of async_io disabled/enabled, -// readahead_size (implicit and explicit), and num_file_reads_for_auto_readahead -// from 0 to 2. -TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBound) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - // First param is if the mockFS support_prefetch or not - std::shared_ptr fs = - std::make_shared(FileSystem::Default(), false); - - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options; - SetGenericOptions(env.get(), /*use_direct_io=*/false, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - ASSERT_OK(s); - - Random rnd(309); - WriteBatch batch; - - for (int i = 0; i < 26; i++) { - std::string key = "my_key_"; - - for (int j = 0; j < 10; j++) { - key += char('a' + i); - ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); - } - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = "my_key_a"; - - std::string end_key = "my_key_"; - for (int j = 0; j < 10; j++) { - end_key += char('a' + 25); - } - - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - int buff_prefetch_count = 0; - - // Try with different num_file_reads_for_auto_readahead from 0 to 3. - for (size_t i = 0; i < 3; i++) { - table_options.num_file_reads_for_auto_readahead = i; - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - s = TryReopen(options); - ASSERT_OK(s); - - int buff_count_with_tuning = 0, buff_count_without_tuning = 0; - int keys_with_tuning = 0, keys_without_tuning = 0; - int reseek_keys_with_tuning = 0, reseek_keys_without_tuning = 0; - buff_prefetch_count = 0; - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::Prefetch:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_prefetch_count++; }); - - SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions ropts; - if (std::get<0>(GetParam())) { - ropts.readahead_size = 32768; - } - if (std::get<1>(GetParam())) { - ropts.async_io = true; - } - - // With tuning readahead_size. 
- { - ASSERT_OK(options.statistics->Reset()); - Slice ub = Slice("my_key_uuu"); - Slice* ub_ptr = &ub; - ropts.iterate_upper_bound = ub_ptr; - ropts.auto_readahead_size = true; - - auto iter = std::unique_ptr(db_->NewIterator(ropts)); - - // Seek. - { - Slice seek_key = Slice("my_key_aaa"); - iter->Seek(seek_key); - - while (iter->Valid()) { - keys_with_tuning++; - iter->Next(); - } - - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_GT(readahead_trimmed, 0); - buff_count_with_tuning = buff_prefetch_count; - } - - // Reseek with new upper_bound_iterator. - { - ub = Slice("my_key_y"); - Slice reseek_key = Slice("my_key_v"); - iter->Seek(reseek_key); - - while (iter->Valid()) { - iter->Next(); - reseek_keys_with_tuning++; - } - ASSERT_OK(iter->status()); - - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_GT(readahead_trimmed, 0); - ASSERT_GT(reseek_keys_with_tuning, 0); - } - } - - // Without tuning readahead_size - { - Slice ub = Slice("my_key_uuu"); - Slice* ub_ptr = &ub; - ropts.iterate_upper_bound = ub_ptr; - buff_prefetch_count = 0; - ASSERT_OK(options.statistics->Reset()); - ropts.auto_readahead_size = false; - - auto iter = std::unique_ptr(db_->NewIterator(ropts)); - - // Seek. - { - Slice seek_key = Slice("my_key_aaa"); - iter->Seek(seek_key); - - while (iter->Valid()) { - keys_without_tuning++; - iter->Next(); - } - buff_count_without_tuning = buff_prefetch_count; - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_EQ(readahead_trimmed, 0); - } - - // Reseek with new upper_bound_iterator. - { - ub = Slice("my_key_y"); - Slice reseek_key = Slice("my_key_v"); - iter->Seek(reseek_key); - - while (iter->Valid()) { - iter->Next(); - reseek_keys_without_tuning++; - } - ASSERT_OK(iter->status()); - - uint64_t readahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - ASSERT_EQ(readahead_trimmed, 0); - ASSERT_GT(reseek_keys_without_tuning, 0); - } - } - - { - // Verify results with and without tuning. - if (std::get<1>(GetParam())) { - // In case of async_io. - ASSERT_GE(buff_count_with_tuning, buff_count_without_tuning); - } else { - ASSERT_EQ(buff_count_without_tuning, buff_count_with_tuning); - } - // Prefetching should happen. - ASSERT_GT(buff_count_without_tuning, 0); - ASSERT_GT(buff_count_with_tuning, 0); - // No of keys should be equal. - ASSERT_EQ(keys_without_tuning, keys_with_tuning); - // No of keys after reseek with new upper bound should be equal. - ASSERT_EQ(reseek_keys_without_tuning, reseek_keys_with_tuning); - } - Close(); - } -} - -// This test checks if readahead_size is trimmed when upper_bound is reached -// during Seek in async_io and it goes for polling without any extra -// prefetching. 
-TEST_P(PrefetchTest, IterReadAheadSizeWithUpperBoundSeekOnly) { - if (mem_env_ || encrypted_env_) { - ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment"); - return; - } - - // First param is if the mockFS support_prefetch or not - std::shared_ptr fs = - std::make_shared(FileSystem::Default(), false); - - bool use_direct_io = false; - if (std::get<0>(GetParam())) { - use_direct_io = true; - } - - std::unique_ptr env(new CompositeEnvWrapper(env_, fs)); - Options options; - SetGenericOptions(env.get(), use_direct_io, options); - options.statistics = CreateDBStatistics(); - BlockBasedTableOptions table_options; - SetBlockBasedTableOptions(table_options); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - Status s = TryReopen(options); - if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) { - // If direct IO is not supported, skip the test - return; - } else { - ASSERT_OK(s); - } - - Random rnd(309); - WriteBatch batch; - - for (int i = 0; i < 26; i++) { - std::string key = "my_key_"; - - for (int j = 0; j < 10; j++) { - key += char('a' + i); - ASSERT_OK(batch.Put(key, rnd.RandomString(1000))); - } - } - ASSERT_OK(db_->Write(WriteOptions(), &batch)); - - std::string start_key = "my_key_a"; - - std::string end_key = "my_key_"; - for (int j = 0; j < 10; j++) { - end_key += char('a' + 25); - } - - Slice least(start_key.data(), start_key.size()); - Slice greatest(end_key.data(), end_key.size()); - - ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest)); - - s = TryReopen(options); - ASSERT_OK(s); - - int buff_count_with_tuning = 0; - - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::PrefetchAsyncInternal:Start", - [&](void*) { buff_count_with_tuning++; }); - - bool read_async_called = false; - SyncPoint::GetInstance()->SetCallBack( - "UpdateResults::io_uring_result", - [&](void* /*arg*/) { read_async_called = true; }); - - SyncPoint::GetInstance()->EnableProcessing(); - - SyncPoint::GetInstance()->EnableProcessing(); - - ReadOptions ropts; - if (std::get<1>(GetParam())) { - ropts.readahead_size = 32768; - } - ropts.async_io = true; - - Slice ub = Slice("my_key_aaa"); - ropts.iterate_upper_bound = &ub; - Slice seek_key = Slice("my_key_aaa"); - - // With tuning readahead_size. - { - ASSERT_OK(options.statistics->Reset()); - ropts.auto_readahead_size = true; - - auto iter = std::unique_ptr(db_->NewIterator(ropts)); - - iter->Seek(seek_key); - - ASSERT_OK(iter->status()); - - // Verify results. - uint64_t readhahead_trimmed = - options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED); - // Readahead got trimmed. - if (read_async_called) { - ASSERT_GT(readhahead_trimmed, 0); - // Seek called PrefetchAsync to poll the data. - ASSERT_EQ(1, buff_count_with_tuning); - } else { - // async_io disabled. - ASSERT_GE(readhahead_trimmed, 0); - ASSERT_EQ(0, buff_count_with_tuning); - } - } - Close(); -} - namespace { #ifdef GFLAGS const int kMaxArgCount = 100; @@ -3243,7 +2927,7 @@ TEST_F(FilePrefetchBufferTest, SeekWithBlockCacheHit) { std::unique_ptr r; Read(fname, opts, &r); - FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, 0, fs()); + FilePrefetchBuffer fpb(16384, 16384, true, false, false, 0, 0, fs()); Slice result; // Simulate a seek of 4096 bytes at offset 0. 
Due to the readahead settings, // it will do two reads of 4096+8192 and 8192 @@ -3292,8 +2976,7 @@ TEST_F(FilePrefetchBufferTest, SeekWithoutAlignment) { FilePrefetchBuffer fpb( /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, /*track_min_offset=*/false, /*implicit_auto_readahead=*/true, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, - /*upper_bound_offset=*/0, fs()); + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, fs()); Slice result; // Simulate a seek of half of alignment bytes at offset n. Due to the @@ -3324,8 +3007,7 @@ TEST_F(FilePrefetchBufferTest, SeekWithoutAlignment) { FilePrefetchBuffer fpb( /*readahead_size=*/16384, /*max_readahead_size=*/16384, /*enable=*/true, /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, - /*upper_bound_offset=*/0, fs()); + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/2, fs()); Slice result; // Simulate a seek of half of alignment bytes at offset n. @@ -3361,8 +3043,7 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { FilePrefetchBuffer fpb( /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, - /*upper_bound_offset=*/0, fs()); + /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, fs()); int read_async_called = 0; SyncPoint::GetInstance()->SetCallBack( @@ -3400,63 +3081,6 @@ TEST_F(FilePrefetchBufferTest, NoSyncWithAsyncIO) { ASSERT_EQ(result, async_result); } -// This test checks if during seek in async_io, if first buffer already -// prefetched the data till upper_bound offset, second buffer shouldn't go for -// prefetching. -TEST_F(FilePrefetchBufferTest, IterateUpperBoundTest1) { - std::string fname = "iterate-upperbound-test1"; - Random rand(0); - std::string content = rand.RandomString(32768); - Write(fname, content); - - FileOptions opts; - std::unique_ptr r; - Read(fname, opts, &r); - - FilePrefetchBuffer fpb( - /*readahead_size=*/8192, /*max_readahead_size=*/16384, /*enable=*/true, - /*track_min_offset=*/false, /*implicit_auto_readahead=*/false, - /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, - /*upper_bound_offset=*/8000, fs()); - - int read_async_called = 0; - SyncPoint::GetInstance()->SetCallBack( - "FilePrefetchBuffer::ReadAsync", - [&](void* /*arg*/) { read_async_called++; }); - SyncPoint::GetInstance()->EnableProcessing(); - - Slice async_result; - // Simulate a seek of 4000 bytes at offset 3000. Due to the readahead - // settings, it will do 1 read of 4000+1000 (till 8000 - upper bound). - Status s = fpb.PrefetchAsync(IOOptions(), r.get(), 3000, 4000, &async_result); - - // Platforms that don't have IO uring may not support async IO - if (s.IsNotSupported()) { - return; - } - - ASSERT_TRUE(s.IsTryAgain()); - IOOptions io_opts; - io_opts.rate_limiter_priority = Env::IOPriority::IO_LOW; - ASSERT_TRUE(fpb.TryReadFromCacheAsync(io_opts, r.get(), /*offset=*/3000, - /*length=*/4000, &async_result, &s)); - // No sync call should be made. - HistogramData sst_read_micros; - stats()->histogramData(SST_READ_MICROS, &sst_read_micros); - ASSERT_EQ(sst_read_micros.count, 0); - - // Number of async calls should be 1. - // No Prefetching should happen in second buffer as first buffer has already - // prefetched till offset. - ASSERT_EQ(read_async_called, 1); - // Length should be 4000. 
- ASSERT_EQ(async_result.size(), 4000); - // Data correctness. - Slice result(&content[3000], 4000); - ASSERT_EQ(result.size(), 4000); - ASSERT_EQ(result, async_result); -} - TEST_F(FilePrefetchBufferTest, SyncReadaheadStats) { std::string fname = "seek-with-block-cache-hit"; Random rand(0); @@ -3468,7 +3092,7 @@ TEST_F(FilePrefetchBufferTest, SyncReadaheadStats) { Read(fname, opts, &r); std::shared_ptr stats = CreateDBStatistics(); - FilePrefetchBuffer fpb(8192, 8192, true, false, false, 0, 0, 0, fs(), nullptr, + FilePrefetchBuffer fpb(8192, 8192, true, false, false, 0, 0, fs(), nullptr, stats.get()); Slice result; // Simulate a seek of 4096 bytes at offset 0. Due to the readahead settings, diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 415f5a70d..ae5ed2c26 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1737,8 +1737,6 @@ struct ReadOptions { // Default: empty (every table will be scanned) std::function table_filter; - // Experimental - // // If auto_readahead_size is set to true, it will auto tune the readahead_size // during scans internally. // For this feature to enabled, iterate_upper_bound must also be specified. @@ -1748,8 +1746,8 @@ struct ReadOptions { // disabled internally and won't be enabled again if the forward scan // is issued again. // - // Default: false - bool auto_readahead_size = false; + // Default: true + bool auto_readahead_size = true; // *** END options only relevant to iterators or scans *** diff --git a/table/block_based/block_based_table_iterator.cc b/table/block_based/block_based_table_iterator.cc index 93fdd1d61..d08def5a0 100644 --- a/table/block_based/block_based_table_iterator.cc +++ b/table/block_based/block_based_table_iterator.cc @@ -114,21 +114,6 @@ void BlockBasedTableIterator::SeekImpl(const Slice* target, } } - if (autotune_readaheadsize) { - FindReadAheadSizeUpperBound(); - if (target) { - index_iter_->Seek(*target); - } else { - index_iter_->SeekToFirst(); - } - - // Check for IO error. - if (!index_iter_->Valid()) { - ResetDataIter(); - return; - } - } - // After reseek, index_iter_ point to the right key i.e. target in // case of readahead_cache_lookup_. So index_iter_ can be used directly. IndexValue v = index_iter_->value(); @@ -691,40 +676,6 @@ void BlockBasedTableIterator::CheckDataBlockWithinUpperBound() { } } -void BlockBasedTableIterator::FindReadAheadSizeUpperBound() { - size_t total_bytes_till_upper_bound = 0; - size_t footer = table_->get_rep()->footer.GetBlockTrailerSize(); - uint64_t start_offset = index_iter_->value().handle.offset(); - - do { - BlockHandle block_handle = index_iter_->value().handle; - total_bytes_till_upper_bound += block_handle.size(); - total_bytes_till_upper_bound += footer; - - // Can't figure out for current block if current block - // is out of bound. But for next block we can find that. - // If curr block's index key >= iterate_upper_bound, it - // means all the keys in next block or above are out of - // bound. - if (IsNextBlockOutOfBound()) { - break; - } - - // Since next block is not out of bound, iterate to that - // index block and add it's Data block size to - // readahead_size. 
- index_iter_->Next(); - - if (!index_iter_->Valid()) { - break; - } - - } while (true); - - block_prefetcher_.SetUpperBoundOffset(start_offset + - total_bytes_till_upper_bound); -} - void BlockBasedTableIterator::InitializeStartAndEndOffsets( bool read_curr_block, bool& found_first_miss_block, uint64_t& start_updated_offset, uint64_t& end_updated_offset, diff --git a/table/block_based/block_based_table_iterator.h b/table/block_based/block_based_table_iterator.h index 84c83014c..554785305 100644 --- a/table/block_based/block_based_table_iterator.h +++ b/table/block_based/block_based_table_iterator.h @@ -371,7 +371,6 @@ class BlockBasedTableIterator : public InternalIteratorBase { } // *** BEGIN APIs relevant to auto tuning of readahead_size *** - void FindReadAheadSizeUpperBound(); // This API is called to lookup the data blocks ahead in the cache to tune // the start and end offsets passed. diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 385622461..a184264df 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -887,7 +887,7 @@ Status BlockBasedTable::PrefetchTail( 0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */, true /* track_min_offset */, false /* implicit_auto_readahead */, 0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */, - 0 /* upper_bound_offset */, nullptr /* fs */, nullptr /* clock */, stats, + nullptr /* fs */, nullptr /* clock */, stats, /* readahead_cb */ nullptr, FilePrefetchBufferUsage::kTableOpenPrefetchTail)); diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 43ea1602d..a7a94cd0b 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -699,30 +699,29 @@ struct BlockBasedTable::Rep { size_t readahead_size, size_t max_readahead_size, std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, - uint64_t upper_bound_offset, + const std::function& readaheadsize_cb, FilePrefetchBufferUsage usage) const { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, implicit_auto_readahead, num_file_reads, - num_file_reads_for_auto_readahead, upper_bound_offset, - ioptions.fs.get(), ioptions.clock, ioptions.stats, readaheadsize_cb, - usage)); + num_file_reads_for_auto_readahead, ioptions.fs.get(), ioptions.clock, + ioptions.stats, readaheadsize_cb, usage)); } void CreateFilePrefetchBufferIfNotExists( size_t readahead_size, size_t max_readahead_size, std::unique_ptr* fpb, bool implicit_auto_readahead, uint64_t num_file_reads, uint64_t num_file_reads_for_auto_readahead, - uint64_t upper_bound_offset, + const std::function& readaheadsize_cb, FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown) const { if (!(*fpb)) { CreateFilePrefetchBuffer(readahead_size, max_readahead_size, fpb, implicit_auto_readahead, num_file_reads, num_file_reads_for_auto_readahead, - upper_bound_offset, readaheadsize_cb, usage); + readaheadsize_cb, usage); } } diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index 4e750d799..b974f9190 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -48,7 +48,7 @@ void BlockPrefetcher::PrefetchIfNeeded( compaction_readahead_size_, compaction_readahead_size_, 
&prefetch_buffer_, /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, /*num_file_reads_for_auto_readahead=*/0, - /*upper_bound_offset=*/0, /*readaheadsize_cb=*/nullptr); + /*readaheadsize_cb=*/nullptr); return; } @@ -57,8 +57,7 @@ void BlockPrefetcher::PrefetchIfNeeded( rep->CreateFilePrefetchBufferIfNotExists( readahead_size, readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/false, /*num_file_reads=*/0, - /*num_file_reads_for_auto_readahead=*/0, upper_bound_offset_, - readaheadsize_cb, + /*num_file_reads_for_auto_readahead=*/0, readaheadsize_cb, /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -83,8 +82,7 @@ void BlockPrefetcher::PrefetchIfNeeded( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, /*num_file_reads=*/0, - rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, readaheadsize_cb, + rep->table_options.num_file_reads_for_auto_readahead, readaheadsize_cb, /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -115,8 +113,7 @@ void BlockPrefetcher::PrefetchIfNeeded( rep->CreateFilePrefetchBufferIfNotExists( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, - rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, readaheadsize_cb, + rep->table_options.num_file_reads_for_auto_readahead, readaheadsize_cb, /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } @@ -138,8 +135,7 @@ void BlockPrefetcher::PrefetchIfNeeded( rep->CreateFilePrefetchBufferIfNotExists( initial_auto_readahead_size_, max_auto_readahead_size, &prefetch_buffer_, /*implicit_auto_readahead=*/true, num_file_reads_, - rep->table_options.num_file_reads_for_auto_readahead, - upper_bound_offset_, readaheadsize_cb, + rep->table_options.num_file_reads_for_auto_readahead, readaheadsize_cb, /*usage=*/FilePrefetchBufferUsage::kUserScanPrefetch); return; } diff --git a/table/block_based/block_prefetcher.h b/table/block_based/block_prefetcher.h index af0a63018..e46aaf614 100644 --- a/table/block_based/block_prefetcher.h +++ b/table/block_based/block_prefetcher.h @@ -53,15 +53,6 @@ class BlockPrefetcher { &initial_auto_readahead_size_); } - void SetUpperBoundOffset(uint64_t upper_bound_offset) { - upper_bound_offset_ = upper_bound_offset; - if (prefetch_buffer() != nullptr) { - // Upper bound can be changed on reseek. So update that in - // FilePrefetchBuffer. - prefetch_buffer()->ResetUpperBoundOffset(upper_bound_offset); - } - } - private: // Readahead size used in compaction, its value is used only if // lookup_context_.caller = kCompaction. 
@@ -78,7 +69,5 @@ class BlockPrefetcher { uint64_t prev_offset_ = 0; size_t prev_len_ = 0; std::unique_ptr prefetch_buffer_; - - uint64_t upper_bound_offset_ = 0; }; } // namespace ROCKSDB_NAMESPACE diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index c908db41d..47ac98b9c 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -498,7 +498,7 @@ Status PartitionedFilterBlockReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /* Implicit autoreadahead */, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr, + /*readaheadsize_cb*/ nullptr, /*usage=*/FilePrefetchBufferUsage::kUnknown); IOOptions opts; diff --git a/table/block_based/partitioned_index_reader.cc b/table/block_based/partitioned_index_reader.cc index f82590718..b4a16dd22 100644 --- a/table/block_based/partitioned_index_reader.cc +++ b/table/block_based/partitioned_index_reader.cc @@ -170,7 +170,7 @@ Status PartitionIndexReader::CacheDependencies( rep->CreateFilePrefetchBuffer( 0, 0, &prefetch_buffer, false /*Implicit auto readahead*/, 0 /*num_reads_*/, 0 /*num_file_reads_for_auto_readahead*/, - /*upper_bound_offset*/ 0, /*readaheadsize_cb*/ nullptr, + /*readaheadsize_cb*/ nullptr, /*usage=*/FilePrefetchBufferUsage::kUnknown); IOOptions opts; { diff --git a/table/table_test.cc b/table/table_test.cc index 2904792c2..298e25fbd 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -3189,6 +3189,7 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupSeqScans) { Options options; BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); options.create_if_missing = true; + options.statistics = CreateDBStatistics(); table_options.index_type = BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; table_options.block_cache = NewLRUCache(1024 * 1024, 0); @@ -3232,6 +3233,8 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupSeqScans) { "00000255"}; WarmUpCache(&c, moptions, warm_keys); + ASSERT_OK(options.statistics->Reset()); + std::unique_ptr iter(c.GetTableReader()->NewIterator( read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr, /*skip_filters=*/false, TableReaderCaller::kUncategorized)); @@ -3256,6 +3259,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupSeqScans) { // One block data. 
ASSERT_EQ(buffer_len, 4096); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } { @@ -3309,6 +3315,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupSeqScans) { bbt->TEST_GetDataBlockHandle(read_options, kv_iter->first, block_handle); ASSERT_EQ(buffer_offset, 106496); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } } c.ResetTableReader(); @@ -3320,6 +3329,7 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { std::unique_ptr env( new CompositeEnvWrapper(c.env_, FileSystem::Default())); options.env = env.get(); + options.statistics = CreateDBStatistics(); c.env_ = env.get(); BlockBasedTableOptions table_options = GetBlockBasedTableOptions(); @@ -3369,6 +3379,8 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { "00000255"}; WarmUpCache(&c, moptions, warm_keys); + ASSERT_OK(options.statistics->Reset()); + std::unique_ptr iter(c.GetTableReader()->NewIterator( read_options, moptions.prefix_extractor.get(), nullptr, false, TableReaderCaller::kUncategorized)); @@ -3396,6 +3408,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { prefetch_buffer->TEST_GetBufferOffsetandSize(1, buffer_offset, buffer_len); ASSERT_EQ(buffer_len, 0); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 2); } { // Check the behavior when it's - @@ -3442,6 +3457,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { block_handle); ASSERT_EQ(buffer_len, 8192); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } } @@ -3492,6 +3510,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { block_handle); ASSERT_EQ(buffer_len, 8192); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } // Third prefetch ReadAsync (buffers will swap). @@ -3525,6 +3546,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { block_handle); ASSERT_EQ(buffer_len, 4096); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } // 4th Prefetch ReadAsync (buffers will swap). @@ -3558,6 +3582,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { block_handle); ASSERT_EQ(buffer_len, 4096); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 1); } // 5th Prefetch ReadAsync. @@ -3591,6 +3618,9 @@ TEST_P(BlockBasedTableTest, BlockCacheLookupAsyncScansSeek) { block_handle); ASSERT_EQ(buffer_len, 8192); ASSERT_EQ(buffer_offset, block_handle.offset()); + + ASSERT_EQ(options.statistics->getAndResetTickerCount(READAHEAD_TRIMMED), + 0); } } } diff --git a/unreleased_history/behavior_changes/auto_readahead_size.md b/unreleased_history/behavior_changes/auto_readahead_size.md new file mode 100644 index 000000000..b1c98dd86 --- /dev/null +++ b/unreleased_history/behavior_changes/auto_readahead_size.md @@ -0,0 +1 @@ +Make ReadOptions.auto_readahead_size default true which does prefetching optimizations for forward scans if iterate_upper_bound and block_cache is also specified. 
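As context for the behavior change above, here is a minimal sketch (not part of the patch) of a forward scan that is affected by the new default; the helper name and keys are made up for illustration, while ReadOptions::iterate_upper_bound and ReadOptions::auto_readahead_size are the public fields this change touches:

```cpp
#include <cassert>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"

// Hypothetical helper: forward scan with the now-default auto_readahead_size.
// Readahead trimming only applies when iterate_upper_bound is set and the
// table is backed by a block cache; uncomment the flag below to opt out.
void ScanWithAutoReadahead(rocksdb::DB* db) {
  rocksdb::ReadOptions ropts;
  rocksdb::Slice upper_bound("my_key_uuu");
  ropts.iterate_upper_bound = &upper_bound;
  // ropts.auto_readahead_size = false;  // restore the old behavior
  std::unique_ptr<rocksdb::Iterator> it(db->NewIterator(ropts));
  for (it->Seek("my_key_aaa"); it->Valid(); it->Next()) {
    // consume it->key() / it->value()
  }
  assert(it->status().ok());
}
```

Per the option comment in include/rocksdb/options.h, the tuning is disabled internally once the scan direction changes to backward, so reverse scans are unaffected.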
From 5502f0672908ce2c5891a3290bdfb57182435adb Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 14 Dec 2023 11:27:17 -0800 Subject: [PATCH 333/386] Add support for linux-riscv64 (#12139) Summary: Following https://github.com/evolvedbinary/docker-rocksjava/pull/2, we can now build rocksdb on riscv64. I've verified this works as expected with `make rocksdbjavastaticdockerriscv64`. Also fixes https://github.com/facebook/rocksdb/issues/10500 https://github.com/facebook/rocksdb/issues/11994 Pull Request resolved: https://github.com/facebook/rocksdb/pull/12139 Reviewed By: jaykorean Differential Revision: D52128098 Pulled By: akankshamahajan15 fbshipit-source-id: 706d36a3f8a9e990b76f426bc450937a0cd1a537 --- Makefile | 8 ++++++-- build_tools/build_detect_platform | 9 +++++---- java/src/main/java/org/rocksdb/util/Environment.java | 6 +++++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index d9ebbb72b..8829be9d8 100644 --- a/Makefile +++ b/Makefile @@ -2060,7 +2060,7 @@ JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux ifeq ($(PLATFORM), OS_SOLARIS) ARCH := $(shell isainfo -b) else ifeq ($(PLATFORM), OS_OPENBSD) - ifneq (,$(filter amd64 ppc64 ppc64le s390x arm64 aarch64 sparc64 loongarch64, $(MACHINE))) + ifneq (,$(filter amd64 ppc64 ppc64le s390x arm64 aarch64 riscv64 sparc64 loongarch64, $(MACHINE))) ARCH := 64 else ARCH := 32 @@ -2081,7 +2081,7 @@ ifneq ($(origin JNI_LIBC), undefined) endif ifeq (,$(ROCKSDBJNILIB)) -ifneq (,$(filter ppc% s390x arm64 aarch64 sparc64 loongarch64, $(MACHINE))) +ifneq (,$(filter ppc% s390x arm64 aarch64 riscv64 sparc64 loongarch64, $(MACHINE))) ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so else ROCKSDBJNILIB = librocksdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so @@ -2346,6 +2346,10 @@ rocksdbjavastaticdockers390x: mkdir -p java/target docker run --rm --name rocksdb_linux_s390x-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:ubuntu18_s390x-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh +rocksdbjavastaticdockerriscv64: + mkdir -p java/target + docker run --rm --name rocksdb_linux_riscv64-be --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:ubuntu20_riscv64-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh + rocksdbjavastaticdockerx86musl: mkdir -p java/target docker run --rm --name rocksdb_linux_x86-musl-be --platform linux/386 --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:alpine3_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-alpine.sh diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index fd70a9835..a5e2b5aa2 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -647,8 +647,10 @@ if [ "$PORTABLE" == "" ] || [ "$PORTABLE" == 0 ]; then fi COMMON_FLAGS="$COMMON_FLAGS" elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then - RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut 
--delimiter=: -f 2 | cut -b 2-) - COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}" + RISC_ISA=$(cat /proc/cpuinfo | grep -E '^isa\s*:' | head -1 | cut --delimiter=: -f 2 | cut -b 2-) + if [ -n "${RISCV_ISA}" ]; then + COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}" + fi elif [ "$TARGET_OS" == "IOS" ]; then COMMON_FLAGS="$COMMON_FLAGS" else @@ -660,8 +662,7 @@ else if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then COMMON_FLAGS="$COMMON_FLAGS -march=z196 " elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then - RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-) - COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}" + COMMON_FLAGS="$COMMON_FLAGS -march=rv64gc" elif test "$USE_SSE"; then # USE_SSE is DEPRECATED # This is a rough approximation of the old USE_SSE behavior diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/rocksdb/util/Environment.java index 53ff65d26..78b73dc5d 100644 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ b/java/src/main/java/org/rocksdb/util/Environment.java @@ -36,6 +36,10 @@ public static boolean isS390x() { return ARCH.contains("s390x"); } + public static boolean isRiscv64() { + return ARCH.contains("riscv64"); + } + public static boolean isWindows() { return (OS.contains("win")); } @@ -180,7 +184,7 @@ private static String getLibcPostfix() { public static String getJniLibraryName(final String name) { if (isUnix()) { final String arch = is64Bit() ? "64" : "32"; - if (isPowerPC() || isAarch64()) { + if (isPowerPC() || isAarch64() || isRiscv64()) { return String.format("%sjni-linux-%s%s", name, ARCH, getLibcPostfix()); } else if (isS390x()) { return String.format("%sjni-linux-%s", name, ARCH); From cd577f605948894b51fbaab39d1df03a04dfd70f Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan <43301668+akankshamahajan15@users.noreply.github.com> Date: Thu, 14 Dec 2023 13:45:06 -0800 Subject: [PATCH 334/386] Fix WRITE_STALL start_time (#12147) Summary: `Delayed` is set true in two cases. One is when `delay` is specified. 
Other one is in the `while` loop - https://github.com/facebook/rocksdb/blob/cd21e4e69d76ec4ec3b080c8cdae016ac2309cc5/db/db_impl/db_impl_write.cc#L1876 However start_time is not initialized in second case, resulting in time_delayed = immutable_db_options_.clock->NowMicros() - 0(start_time); Pull Request resolved: https://github.com/facebook/rocksdb/pull/12147 Test Plan: Existing CircleCI Reviewed By: cbi42 Differential Revision: D52173481 Pulled By: akankshamahajan15 fbshipit-source-id: fb9183b24c191d287a1d715346467bee66190f98 --- db/db_impl/db_impl_write.cc | 3 ++- unreleased_history/bug_fixes/fix_stall_counter.md | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 unreleased_history/bug_fixes/fix_stall_counter.md diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 8add1e990..df67ba8c8 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1839,11 +1839,12 @@ Status DBImpl::DelayWrite(uint64_t num_bytes, WriteThread& write_thread, delay = 0; } TEST_SYNC_POINT("DBImpl::DelayWrite:Start"); + start_time = immutable_db_options_.clock->NowMicros(); + if (delay > 0) { if (write_options.no_slowdown) { return Status::Incomplete("Write stall"); } - start_time = immutable_db_options_.clock->NowMicros(); TEST_SYNC_POINT("DBImpl::DelayWrite:Sleep"); // Notify write_thread about the stall so it can setup a barrier and diff --git a/unreleased_history/bug_fixes/fix_stall_counter.md b/unreleased_history/bug_fixes/fix_stall_counter.md new file mode 100644 index 000000000..be9e5122f --- /dev/null +++ b/unreleased_history/bug_fixes/fix_stall_counter.md @@ -0,0 +1 @@ +Fix a WRITE_STALL counter that was reporting wrong value in few cases. From 88bc91f3cc2b492b8a45ba2c49650f527df97ad8 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 14 Dec 2023 22:13:32 -0800 Subject: [PATCH 335/386] Cap eviction effort (CPU under stress) in HyperClockCache (#12141) Summary: HyperClockCache is intended to mitigate performance problems under stress conditions (as well as optimizing average-case parallel performance). In LRUCache, the biggest such problem is lock contention when one or a small number of cache entries becomes particularly hot. Regardless of cache sharding, accesses to any particular cache entry are linearized against a single mutex, which is held while each access updates the LRU list. All HCC variants are fully lock/wait-free for accessing blocks already in the cache, which fully mitigates this contention problem. However, HCC (and CLOCK in general) can exhibit extremely degraded performance under a different stress condition: when no (or almost no) entries in a cache shard are evictable (they are pinned). Unlike LRU which can find any evictable entries immediately (at the cost of more coordination / synchronization on each access), CLOCK has to search for evictable entries. Under the right conditions (almost exclusively MB-scale caches not GB-scale), the CPU cost of each cache miss could fall off a cliff and bog down the whole system. To effectively mitigate this problem (IMHO), I'm introducing a new default behavior and tuning parameter for HCC, `eviction_effort_cap`. See the comments on the new config parameter in the public API. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12141 Test Plan: unit test included ## Performance test We can use cache_bench to validate no regression (CPU and memory) in normal operation, and to measure change in behavior when cache is almost entirely pinned. 
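Before the measurements below, a minimal sketch (not taken from this PR's diff) of how the new knob might be set when constructing a cache; the function name and sizes are illustrative, while HyperClockCacheOptions::eviction_effort_cap and MakeSharedCache() are the public API referenced by the cache_bench change in this patch:

```cpp
#include <memory>

#include "rocksdb/cache.h"

// Hypothetical helper: build a HyperClockCache with an explicit eviction
// effort cap. Lower values give up eviction sooner (less CPU when nearly
// everything is pinned, at the cost of temporarily exceeding capacity).
std::shared_ptr<rocksdb::Cache> MakeCappedHyperClockCache() {
  rocksdb::HyperClockCacheOptions opts(1 << 30 /* capacity */,
                                       0 /* estimated_entry_charge: auto */);
  opts.eviction_effort_cap = 30;  // the default chosen below
  return opts.MakeSharedCache();
}
```

As noted in the code comments added by this patch, strict_capacity_limit=true effectively disables the cap, since eviction effort must not be bounded in that mode.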
(TODO: I'm not sure why I had to get the pinned ratio parameter well over 1.0 to see truly bad performance, but the behavior is there.) Build with `make DEBUG_LEVEL=0 USE_CLANG=1 PORTABLE=0 cache_bench`. We also set MALLOC_CONF="narenas:1" for all these runs to essentially remove jemalloc variances from the results, so that the max RSS given by /usr/bin/time is essentially ideal (assuming the allocator minimizes fragmentation and other memory overheads well). Base command reproducing bad behavior: ``` ./cache_bench -cache_type=auto_hyper_clock_cache -threads=12 -histograms=0 -pinned_ratio=1.7 ``` ``` Before, LRU (alternate baseline not exhibiting bad behavior): Rough parallel ops/sec = 2290997 1088060 maxresident Before, AutoHCC (bad behavior): Rough parallel ops/sec = 141011 <- Yes, more than 10x slower 1083932 maxresident ``` Now let us sample a range of values in the solution space: ``` After, AutoHCC, eviction_effort_cap = 1: Rough parallel ops/sec = 3212586 2402216 maxresident After, AutoHCC, eviction_effort_cap = 10: Rough parallel ops/sec = 2371639 1248884 maxresident After, AutoHCC, eviction_effort_cap = 30: Rough parallel ops/sec = 1981092 1131596 maxresident After, AutoHCC, eviction_effort_cap = 100: Rough parallel ops/sec = 1446188 1090976 maxresident After, AutoHCC, eviction_effort_cap = 1000: Rough parallel ops/sec = 549568 1084064 maxresident ``` I looks like `cap=30` is a sweet spot balancing acceptable CPU and memory overheads, so is chosen as the default. ``` Change to -pinned_ratio=0.85 Before, LRU: Rough parallel ops/sec = 2108373 1078232 maxresident Before, AutoHCC, averaged over ~20 runs: Rough parallel ops/sec = 2164910 1077312 maxresident After, AutoHCC, eviction_effort_cap = 30, averaged over ~20 runs: Rough parallel ops/sec = 2145542 1077216 maxresident ``` The slight CPU improvement above is consistent with the cap, with no measurable memory overhead under moderate stress. ``` Change to -pinned_ratio=0.25 (low stress) Before, AutoHCC, averaged over ~20 runs: Rough parallel ops/sec = 2221149 1076540 maxresident After, AutoHCC, eviction_effort_cap = 30, averaged over ~20 runs: Rough parallel ops/sec = 2224521 1076664 maxresident ``` No measurable difference under normal circumstances. Some tests repeated with FixedHCC, with similar results. Reviewed By: anand1976 Differential Revision: D52174755 Pulled By: pdillinger fbshipit-source-id: d278108031b1220c1fa4c89c5a9d34b7cf4ef1b8 --- cache/cache_bench_tool.cc | 5 + cache/clock_cache.cc | 135 ++++++++++++------ cache/clock_cache.h | 68 ++++++--- cache/compressed_secondary_cache_test.cc | 2 + cache/lru_cache_test.cc | 60 +++++++- include/rocksdb/cache.h | 40 +++++- .../behavior_changes/eviction_effort_cap.md | 1 + 7 files changed, 246 insertions(+), 65 deletions(-) create mode 100644 unreleased_history/behavior_changes/eviction_effort_cap.md diff --git a/cache/cache_bench_tool.cc b/cache/cache_bench_tool.cc index 89945abf7..07cd1a1f6 100644 --- a/cache/cache_bench_tool.cc +++ b/cache/cache_bench_tool.cc @@ -48,6 +48,10 @@ DEFINE_uint64(cache_size, 1 * GiB, "Number of bytes to use as a cache of uncompressed data."); DEFINE_int32(num_shard_bits, -1, "ShardedCacheOptions::shard_bits. 
Default = auto"); +DEFINE_int32( + eviction_effort_cap, + ROCKSDB_NAMESPACE::HyperClockCacheOptions(1, 1).eviction_effort_cap, + "HyperClockCacheOptions::eviction_effort_cap"); DEFINE_double(resident_ratio, 0.25, "Ratio of keys fitting in cache to keyspace."); @@ -391,6 +395,7 @@ class CacheBench { FLAGS_cache_size, /*estimated_entry_charge=*/0, FLAGS_num_shard_bits); opts.hash_seed = BitwiseAnd(FLAGS_seed, INT32_MAX); opts.memory_allocator = allocator; + opts.eviction_effort_cap = FLAGS_eviction_effort_cap; if (FLAGS_cache_type == "fixed_hyper_clock_cache" || FLAGS_cache_type == "hyper_clock_cache") { opts.estimated_entry_charge = FLAGS_value_bytes_estimate > 0 diff --git a/cache/clock_cache.cc b/cache/clock_cache.cc index e37c03fb5..de2e56186 100644 --- a/cache/clock_cache.cc +++ b/cache/clock_cache.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -94,7 +95,8 @@ inline void Unref(const ClockHandle& h, uint64_t count = 1) { (void)old_meta; } -inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { +inline bool ClockUpdate(ClockHandle& h, BaseClockTable::EvictionData* data, + bool* purgeable = nullptr) { uint64_t meta; if (purgeable) { assert(*purgeable == false); @@ -126,6 +128,7 @@ inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { (meta >> ClockHandle::kReleaseCounterShift) & ClockHandle::kCounterMask; if (acquire_count != release_count) { // Only clock update entries with no outstanding refs + data->seen_pinned_count++; return false; } if ((meta >> ClockHandle::kStateShift == ClockHandle::kStateVisible) && @@ -149,6 +152,8 @@ inline bool ClockUpdate(ClockHandle& h, bool* purgeable = nullptr) { << ClockHandle::kStateShift) | (meta & ClockHandle::kHitBitMask))) { // Took ownership. + data->freed_charge += h.GetTotalCharge(); + data->freed_count += 1; return true; } else { // Compare-exchange failing probably @@ -356,6 +361,18 @@ void ConstApplyToEntriesRange(const Func& func, const HandleImpl* begin, } } +constexpr uint32_t kStrictCapacityLimitBit = 1u << 31; + +uint32_t SanitizeEncodeEecAndScl(int eviction_effort_cap, + bool strict_capacit_limit) { + eviction_effort_cap = std::max(int{1}, eviction_effort_cap); + eviction_effort_cap = + std::min(static_cast(~kStrictCapacityLimitBit), eviction_effort_cap); + uint32_t eec_and_scl = static_cast(eviction_effort_cap); + eec_and_scl |= strict_capacit_limit ? kStrictCapacityLimitBit : 0; + return eec_and_scl; +} + } // namespace void ClockHandleBasicData::FreeData(MemoryAllocator* allocator) const { @@ -385,17 +402,20 @@ HandleImpl* BaseClockTable::StandaloneInsert( template typename Table::HandleImpl* BaseClockTable::CreateStandalone( - ClockHandleBasicData& proto, size_t capacity, bool strict_capacity_limit, + ClockHandleBasicData& proto, size_t capacity, uint32_t eec_and_scl, bool allow_uncharged) { Table& derived = static_cast(*this); typename Table::InsertState state; derived.StartInsert(state); const size_t total_charge = proto.GetTotalCharge(); - if (strict_capacity_limit) { + // NOTE: we can use eec_and_scl as eviction_effort_cap below because + // strict_capacity_limit=true is supposed to disable the limit on eviction + // effort, and a large value effectively does that. + if (eec_and_scl & kStrictCapacityLimitBit) { Status s = ChargeUsageMaybeEvictStrict
<Table>
    ( total_charge, capacity, - /*need_evict_for_occupancy=*/false, state); + /*need_evict_for_occupancy=*/false, eec_and_scl, state); if (!s.ok()) { if (allow_uncharged) { proto.total_charge = 0; @@ -407,7 +427,7 @@ typename Table::HandleImpl* BaseClockTable::CreateStandalone( // Case strict_capacity_limit == false bool success = ChargeUsageMaybeEvictNonStrict
    ( total_charge, capacity, - /*need_evict_for_occupancy=*/false, state); + /*need_evict_for_occupancy=*/false, eec_and_scl, state); if (!success) { // Force the issue usage_.FetchAddRelaxed(total_charge); @@ -420,7 +440,7 @@ typename Table::HandleImpl* BaseClockTable::CreateStandalone( template Status BaseClockTable::ChargeUsageMaybeEvictStrict( size_t total_charge, size_t capacity, bool need_evict_for_occupancy, - typename Table::InsertState& state) { + uint32_t eviction_effort_cap, typename Table::InsertState& state) { if (total_charge > capacity) { return Status::MemoryLimit( "Cache entry too large for a single cache shard: " + @@ -445,7 +465,8 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( } if (request_evict_charge > 0) { EvictionData data; - static_cast(this)->Evict(request_evict_charge, state, &data); + static_cast(this)->Evict(request_evict_charge, state, &data, + eviction_effort_cap); occupancy_.FetchSub(data.freed_count); if (LIKELY(data.freed_charge > need_evict_charge)) { assert(data.freed_count > 0); @@ -475,7 +496,7 @@ Status BaseClockTable::ChargeUsageMaybeEvictStrict( template inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( size_t total_charge, size_t capacity, bool need_evict_for_occupancy, - typename Table::InsertState& state) { + uint32_t eviction_effort_cap, typename Table::InsertState& state) { // For simplicity, we consider that either the cache can accept the insert // with no evictions, or we must evict enough to make (at least) enough // space. It could lead to unnecessary failures or excessive evictions in @@ -511,7 +532,8 @@ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( } EvictionData data; if (need_evict_charge > 0) { - static_cast(this)->Evict(need_evict_charge, state, &data); + static_cast(this)->Evict(need_evict_charge, state, &data, + eviction_effort_cap); // Deal with potential occupancy deficit if (UNLIKELY(need_evict_for_occupancy) && data.freed_count == 0) { assert(data.freed_charge == 0); @@ -530,11 +552,7 @@ inline bool BaseClockTable::ChargeUsageMaybeEvictNonStrict( return true; } -void BaseClockTable::TrackAndReleaseEvictedEntry( - ClockHandle* h, BaseClockTable::EvictionData* data) { - data->freed_charge += h->GetTotalCharge(); - data->freed_count += 1; - +void BaseClockTable::TrackAndReleaseEvictedEntry(ClockHandle* h) { bool took_value_ownership = false; if (eviction_callback_) { // For key reconstructed from hash @@ -551,11 +569,20 @@ void BaseClockTable::TrackAndReleaseEvictedEntry( MarkEmpty(*h); } +bool IsEvictionEffortExceeded(const BaseClockTable::EvictionData& data, + uint32_t eviction_effort_cap) { + // Basically checks whether the ratio of useful effort to wasted effort is + // too low, with a start-up allowance for wasted effort before any useful + // effort. + return (data.freed_count + 1U) * uint64_t{eviction_effort_cap} <= + data.seen_pinned_count; +} + template Status BaseClockTable::Insert(const ClockHandleBasicData& proto, typename Table::HandleImpl** handle, Cache::Priority priority, size_t capacity, - bool strict_capacity_limit) { + uint32_t eec_and_scl) { using HandleImpl = typename Table::HandleImpl; Table& derived = static_cast(*this); @@ -573,9 +600,12 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, // strict_capacity_limit, but mostly pessimistic. 
bool use_standalone_insert = false; const size_t total_charge = proto.GetTotalCharge(); - if (strict_capacity_limit) { + // NOTE: we can use eec_and_scl as eviction_effort_cap below because + // strict_capacity_limit=true is supposed to disable the limit on eviction + // effort, and a large value effectively does that. + if (eec_and_scl & kStrictCapacityLimitBit) { Status s = ChargeUsageMaybeEvictStrict
    ( - total_charge, capacity, need_evict_for_occupancy, state); + total_charge, capacity, need_evict_for_occupancy, eec_and_scl, state); if (!s.ok()) { // Revert occupancy occupancy_.FetchSubRelaxed(1); @@ -584,7 +614,7 @@ Status BaseClockTable::Insert(const ClockHandleBasicData& proto, } else { // Case strict_capacity_limit == false bool success = ChargeUsageMaybeEvictNonStrict
    ( - total_charge, capacity, need_evict_for_occupancy, state); + total_charge, capacity, need_evict_for_occupancy, eec_and_scl, state); if (!success) { // Revert occupancy occupancy_.FetchSubRelaxed(1); @@ -688,8 +718,7 @@ void BaseClockTable::TEST_ReleaseNMinus1(ClockHandle* h, size_t n) { #endif FixedHyperClockTable::FixedHyperClockTable( - size_t capacity, bool /*strict_capacity_limit*/, - CacheMetadataChargePolicy metadata_charge_policy, + size_t capacity, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed, const Opts& opts) @@ -1084,7 +1113,8 @@ inline void FixedHyperClockTable::ReclaimEntryUsage(size_t total_charge) { } inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, - EvictionData* data) { + EvictionData* data, + uint32_t eviction_effort_cap) { // precondition assert(requested_charge > 0); @@ -1105,10 +1135,10 @@ inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, for (;;) { for (size_t i = 0; i < step_size; i++) { HandleImpl& h = array_[ModTableSize(Lower32of64(old_clock_pointer + i))]; - bool evicting = ClockUpdate(h); + bool evicting = ClockUpdate(h, data); if (evicting) { Rollback(h.hashed_key, &h); - TrackAndReleaseEvictedEntry(&h, data); + TrackAndReleaseEvictedEntry(&h); } } @@ -1119,6 +1149,10 @@ inline void FixedHyperClockTable::Evict(size_t requested_charge, InsertState&, if (old_clock_pointer >= max_clock_pointer) { return; } + if (IsEvictionEffortExceeded(*data, eviction_effort_cap)) { + eviction_effort_exceeded_count_.FetchAddRelaxed(1); + return; + } // Advance clock pointer (concurrently) old_clock_pointer = clock_pointer_.FetchAddRelaxed(step_size); @@ -1133,10 +1167,11 @@ ClockCacheShard
    ::ClockCacheShard( const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed, const typename Table::Opts& opts) : CacheShardBase(metadata_charge_policy), - table_(capacity, strict_capacity_limit, metadata_charge_policy, allocator, - eviction_callback, hash_seed, opts), + table_(capacity, metadata_charge_policy, allocator, eviction_callback, + hash_seed, opts), capacity_(capacity), - strict_capacity_limit_(strict_capacity_limit) { + eec_and_scl_(SanitizeEncodeEecAndScl(opts.eviction_effort_cap, + strict_capacity_limit)) { // Initial charge metadata should not exceed capacity assert(table_.GetUsage() <= capacity_.LoadRelaxed() || capacity_.LoadRelaxed() < sizeof(HandleImpl)); @@ -1212,7 +1247,11 @@ void ClockCacheShard
<Table>::SetCapacity(size_t capacity) { template <class Table> void ClockCacheShard<Table>
<Table>::SetStrictCapacityLimit( bool strict_capacity_limit) { - strict_capacity_limit_.StoreRelaxed(strict_capacity_limit); + if (strict_capacity_limit) { + eec_and_scl_.FetchOrRelaxed(kStrictCapacityLimitBit); + } else { + eec_and_scl_.FetchAndRelaxed(~kStrictCapacityLimitBit); + } // next Insert will take care of any necessary evictions } @@ -1234,7 +1273,7 @@ Status ClockCacheShard<Table>
<Table>::Insert(const Slice& key, proto.total_charge = charge; return table_.template Insert<Table>
(proto, handle, priority, capacity_.LoadRelaxed(), - strict_capacity_limit_.LoadRelaxed()); + eec_and_scl_.LoadRelaxed()); } template <class Table> @@ -1249,9 +1288,9 @@ typename Table::HandleImpl* ClockCacheShard<Table>
<Table>::CreateStandalone( proto.value = obj; proto.helper = helper; proto.total_charge = charge; - return table_.template CreateStandalone<Table>
( - proto, capacity_.LoadRelaxed(), strict_capacity_limit_.LoadRelaxed(), - allow_uncharged); + return table_.template CreateStandalone<Table>
(proto, capacity_.LoadRelaxed(), + eec_and_scl_.LoadRelaxed(), + allow_uncharged); } template <class Table> @@ -1503,14 +1542,20 @@ void BaseHyperClockCache<Table>
<Table>::ReportProblems( const std::shared_ptr<Logger>& info_log) const { if (info_log->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { LoadVarianceStats slot_stats; + uint64_t eviction_effort_exceeded_count = 0; this->ForEachShard([&](const BaseHyperClockCache<Table>
    ::Shard* shard) { size_t count = shard->GetTableAddressCount(); for (size_t i = 0; i < count; ++i) { slot_stats.Add(IsSlotOccupied(*shard->GetTable().HandlePtr(i))); } + eviction_effort_exceeded_count += + shard->GetTable().GetEvictionEffortExceededCount(); }); ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, "Slot occupancy stats: %s", slot_stats.Report().c_str()); + ROCKS_LOG_AT_LEVEL(info_log, InfoLogLevel::DEBUG_LEVEL, + "Eviction effort exceeded: %" PRIu64, + eviction_effort_exceeded_count); } } @@ -1908,8 +1953,7 @@ class AutoHyperClockTable::ChainRewriteLock { }; AutoHyperClockTable::AutoHyperClockTable( - size_t capacity, bool /*strict_capacity_limit*/, - CacheMetadataChargePolicy metadata_charge_policy, + size_t capacity, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, const Cache::EvictionCallback* eviction_callback, const uint32_t* hash_seed, const Opts& opts) @@ -2590,7 +2634,8 @@ using ClockUpdateChainLockedOpData = template void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, ChainRewriteLock& rewrite_lock, - size_t home) { + size_t home, + BaseClockTable::EvictionData* data) { constexpr bool kIsPurge = std::is_same_v; constexpr bool kIsClockUpdateChain = std::is_same_v; @@ -2632,7 +2677,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, assert(home == BottomNBits(h->hashed_key[1], home_shift)); if constexpr (kIsClockUpdateChain) { // Clock update and/or check for purgeable (under (de)construction) - if (ClockUpdate(*h, &purgeable)) { + if (ClockUpdate(*h, data, &purgeable)) { // Remember for finishing eviction op_data->push_back(h); // Entries for eviction become purgeable @@ -2642,6 +2687,7 @@ void AutoHyperClockTable::PurgeImplLocked(OpData* op_data, } } else { (void)op_data; + (void)data; purgeable = ((h->meta.Load() >> ClockHandle::kStateShift) & ClockHandle::kStateShareableBit) == 0; } @@ -2719,7 +2765,8 @@ using PurgeOpData = const UniqueId64x2; using ClockUpdateChainOpData = ClockUpdateChainLockedOpData; template -void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { +void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home, + BaseClockTable::EvictionData* data) { // Early efforts to make AutoHCC fully wait-free ran into too many problems // that needed obscure and potentially inefficient work-arounds to have a // chance at working. 
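One operational note on the aggregation above: the "Eviction effort exceeded" line is emitted by ReportProblems() only at DEBUG verbosity, so to actually see it one would set the info log level to DEBUG. A small sketch using the existing public DBOptions field:

#include "rocksdb/env.h"
#include "rocksdb/options.h"

void EnableDebugCacheReports(ROCKSDB_NAMESPACE::DBOptions& db_opts) {
  // ReportProblems() logs the new counter only when the logger's level is at
  // or below DEBUG_LEVEL.
  db_opts.info_log_level = ROCKSDB_NAMESPACE::InfoLogLevel::DEBUG_LEVEL;
}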
@@ -2800,9 +2847,9 @@ void AutoHyperClockTable::PurgeImpl(OpData* op_data, size_t home) { if (!rewrite_lock.IsEnd()) { if constexpr (kIsPurge) { PurgeLockedOpData* locked_op_data{}; - PurgeImplLocked(locked_op_data, rewrite_lock, home); + PurgeImplLocked(locked_op_data, rewrite_lock, home, data); } else { - PurgeImplLocked(op_data, rewrite_lock, home); + PurgeImplLocked(op_data, rewrite_lock, home, data); } } } @@ -3405,7 +3452,8 @@ void AutoHyperClockTable::EraseUnRefEntries() { } void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, - EvictionData* data) { + EvictionData* data, + uint32_t eviction_effort_cap) { // precondition assert(requested_charge > 0); @@ -3463,12 +3511,12 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, if (home >= used_length) { break; } - PurgeImpl(&to_finish_eviction, home); + PurgeImpl(&to_finish_eviction, home, data); } } for (HandleImpl* h : to_finish_eviction) { - TrackAndReleaseEvictedEntry(h, data); + TrackAndReleaseEvictedEntry(h); // NOTE: setting likely_empty_slot here can cause us to reduce the // portion of "at home" entries, probably because an evicted entry // is more likely to come back than a random new entry and would be @@ -3496,6 +3544,11 @@ void AutoHyperClockTable::Evict(size_t requested_charge, InsertState& state, if (old_clock_pointer + step_size >= max_clock_pointer) { return; } + + if (IsEvictionEffortExceeded(*data, eviction_effort_cap)) { + eviction_effort_exceeded_count_.FetchAddRelaxed(1); + return; + } } } diff --git a/cache/clock_cache.h b/cache/clock_cache.h index 3086e7e97..54d656021 100644 --- a/cache/clock_cache.h +++ b/cache/clock_cache.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -374,6 +375,14 @@ struct ClockHandle : public ClockHandleBasicData { class BaseClockTable { public: + struct BaseOpts { + explicit BaseOpts(int _eviction_effort_cap) + : eviction_effort_cap(_eviction_effort_cap) {} + explicit BaseOpts(const HyperClockCacheOptions& opts) + : BaseOpts(opts.eviction_effort_cap) {} + int eviction_effort_cap; + }; + BaseClockTable(CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, const Cache::EvictionCallback* eviction_callback, @@ -386,13 +395,13 @@ class BaseClockTable { template typename Table::HandleImpl* CreateStandalone(ClockHandleBasicData& proto, size_t capacity, - bool strict_capacity_limit, + uint32_t eec_and_scl, bool allow_uncharged); template Status Insert(const ClockHandleBasicData& proto, typename Table::HandleImpl** handle, Cache::Priority priority, - size_t capacity, bool strict_capacity_limit); + size_t capacity, uint32_t eec_and_scl); void Ref(ClockHandle& handle); @@ -406,12 +415,17 @@ class BaseClockTable { uint64_t GetYieldCount() const { return yield_count_.LoadRelaxed(); } + uint64_t GetEvictionEffortExceededCount() const { + return eviction_effort_exceeded_count_.LoadRelaxed(); + } + struct EvictionData { size_t freed_charge = 0; size_t freed_count = 0; + size_t seen_pinned_count = 0; }; - void TrackAndReleaseEvictedEntry(ClockHandle* h, EvictionData* data); + void TrackAndReleaseEvictedEntry(ClockHandle* h); #ifndef NDEBUG // Acquire N references @@ -436,6 +450,7 @@ class BaseClockTable { template Status ChargeUsageMaybeEvictStrict(size_t total_charge, size_t capacity, bool need_evict_for_occupancy, + uint32_t eviction_effort_cap, typename Table::InsertState& state); // Helper for updating `usage_` for new entry with given `total_charge` @@ -449,6 +464,7 @@ class BaseClockTable { 
template bool ChargeUsageMaybeEvictNonStrict(size_t total_charge, size_t capacity, bool need_evict_for_occupancy, + uint32_t eviction_effort_cap, typename Table::InsertState& state); protected: // data @@ -461,9 +477,15 @@ class BaseClockTable { RelaxedAtomic clock_pointer_{}; // Counter for number of times we yield to wait on another thread. + // It is normal for this to occur rarely in normal operation. // (Relaxed: a simple stat counter.) RelaxedAtomic yield_count_{}; + // Counter for number of times eviction effort cap is exceeded. + // It is normal for this to occur rarely in normal operation. + // (Relaxed: a simple stat counter.) + RelaxedAtomic eviction_effort_exceeded_count_{}; + // TODO: is this separation needed if we don't do background evictions? ALIGN_AS(CACHE_LINE_SIZE) // Number of elements in the table. @@ -517,17 +539,19 @@ class FixedHyperClockTable : public BaseClockTable { inline void SetStandalone() { standalone = true; } }; // struct HandleImpl - struct Opts { - explicit Opts(size_t _estimated_value_size) - : estimated_value_size(_estimated_value_size) {} - explicit Opts(const HyperClockCacheOptions& opts) { + struct Opts : public BaseOpts { + explicit Opts(size_t _estimated_value_size, int _eviction_effort_cap) + : BaseOpts(_eviction_effort_cap), + estimated_value_size(_estimated_value_size) {} + explicit Opts(const HyperClockCacheOptions& opts) + : BaseOpts(opts.eviction_effort_cap) { assert(opts.estimated_entry_charge > 0); estimated_value_size = opts.estimated_entry_charge; } size_t estimated_value_size; }; - FixedHyperClockTable(size_t capacity, bool strict_capacity_limit, + FixedHyperClockTable(size_t capacity, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, const Cache::EvictionCallback* eviction_callback, @@ -549,7 +573,8 @@ class FixedHyperClockTable : public BaseClockTable { // Runs the clock eviction algorithm trying to reclaim at least // requested_charge. Returns how much is evicted, which could be less // if it appears impossible to evict the requested amount without blocking. - void Evict(size_t requested_charge, InsertState& state, EvictionData* data); + void Evict(size_t requested_charge, InsertState& state, EvictionData* data, + uint32_t eviction_effort_cap); HandleImpl* Lookup(const UniqueId64x2& hashed_key); @@ -803,18 +828,20 @@ class AutoHyperClockTable : public BaseClockTable { } }; // struct HandleImpl - struct Opts { - explicit Opts(size_t _min_avg_value_size) - : min_avg_value_size(_min_avg_value_size) {} + struct Opts : public BaseOpts { + explicit Opts(size_t _min_avg_value_size, int _eviction_effort_cap) + : BaseOpts(_eviction_effort_cap), + min_avg_value_size(_min_avg_value_size) {} - explicit Opts(const HyperClockCacheOptions& opts) { + explicit Opts(const HyperClockCacheOptions& opts) + : BaseOpts(opts.eviction_effort_cap) { assert(opts.estimated_entry_charge == 0); min_avg_value_size = opts.min_avg_entry_charge; } size_t min_avg_value_size; }; - AutoHyperClockTable(size_t capacity, bool strict_capacity_limit, + AutoHyperClockTable(size_t capacity, CacheMetadataChargePolicy metadata_charge_policy, MemoryAllocator* allocator, const Cache::EvictionCallback* eviction_callback, @@ -841,7 +868,8 @@ class AutoHyperClockTable : public BaseClockTable { // Runs the clock eviction algorithm trying to reclaim at least // requested_charge. Returns how much is evicted, which could be less // if it appears impossible to evict the requested amount without blocking. 
- void Evict(size_t requested_charge, InsertState& state, EvictionData* data); + void Evict(size_t requested_charge, InsertState& state, EvictionData* data, + uint32_t eviction_effort_cap); HandleImpl* Lookup(const UniqueId64x2& hashed_key); @@ -906,7 +934,8 @@ class AutoHyperClockTable : public BaseClockTable { // with proper handling to ensure all existing data is seen even in the // presence of concurrent insertions, etc. (See implementation.) template - void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX); + void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX, + EvictionData* data = nullptr); // An RAII wrapper for locking a chain of entries for removals. See // implementation. @@ -916,7 +945,7 @@ class AutoHyperClockTable : public BaseClockTable { // implementation. template void PurgeImplLocked(OpData* op_data, ChainRewriteLock& rewrite_lock, - size_t home); + size_t home, EvictionData* data); // Update length_info_ as much as possible without waiting, given a known // usable (ready for inserts and lookups) grow_home. (Previous grow_homes @@ -1078,9 +1107,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShardBase { // (Relaxed: eventual consistency/update is OK) RelaxedAtomic capacity_; - // Whether to reject insertion if cache reaches its full capacity. + // Encodes eviction_effort_cap (bottom 31 bits) and strict_capacity_limit + // (top bit). See HyperClockCacheOptions::eviction_effort_cap etc. // (Relaxed: eventual consistency/update is OK) - RelaxedAtomic strict_capacity_limit_; + RelaxedAtomic eec_and_scl_; }; // class ClockCacheShard template diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index d72680b84..79f40868a 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -992,6 +992,8 @@ class CompressedSecCacheTestWithTiered /*_capacity=*/0, /*_estimated_entry_charge=*/256 << 10, /*_num_shard_bits=*/0); + // eviction_effort_cap setting simply to avoid churn in existing test + hcc_opts.eviction_effort_cap = 100; TieredCacheOptions opts; lru_opts.capacity = 0; lru_opts.num_shard_bits = 0; diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 27fd5cc85..91b1d02c1 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -389,12 +389,13 @@ class ClockCacheTest : public testing::Test { } } - void NewShard(size_t capacity, bool strict_capacity_limit = true) { + void NewShard(size_t capacity, bool strict_capacity_limit = true, + int eviction_effort_cap = 30) { DeleteShard(); shard_ = reinterpret_cast(port::cacheline_aligned_alloc(sizeof(Shard))); - TableOpts opts{1 /*value_size*/}; + TableOpts opts{1 /*value_size*/, eviction_effort_cap}; new (shard_) Shard(capacity, strict_capacity_limit, kDontChargeCacheMetadata, /*allocator*/ nullptr, &eviction_callback_, &hash_seed_, opts); @@ -445,12 +446,20 @@ class ClockCacheTest : public testing::Test { return Slice(reinterpret_cast(&hashed_key), 16U); } + // A bad hash function for testing / stressing collision handling static inline UniqueId64x2 TestHashedKey(char key) { // For testing hash near-collision behavior, put the variance in // hashed_key in bits that are unlikely to be used as hash bits. return {(static_cast(key) << 56) + 1234U, 5678U}; } + // A reasonable hash function, for testing "typical behavior" etc. 
+ template + static inline UniqueId64x2 CheapHash(T i) { + return {static_cast(i) * uint64_t{0x85EBCA77C2B2AE63}, + static_cast(i) * uint64_t{0xC2B2AE3D27D4EB4F}}; + } + Shard* shard_ = nullptr; private: @@ -683,6 +692,53 @@ TYPED_TEST(ClockCacheTest, ClockEvictionTest) { } } +TYPED_TEST(ClockCacheTest, ClockEvictionEffortCapTest) { + using HandleImpl = typename ClockCacheTest::Shard::HandleImpl; + for (bool strict_capacity_limit : {true, false}) { + SCOPED_TRACE("strict_capacity_limit = " + + std::to_string(strict_capacity_limit)); + for (int eec : {-42, 0, 1, 10, 100, 1000}) { + SCOPED_TRACE("eviction_effort_cap = " + std::to_string(eec)); + constexpr size_t kCapacity = 1000; + // Start with much larger capacity to ensure that we can go way over + // capacity without reaching table occupancy limit. + this->NewShard(3 * kCapacity, strict_capacity_limit, eec); + auto& shard = *this->shard_; + shard.SetCapacity(kCapacity); + + // Nearly fill the cache with pinned entries, then add a bunch of + // non-pinned entries. eviction_effort_cap should affect how many + // evictable entries are present beyond the cache capacity, despite + // being evictable. + constexpr size_t kCount = kCapacity - 1; + std::unique_ptr ha { new HandleImpl* [kCount] {} }; + for (size_t i = 0; i < 2 * kCount; ++i) { + UniqueId64x2 hkey = this->CheapHash(i); + ASSERT_OK(shard.Insert( + this->TestKey(hkey), hkey, nullptr /*value*/, &kNoopCacheItemHelper, + 1 /*charge*/, i < kCount ? &ha[i] : nullptr, Cache::Priority::LOW)); + } + + if (strict_capacity_limit) { + // If strict_capacity_limit is enabled, the cache will never exceed its + // capacity + EXPECT_EQ(shard.GetOccupancyCount(), kCapacity); + } else { + // Rough inverse relationship between cap and possible memory + // explosion, which shows up as increased table occupancy count. + int effective_eec = std::max(int{1}, eec) + 1; + EXPECT_NEAR(shard.GetOccupancyCount() * 1.0, + kCount * (1 + 1.4 / effective_eec), + kCount * (0.6 / effective_eec) + 1.0); + } + + for (size_t i = 0; i < kCount; ++i) { + shard.Release(ha[i]); + } + } + } +} + namespace { struct DeleteCounter { int deleted = 0; diff --git a/include/rocksdb/cache.h b/include/rocksdb/cache.h index 43a422049..2a358504e 100644 --- a/include/rocksdb/cache.h +++ b/include/rocksdb/cache.h @@ -380,9 +380,6 @@ inline std::shared_ptr NewCompressedSecondaryCache( // to find the appropriate balance automatically. // * Cache priorities are less aggressively enforced, which could cause // cache dilution from long range scans (unless they use fill_cache=false). -// * Can be worse for small caches, because if almost all of a cache shard is -// pinned (more likely with non-partitioned filters), then CLOCK eviction -// becomes very CPU intensive. // // See internal cache/clock_cache.h for full description. struct HyperClockCacheOptions : public ShardedCacheOptions { @@ -441,6 +438,43 @@ struct HyperClockCacheOptions : public ShardedCacheOptions { // load factor for efficient Lookup, Insert, etc. size_t min_avg_entry_charge = 450; + // A tuning parameter to cap eviction CPU usage in a "thrashing" situation + // by allowing the memory capacity to be exceeded slightly as needed. The + // default setting should offer balanced protection against excessive CPU + // and memory usage under extreme stress conditions, with no effect on + // normal operation. Such stress conditions are proportionally more likely + // with small caches (10s of MB or less) vs. large caches (GB-scale). 
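For reference, a hedged usage sketch of the new option: it assumes the field is set directly on HyperClockCacheOptions as added in this patch and that the existing MakeSharedCache() factory is used; the capacity value is arbitrary.

#include <memory>

#include "rocksdb/cache.h"

std::shared_ptr<ROCKSDB_NAMESPACE::Cache> MakeSmallCacheWithRelaxedEffortCap() {
  // 32 MB cache; estimated_entry_charge = 0 selects the automatic table.
  ROCKSDB_NAMESPACE::HyperClockCacheOptions opts(
      /*_capacity=*/32 << 20, /*_estimated_entry_charge=*/0);
  // Larger values allow more eviction effort (less memory overshoot, more CPU
  // under stress); smaller values cap CPU sooner at the cost of overshoot.
  opts.eviction_effort_cap = 100;
  return opts.MakeSharedCache();
}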
+ // (NOTE: With the unusual setting of strict_capacity_limit=true, this + // parameter is ignored.) + // + // BACKGROUND: Without some kind of limiter, inserting into a CLOCK-based + // cache with no evictable entries (all "pinned") requires scanning the + // entire cache to determine that nothing can be evicted. (By contrast, + // LRU caches can determine no entries are evictable in O(1) time, but + // require more synchronization/coordination on that eviction metadata.) + // This aspect of a CLOCK cache can make a stressed situation worse by + // bogging down the CPU with repeated scans of the cache. And with + // strict_capacity_limit=false (normal setting), finding something evictable + // doesn't change the outcome of insertion: the entry is inserted anyway + // and the cache is allowed to exceed its target capacity if necessary. + // + // SOLUTION: Eviction is aborted upon seeing some number of pinned + // entries before evicting anything, or if the ratio of pinned to evicted + // is too high. This setting `eviction_effort_cap` essentially controls both + // that allowed initial number of pinned entries and the maximum allowed + // ratio. As the pinned size approaches the target cache capacity, roughly + // 1/eviction_effort_cap additional portion of the capacity might be kept + // in memory and evictable in order to keep CLOCK eviction reasonably + // performant. Under the default setting and high stress conditions, this + // memory overhead is around 3-5%. Under normal or even moderate stress + // conditions, the memory overhead is negligible to zero. + // + // A large value like 1000 offers some protection with essentially no + // memory overhead, while the minimum value of 1 could be useful for a + // small cache where roughly doubling in size under stress could be OK to + // keep operations very fast. + int eviction_effort_cap = 30; + HyperClockCacheOptions( size_t _capacity, size_t _estimated_entry_charge, int _num_shard_bits = -1, bool _strict_capacity_limit = false, diff --git a/unreleased_history/behavior_changes/eviction_effort_cap.md b/unreleased_history/behavior_changes/eviction_effort_cap.md new file mode 100644 index 000000000..7772141e1 --- /dev/null +++ b/unreleased_history/behavior_changes/eviction_effort_cap.md @@ -0,0 +1 @@ +HyperClockCache now has built-in protection against excessive CPU consumption under the extreme stress condition of no (or very few) evictable cache entries, which can slightly increase memory usage such conditions. New option `HyperClockCacheOptions::eviction_effort_cap` controls the space-time trade-off of the response. The default should be generally well-balanced, with no measurable affect on normal operation. From cc069f25b31be9be30a8831910df983c74c82b21 Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 15 Dec 2023 11:34:08 -0800 Subject: [PATCH 336/386] Add some compressed and tiered secondary cache stats (#12150) Summary: Add statistics for more visibility. 
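The new tickers introduced below can be read through the existing Statistics interface. A minimal sketch, assuming statistics were enabled in the usual way (e.g. via CreateDBStatistics() on the options):

#include <cinttypes>
#include <cstdio>
#include <memory>

#include "rocksdb/statistics.h"

void DumpCompressedSecCacheTickers(
    const std::shared_ptr<ROCKSDB_NAMESPACE::Statistics>& stats) {
  using namespace ROCKSDB_NAMESPACE;
  std::printf("hits=%" PRIu64 " dummy_hits=%" PRIu64 " promotions=%" PRIu64
              " promotion_skips=%" PRIu64 "\n",
              stats->getTickerCount(COMPRESSED_SECONDARY_CACHE_HITS),
              stats->getTickerCount(COMPRESSED_SECONDARY_CACHE_DUMMY_HITS),
              stats->getTickerCount(COMPRESSED_SECONDARY_CACHE_PROMOTIONS),
              stats->getTickerCount(COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS));
}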
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12150 Reviewed By: akankshamahajan15 Differential Revision: D52184633 Pulled By: anand1976 fbshipit-source-id: 9969e05d65223811cd12627102b020bb6d229352 --- cache/compressed_secondary_cache.cc | 4 ++- cache/compressed_secondary_cache.h | 2 +- cache/compressed_secondary_cache_test.cc | 26 ++++++++++---------- cache/lru_cache_test.cc | 3 ++- cache/secondary_cache_adapter.cc | 11 +++++---- cache/tiered_secondary_cache.cc | 18 ++++++++++---- cache/tiered_secondary_cache.h | 3 ++- cache/tiered_secondary_cache_test.cc | 2 +- db/blob/blob_source_test.cc | 4 +-- include/rocksdb/secondary_cache.h | 6 ++--- include/rocksdb/statistics.h | 6 +++++ java/rocksjni/portal.h | 20 +++++++++++++++ monitoring/statistics.cc | 8 ++++++ options/customizable_test.cc | 3 ++- utilities/fault_injection_secondary_cache.cc | 8 +++--- utilities/fault_injection_secondary_cache.h | 2 +- 16 files changed, 88 insertions(+), 38 deletions(-) diff --git a/cache/compressed_secondary_cache.cc b/cache/compressed_secondary_cache.cc index b29670b77..6c19e7921 100644 --- a/cache/compressed_secondary_cache.cc +++ b/cache/compressed_secondary_cache.cc @@ -31,7 +31,7 @@ CompressedSecondaryCache::~CompressedSecondaryCache() {} std::unique_ptr CompressedSecondaryCache::Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase, - bool& kept_in_sec_cache) { + Statistics* stats, bool& kept_in_sec_cache) { assert(helper); // This is a minor optimization. Its ok to skip it in TSAN in order to // avoid a false positive. @@ -51,6 +51,7 @@ std::unique_ptr CompressedSecondaryCache::Lookup( void* handle_value = cache_->Value(lru_handle); if (handle_value == nullptr) { cache_->Release(lru_handle, /*erase_if_last_ref=*/false); + RecordTick(stats, COMPRESSED_SECONDARY_CACHE_DUMMY_HITS); return nullptr; } @@ -137,6 +138,7 @@ std::unique_ptr CompressedSecondaryCache::Lookup( cache_->Release(lru_handle, /*erase_if_last_ref=*/false); } handle.reset(new CompressedSecondaryCacheResultHandle(value, charge)); + RecordTick(stats, COMPRESSED_SECONDARY_CACHE_HITS); return handle; } diff --git a/cache/compressed_secondary_cache.h b/cache/compressed_secondary_cache.h index 32e6fd0df..90e134fcf 100644 --- a/cache/compressed_secondary_cache.h +++ b/cache/compressed_secondary_cache.h @@ -86,7 +86,7 @@ class CompressedSecondaryCache : public SecondaryCache { std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool /*wait*/, bool advise_erase, - bool& kept_in_sec_cache) override; + Statistics* stats, bool& kept_in_sec_cache) override; bool SupportForceErase() const override { return true; } diff --git a/cache/compressed_secondary_cache_test.cc b/cache/compressed_secondary_cache_test.cc index 79f40868a..ac1786f16 100644 --- a/cache/compressed_secondary_cache_test.cc +++ b/cache/compressed_secondary_cache_test.cc @@ -44,7 +44,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, // Lookup an non-existent key. 
std::unique_ptr handle0 = sec_cache->Lookup(key0, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle0, nullptr); Random rnd(301); @@ -59,7 +59,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::unique_ptr handle1_1 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle1_1, nullptr); // Insert and Lookup the item k1 for the second time and advise erasing it. @@ -68,7 +68,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::unique_ptr handle1_2 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_NE(handle1_2, nullptr); ASSERT_FALSE(kept_in_sec_cache); if (sec_cache_is_compressed) { @@ -89,7 +89,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, // Lookup the item k1 again. std::unique_ptr handle1_3 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle1_3, nullptr); // Insert and Lookup the item k2. @@ -99,7 +99,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_dummy_count, 2); std::unique_ptr handle2_1 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle2_1, nullptr); ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); @@ -115,7 +115,7 @@ class CompressedSecondaryCacheTestBase : public testing::Test, } std::unique_ptr handle2_2 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_NE(handle2_2, nullptr); std::unique_ptr val2 = std::unique_ptr(static_cast(handle2_2->Value())); @@ -196,14 +196,14 @@ class CompressedSecondaryCacheTestBase : public testing::Test, bool kept_in_sec_cache{false}; std::unique_ptr handle1 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle1, nullptr); // Insert k2 and k1 is evicted. ASSERT_OK(sec_cache->Insert(key2, &item2, GetHelper(), false)); std::unique_ptr handle2 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_NE(handle2, nullptr); std::unique_ptr val2 = std::unique_ptr(static_cast(handle2->Value())); @@ -215,14 +215,14 @@ class CompressedSecondaryCacheTestBase : public testing::Test, std::unique_ptr handle1_1 = sec_cache->Lookup(key1, GetHelper(), this, true, /*advise_erase=*/false, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle1_1, nullptr); // Create Fails. SetFailCreate(true); std::unique_ptr handle2_1 = sec_cache->Lookup(key2, GetHelper(), this, true, /*advise_erase=*/true, - kept_in_sec_cache); + /*stats=*/nullptr, kept_in_sec_cache); ASSERT_EQ(handle2_1, nullptr); // Save Fails. 
@@ -912,9 +912,9 @@ TEST_P(CompressedSecondaryCacheTestWithCompressionParam, EntryRoles) { ASSERT_EQ(get_perf_context()->compressed_sec_cache_insert_real_count, 1U); bool kept_in_sec_cache{true}; - std::unique_ptr handle = - sec_cache->Lookup(ith_key, GetHelper(role), this, true, - /*advise_erase=*/true, kept_in_sec_cache); + std::unique_ptr handle = sec_cache->Lookup( + ith_key, GetHelper(role), this, true, + /*advise_erase=*/true, /*stats=*/nullptr, kept_in_sec_cache); ASSERT_NE(handle, nullptr); // Lookup returns the right data diff --git a/cache/lru_cache_test.cc b/cache/lru_cache_test.cc index 91b1d02c1..9f70a54cf 100644 --- a/cache/lru_cache_test.cc +++ b/cache/lru_cache_test.cc @@ -1091,7 +1091,8 @@ class TestSecondaryCache : public SecondaryCache { std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool /*wait*/, - bool /*advise_erase*/, bool& kept_in_sec_cache) override { + bool /*advise_erase*/, Statistics* /*stats*/, + bool& kept_in_sec_cache) override { std::string key_str = key.ToString(); TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str); diff --git a/cache/secondary_cache_adapter.cc b/cache/secondary_cache_adapter.cc index dce18390e..6261b8ce6 100644 --- a/cache/secondary_cache_adapter.cc +++ b/cache/secondary_cache_adapter.cc @@ -294,7 +294,8 @@ Cache::Handle* CacheWithSecondaryAdapter::Lookup(const Slice& key, bool kept_in_sec_cache = false; std::unique_ptr secondary_handle = secondary_cache_->Lookup(key, helper, create_context, /*wait*/ true, - found_dummy_entry, /*out*/ kept_in_sec_cache); + found_dummy_entry, stats, + /*out*/ kept_in_sec_cache); if (secondary_handle) { result = Promote(std::move(secondary_handle), key, helper, priority, stats, found_dummy_entry, kept_in_sec_cache); @@ -348,10 +349,10 @@ void CacheWithSecondaryAdapter::StartAsyncLookupOnMySecondary( assert(async_handle.result_handle == nullptr); std::unique_ptr secondary_handle = - secondary_cache_->Lookup(async_handle.key, async_handle.helper, - async_handle.create_context, /*wait*/ false, - async_handle.found_dummy_entry, - /*out*/ async_handle.kept_in_sec_cache); + secondary_cache_->Lookup( + async_handle.key, async_handle.helper, async_handle.create_context, + /*wait*/ false, async_handle.found_dummy_entry, async_handle.stats, + /*out*/ async_handle.kept_in_sec_cache); if (secondary_handle) { // TODO with stacked secondaries: Check & process if already ready? async_handle.pending_handle = secondary_handle.release(); diff --git a/cache/tiered_secondary_cache.cc b/cache/tiered_secondary_cache.cc index 1a1201a4d..f7d5dd91d 100644 --- a/cache/tiered_secondary_cache.cc +++ b/cache/tiered_secondary_cache.cc @@ -5,6 +5,8 @@ #include "cache/tiered_secondary_cache.h" +#include "monitoring/statistics_impl.h" + namespace ROCKSDB_NAMESPACE { // Creation callback for use in the lookup path. 
It calls the upper layer @@ -29,6 +31,9 @@ Status TieredSecondaryCache::MaybeInsertAndCreate( // TODO: Don't hardcode the source context->comp_sec_cache->InsertSaved(*context->key, data, type, source) .PermitUncheckedError(); + RecordTick(context->stats, COMPRESSED_SECONDARY_CACHE_PROMOTIONS); + } else { + RecordTick(context->stats, COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS); } // Primary cache will accept the object, so call its helper to create // the object @@ -43,10 +48,10 @@ Status TieredSecondaryCache::MaybeInsertAndCreate( std::unique_ptr TieredSecondaryCache::Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, - bool& kept_in_sec_cache) { + Statistics* stats, bool& kept_in_sec_cache) { bool dummy = false; std::unique_ptr result = - target()->Lookup(key, helper, create_context, wait, advise_erase, + target()->Lookup(key, helper, create_context, wait, advise_erase, stats, /*kept_in_sec_cache=*/dummy); // We never want the item to spill back into the secondary cache kept_in_sec_cache = true; @@ -66,9 +71,10 @@ std::unique_ptr TieredSecondaryCache::Lookup( ctx.helper = helper; ctx.inner_ctx = create_context; ctx.comp_sec_cache = target(); + ctx.stats = stats; return nvm_sec_cache_->Lookup(key, outer_helper, &ctx, wait, advise_erase, - kept_in_sec_cache); + stats, kept_in_sec_cache); } // If wait is false, i.e its an async lookup, we have to allocate a result @@ -80,8 +86,10 @@ std::unique_ptr TieredSecondaryCache::Lookup( handle->ctx()->helper = helper; handle->ctx()->inner_ctx = create_context; handle->ctx()->comp_sec_cache = target(); - handle->SetInnerHandle(nvm_sec_cache_->Lookup( - key, outer_helper, handle->ctx(), wait, advise_erase, kept_in_sec_cache)); + handle->ctx()->stats = stats; + handle->SetInnerHandle( + nvm_sec_cache_->Lookup(key, outer_helper, handle->ctx(), wait, + advise_erase, stats, kept_in_sec_cache)); if (!handle->inner_handle()) { handle.reset(); } else { diff --git a/cache/tiered_secondary_cache.h b/cache/tiered_secondary_cache.h index 46e3eb084..80542ba49 100644 --- a/cache/tiered_secondary_cache.h +++ b/cache/tiered_secondary_cache.h @@ -59,7 +59,7 @@ class TieredSecondaryCache : public SecondaryCacheWrapper { virtual std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, - bool& kept_in_sec_cache) override; + Statistics* stats, bool& kept_in_sec_cache) override; virtual void WaitAll( std::vector handles) override; @@ -72,6 +72,7 @@ class TieredSecondaryCache : public SecondaryCacheWrapper { Cache::CreateContext* inner_ctx; std::shared_ptr inner_handle; SecondaryCache* comp_sec_cache; + Statistics* stats; }; class ResultHandle : public SecondaryCacheResultHandle { diff --git a/cache/tiered_secondary_cache_test.cc b/cache/tiered_secondary_cache_test.cc index d641254df..28a393325 100644 --- a/cache/tiered_secondary_cache_test.cc +++ b/cache/tiered_secondary_cache_test.cc @@ -62,7 +62,7 @@ class TestSecondaryCache : public SecondaryCache { std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool /*advise_erase*/, - bool& kept_in_sec_cache) override { + Statistics* /*stats*/, bool& kept_in_sec_cache) override { std::string key_str = key.ToString(); TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str); diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc index c0e1aba6e..258d2da5e 100644 
--- a/db/blob/blob_source_test.cc +++ b/db/blob/blob_source_test.cc @@ -1220,7 +1220,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) { auto sec_handle0 = secondary_cache->Lookup( key0, BlobSource::SharedCacheInterface::GetFullHelper(), /*context*/ nullptr, true, - /*advise_erase=*/true, kept_in_sec_cache); + /*advise_erase=*/true, /*stats=*/nullptr, kept_in_sec_cache); ASSERT_FALSE(kept_in_sec_cache); ASSERT_NE(sec_handle0, nullptr); ASSERT_TRUE(sec_handle0->IsReady()); @@ -1248,7 +1248,7 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) { auto sec_handle1 = secondary_cache->Lookup( key1, BlobSource::SharedCacheInterface::GetFullHelper(), /*context*/ nullptr, true, - /*advise_erase=*/true, kept_in_sec_cache); + /*advise_erase=*/true, /*stats=*/nullptr, kept_in_sec_cache); ASSERT_FALSE(kept_in_sec_cache); ASSERT_EQ(sec_handle1, nullptr); diff --git a/include/rocksdb/secondary_cache.h b/include/rocksdb/secondary_cache.h index 49792ca67..b0419b121 100644 --- a/include/rocksdb/secondary_cache.h +++ b/include/rocksdb/secondary_cache.h @@ -114,7 +114,7 @@ class SecondaryCache : public Customizable { virtual std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, - bool& kept_in_sec_cache) = 0; + Statistics* stats, bool& kept_in_sec_cache) = 0; // Indicate whether a handle can be erased in this secondary cache. [[nodiscard]] virtual bool SupportForceErase() const = 0; @@ -176,9 +176,9 @@ class SecondaryCacheWrapper : public SecondaryCache { virtual std::unique_ptr Lookup( const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, - bool& kept_in_sec_cache) override { + Statistics* stats, bool& kept_in_sec_cache) override { return target()->Lookup(key, helper, create_context, wait, advise_erase, - kept_in_sec_cache); + stats, kept_in_sec_cache); } virtual bool SupportForceErase() const override { diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index ecddf5c7a..9aab33712 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -531,6 +531,12 @@ enum Tickers : uint32_t { // Number of FS reads avoided due to scan prefetching PREFETCH_HITS, + // Compressed secondary cache related stats + COMPRESSED_SECONDARY_CACHE_DUMMY_HITS, + COMPRESSED_SECONDARY_CACHE_HITS, + COMPRESSED_SECONDARY_CACHE_PROMOTIONS, + COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS, + TICKER_ENUM_MAX }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 840956dae..45d0c184c 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5175,6 +5175,15 @@ class TickerTypeJni { return -0x41; case ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS: return -0x42; + case ROCKSDB_NAMESPACE::Tickers::COMPRESSED_SECONDARY_CACHE_DUMMY_HITS: + return -0x43; + case ROCKSDB_NAMESPACE::Tickers::COMPRESSED_SECONDARY_CACHE_HITS: + return -0x44; + case ROCKSDB_NAMESPACE::Tickers::COMPRESSED_SECONDARY_CACHE_PROMOTIONS: + return -0x45; + case ROCKSDB_NAMESPACE::Tickers:: + COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS: + return -0x46; case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: // 0x5F was the max value in the initial copy of tickers to Java. 
// Since these values are exposed directly to Java clients, we keep @@ -5550,6 +5559,17 @@ class TickerTypeJni { return ROCKSDB_NAMESPACE::Tickers::PREFETCH_BYTES_USEFUL; case -0x42: return ROCKSDB_NAMESPACE::Tickers::PREFETCH_HITS; + case -0x43: + return ROCKSDB_NAMESPACE::Tickers:: + COMPRESSED_SECONDARY_CACHE_DUMMY_HITS; + case -0x44: + return ROCKSDB_NAMESPACE::Tickers::COMPRESSED_SECONDARY_CACHE_HITS; + case -0x45: + return ROCKSDB_NAMESPACE::Tickers:: + COMPRESSED_SECONDARY_CACHE_PROMOTIONS; + case -0x46: + return ROCKSDB_NAMESPACE::Tickers:: + COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS; case 0x5F: // 0x5F was the max value in the initial copy of tickers to Java. // Since these values are exposed directly to Java clients, we keep diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index d0a2a71a8..cc679ec0a 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -264,6 +264,14 @@ const std::vector> TickersNameMap = { {PREFETCH_BYTES, "rocksdb.prefetch.bytes"}, {PREFETCH_BYTES_USEFUL, "rocksdb.prefetch.bytes.useful"}, {PREFETCH_HITS, "rocksdb.prefetch.hits"}, + {COMPRESSED_SECONDARY_CACHE_DUMMY_HITS, + "rocksdb.compressed.secondary.cache.dummy.hits"}, + {COMPRESSED_SECONDARY_CACHE_HITS, + "rocksdb.compressed.secondary.cache.hits"}, + {COMPRESSED_SECONDARY_CACHE_PROMOTIONS, + "rocksdb.compressed.secondary.cache.promotions"}, + {COMPRESSED_SECONDARY_CACHE_PROMOTION_SKIPS, + "rocksdb.compressed.secondary.cache.promotion.skips"}, }; const std::vector> HistogramsNameMap = { diff --git a/options/customizable_test.cc b/options/customizable_test.cc index f59724656..696f1b25e 100644 --- a/options/customizable_test.cc +++ b/options/customizable_test.cc @@ -1241,7 +1241,8 @@ class TestSecondaryCache : public SecondaryCache { std::unique_ptr Lookup( const Slice& /*key*/, const Cache::CacheItemHelper* /*helper*/, Cache::CreateContext* /*create_context*/, bool /*wait*/, - bool /*advise_erase*/, bool& kept_in_sec_cache) override { + bool /*advise_erase*/, Statistics* /*stats*/, + bool& kept_in_sec_cache) override { kept_in_sec_cache = true; return nullptr; } diff --git a/utilities/fault_injection_secondary_cache.cc b/utilities/fault_injection_secondary_cache.cc index c2ea12535..fa93e8244 100644 --- a/utilities/fault_injection_secondary_cache.cc +++ b/utilities/fault_injection_secondary_cache.cc @@ -92,6 +92,7 @@ FaultInjectionSecondaryCache::Lookup(const Slice& key, const Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, + Statistics* stats, bool& kept_in_sec_cache) { ErrorContext* ctx = GetErrorContext(); if (base_is_compressed_sec_cache_) { @@ -99,11 +100,12 @@ FaultInjectionSecondaryCache::Lookup(const Slice& key, return nullptr; } else { return base_->Lookup(key, helper, create_context, wait, advise_erase, - kept_in_sec_cache); + stats, kept_in_sec_cache); } } else { - std::unique_ptr hdl = base_->Lookup( - key, helper, create_context, wait, advise_erase, kept_in_sec_cache); + std::unique_ptr hdl = + base_->Lookup(key, helper, create_context, wait, advise_erase, stats, + kept_in_sec_cache); if (wait && ctx->rand.OneIn(prob_)) { hdl.reset(); } diff --git a/utilities/fault_injection_secondary_cache.h b/utilities/fault_injection_secondary_cache.h index dd73ac156..502478d40 100644 --- a/utilities/fault_injection_secondary_cache.h +++ b/utilities/fault_injection_secondary_cache.h @@ -43,7 +43,7 @@ class FaultInjectionSecondaryCache : public SecondaryCache { std::unique_ptr Lookup( const Slice& key, const 
Cache::CacheItemHelper* helper, Cache::CreateContext* create_context, bool wait, bool advise_erase, - bool& kept_in_sec_cache) override; + Statistics* stats, bool& kept_in_sec_cache) override; bool SupportForceErase() const override { return base_->SupportForceErase(); } From 54d628602706c0c718cf81f87202e0b8f6615faf Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 15 Dec 2023 13:01:14 -0800 Subject: [PATCH 337/386] Update HISTORY.md for 8.10 --- HISTORY.md | 38 +++++++++++++++++++ .../behavior_changes/auto_readahead_size.md | 1 - .../behavior_changes/debt_based_speedup.md | 1 - .../behavior_changes/eviction_effort_cap.md | 1 - .../bug_fixes/auto_readahead_size.md | 1 - .../bug_fixes/avoid_destroying_timer.md | 1 - ...ix_bytes_written_ticker_double_counting.md | 1 - .../bug_fixes/fix_stall_counter.md | 1 - ...tiered_cache_low_latency_false_negative.md | 1 - .../async_support_tune_readahead.md | 1 - .../new_features/wbwi_wide_columns.md | 1 - .../java_api_consistency.md | 16 -------- .../public_api_changes/null_collector.md | 1 - 13 files changed, 38 insertions(+), 27 deletions(-) delete mode 100644 unreleased_history/behavior_changes/auto_readahead_size.md delete mode 100644 unreleased_history/behavior_changes/debt_based_speedup.md delete mode 100644 unreleased_history/behavior_changes/eviction_effort_cap.md delete mode 100644 unreleased_history/bug_fixes/auto_readahead_size.md delete mode 100644 unreleased_history/bug_fixes/avoid_destroying_timer.md delete mode 100644 unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md delete mode 100644 unreleased_history/bug_fixes/fix_stall_counter.md delete mode 100644 unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md delete mode 100644 unreleased_history/new_features/async_support_tune_readahead.md delete mode 100644 unreleased_history/new_features/wbwi_wide_columns.md delete mode 100644 unreleased_history/performance_improvements/java_api_consistency.md delete mode 100644 unreleased_history/public_api_changes/null_collector.md diff --git a/HISTORY.md b/HISTORY.md index 47adf0040..0d7c284b2 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,44 @@ # Rocksdb Change Log > NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` +## 8.10.0 (12/15/2023) +### New Features +* Provide support for async_io to trim readahead_size by doing block cache lookup +* Added initial wide-column support in `WriteBatchWithIndex`. This includes the `PutEntity` API and support for wide columns in the existing read APIs (`GetFromBatch`, `GetFromBatchAndDB`, `MultiGetFromBatchAndDB`, and `BaseDeltaIterator`). + +### Public API Changes +* Custom implementations of `TablePropertiesCollectorFactory` may now return a `nullptr` collector to decline processing a file, reducing callback overheads in such cases. + +### Behavior Changes +* Make ReadOptions.auto_readahead_size default true which does prefetching optimizations for forward scans if iterate_upper_bound and block_cache is also specified. +* Compactions can be scheduled in parallel in an additional scenario: high compaction debt relative to the data size +* HyperClockCache now has built-in protection against excessive CPU consumption under the extreme stress condition of no (or very few) evictable cache entries, which can slightly increase memory usage such conditions. New option `HyperClockCacheOptions::eviction_effort_cap` controls the space-time trade-off of the response. 
The default should be generally well-balanced, with no measurable affect on normal operation. + +### Bug Fixes +* Fix a corner case with auto_readahead_size where Prev Operation returns NOT SUPPORTED error when scans direction is changed from forward to backward. +* Avoid destroying the periodic task scheduler's default timer in order to prevent static destruction order issues. +* Fix double counting of BYTES_WRITTEN ticker when doing writes with transactions. +* Fix a WRITE_STALL counter that was reporting wrong value in few cases. +* A lookup by MultiGet in a TieredCache that goes to the local flash cache and finishes with very low latency, i.e before the subsequent call to WaitAll, is ignored, resulting in a false negative and a memory leak. + +### Performance Improvements +* Java API extensions to improve consistency and completeness of APIs +1 Extended `RocksDB.get([ColumnFamilyHandle columnFamilyHandle,] ReadOptions opt, ByteBuffer key, ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters +2 Extended `RocksDB.put( [ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters +3 Added `RocksDB.merge([ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOptions, ByteBuffer key, ByteBuffer value)` methods with the same parameter options as `put(...)` - direct and indirect buffers are supported +4 Added `RocksIterator.key( byte[] key [, int offset, int len])` methods which retrieve the iterator key into the supplied buffer +5 Added `RocksIterator.value( byte[] value [, int offset, int len])` methods which retrieve the iterator value into the supplied buffer +6 Deprecated `get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, byte[])` in favour of `get(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, byte[])` which has consistent parameter ordering with other methods in the same class +7 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value)` methods which retrieve the requested value into the supplied buffer +8 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which retrieve the requested value into the supplied buffer +9 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer +10 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer +11 Added `Transaction.getIterator()` method as a convenience which defaults the `ReadOptions` value supplied to existing `Transaction.iterator()` methods. This mirrors the existing `RocksDB.iterator()` method. 
+12 Added `Transaction.put([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written in a `ByteBuffer` parameter +13 Added `Transaction.merge([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter +14 Added `Transaction.mergeUntracked([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter + + ## 8.9.0 (11/17/2023) ### New Features * Add GetEntity() and PutEntity() API implementation for Attribute Group support. Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. diff --git a/unreleased_history/behavior_changes/auto_readahead_size.md b/unreleased_history/behavior_changes/auto_readahead_size.md deleted file mode 100644 index b1c98dd86..000000000 --- a/unreleased_history/behavior_changes/auto_readahead_size.md +++ /dev/null @@ -1 +0,0 @@ -Make ReadOptions.auto_readahead_size default true which does prefetching optimizations for forward scans if iterate_upper_bound and block_cache is also specified. diff --git a/unreleased_history/behavior_changes/debt_based_speedup.md b/unreleased_history/behavior_changes/debt_based_speedup.md deleted file mode 100644 index 8db8b6688..000000000 --- a/unreleased_history/behavior_changes/debt_based_speedup.md +++ /dev/null @@ -1 +0,0 @@ -Compactions can be scheduled in parallel in an additional scenario: high compaction debt relative to the data size diff --git a/unreleased_history/behavior_changes/eviction_effort_cap.md b/unreleased_history/behavior_changes/eviction_effort_cap.md deleted file mode 100644 index 7772141e1..000000000 --- a/unreleased_history/behavior_changes/eviction_effort_cap.md +++ /dev/null @@ -1 +0,0 @@ -HyperClockCache now has built-in protection against excessive CPU consumption under the extreme stress condition of no (or very few) evictable cache entries, which can slightly increase memory usage such conditions. New option `HyperClockCacheOptions::eviction_effort_cap` controls the space-time trade-off of the response. The default should be generally well-balanced, with no measurable affect on normal operation. diff --git a/unreleased_history/bug_fixes/auto_readahead_size.md b/unreleased_history/bug_fixes/auto_readahead_size.md deleted file mode 100644 index e82973771..000000000 --- a/unreleased_history/bug_fixes/auto_readahead_size.md +++ /dev/null @@ -1 +0,0 @@ -Fix a corner case with auto_readahead_size where Prev Operation returns NOT SUPPORTED error when scans direction is changed from forward to backward. diff --git a/unreleased_history/bug_fixes/avoid_destroying_timer.md b/unreleased_history/bug_fixes/avoid_destroying_timer.md deleted file mode 100644 index 2a70b6b31..000000000 --- a/unreleased_history/bug_fixes/avoid_destroying_timer.md +++ /dev/null @@ -1 +0,0 @@ -Avoid destroying the periodic task scheduler's default timer in order to prevent static destruction order issues. 
diff --git a/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md b/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md deleted file mode 100644 index fe6859874..000000000 --- a/unreleased_history/bug_fixes/fix_bytes_written_ticker_double_counting.md +++ /dev/null @@ -1 +0,0 @@ -Fix double counting of BYTES_WRITTEN ticker when doing writes with transactions. diff --git a/unreleased_history/bug_fixes/fix_stall_counter.md b/unreleased_history/bug_fixes/fix_stall_counter.md deleted file mode 100644 index be9e5122f..000000000 --- a/unreleased_history/bug_fixes/fix_stall_counter.md +++ /dev/null @@ -1 +0,0 @@ -Fix a WRITE_STALL counter that was reporting wrong value in few cases. diff --git a/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md b/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md deleted file mode 100644 index c271724ad..000000000 --- a/unreleased_history/bug_fixes/tiered_cache_low_latency_false_negative.md +++ /dev/null @@ -1 +0,0 @@ -A lookup by MultiGet in a TieredCache that goes to the local flash cache and finishes with very low latency, i.e before the subsequent call to WaitAll, is ignored, resulting in a false negative and a memory leak. diff --git a/unreleased_history/new_features/async_support_tune_readahead.md b/unreleased_history/new_features/async_support_tune_readahead.md deleted file mode 100644 index 8794a493a..000000000 --- a/unreleased_history/new_features/async_support_tune_readahead.md +++ /dev/null @@ -1 +0,0 @@ -Provide support for async_io to trim readahead_size by doing block cache lookup diff --git a/unreleased_history/new_features/wbwi_wide_columns.md b/unreleased_history/new_features/wbwi_wide_columns.md deleted file mode 100644 index 908279e1c..000000000 --- a/unreleased_history/new_features/wbwi_wide_columns.md +++ /dev/null @@ -1 +0,0 @@ -Added initial wide-column support in `WriteBatchWithIndex`. This includes the `PutEntity` API and support for wide columns in the existing read APIs (`GetFromBatch`, `GetFromBatchAndDB`, `MultiGetFromBatchAndDB`, and `BaseDeltaIterator`). 
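Since the entry above names the new `WriteBatchWithIndex::PutEntity` API, here is a hedged sketch of its expected use, assuming the overload mirrors `DB::PutEntity` (column family handle, key, and `WideColumns`):

#include "rocksdb/utilities/write_batch_with_index.h"
#include "rocksdb/wide_columns.h"

ROCKSDB_NAMESPACE::Status WriteWideColumnEntity(
    ROCKSDB_NAMESPACE::WriteBatchWithIndex& wbwi,
    ROCKSDB_NAMESPACE::ColumnFamilyHandle* cfh) {
  // Two named columns for one key; the batch can later be read back through
  // GetFromBatch / BaseDeltaIterator as described in the entry above.
  ROCKSDB_NAMESPACE::WideColumns columns{{"attr1", "value1"},
                                         {"attr2", "value2"}};
  return wbwi.PutEntity(cfh, "my_key", columns);
}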
diff --git a/unreleased_history/performance_improvements/java_api_consistency.md b/unreleased_history/performance_improvements/java_api_consistency.md deleted file mode 100644 index 7e29b629c..000000000 --- a/unreleased_history/performance_improvements/java_api_consistency.md +++ /dev/null @@ -1,16 +0,0 @@ -* Java API extensions to improve consistency and completeness of APIs - 1 Extended `RocksDB.get([ColumnFamilyHandle columnFamilyHandle,] ReadOptions opt, ByteBuffer key, ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters - 2 Extended `RocksDB.put( [ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters - 3 Added `RocksDB.merge([ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOptions, ByteBuffer key, ByteBuffer value)` methods with the same parameter options as `put(...)` - direct and indirect buffers are supported - 4 Added `RocksIterator.key( byte[] key [, int offset, int len])` methods which retrieve the iterator key into the supplied buffer - 5 Added `RocksIterator.value( byte[] value [, int offset, int len])` methods which retrieve the iterator value into the supplied buffer - 6 Deprecated `get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, byte[])` in favour of `get(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, byte[])` which has consistent parameter ordering with other methods in the same class - 7 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value)` methods which retrieve the requested value into the supplied buffer - 8 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which retrieve the requested value into the supplied buffer - 9 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer - 10 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer - 11 Added `Transaction.getIterator()` method as a convenience which defaults the `ReadOptions` value supplied to existing `Transaction.iterator()` methods. This mirrors the existing `RocksDB.iterator()` method. 
- 12 Added `Transaction.put([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written in a `ByteBuffer` parameter - 13 Added `Transaction.merge([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter - 14 Added `Transaction.mergeUntracked([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter - diff --git a/unreleased_history/public_api_changes/null_collector.md b/unreleased_history/public_api_changes/null_collector.md deleted file mode 100644 index 2af478c83..000000000 --- a/unreleased_history/public_api_changes/null_collector.md +++ /dev/null @@ -1 +0,0 @@ -Custom implementations of `TablePropertiesCollectorFactory` may now return a `nullptr` collector to decline processing a file, reducing callback overheads in such cases. From 39abded9c18c93cf9ef3ec831d140f5d107f5f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Wed, 10 Jan 2024 14:34:12 -0800 Subject: [PATCH 338/386] Fix the compactRange with wrong cf handle when ClipColumnFamily (#12219) Summary: - **Context**: In ClipColumnFamily, the DeleteRange API is used to delete data, and CompactRange is then called for physical deletion. However, the ColumnFamilyHandle is not passed, so by default only the DefaultColumnFamily is compacted. As a result, data in some SST files covered by the CompactRange may not be physically deleted. - **In this change** Pass the ColumnFamilyHandle when calling CompactRange Pull Request resolved: https://github.com/facebook/rocksdb/pull/12219 Reviewed By: ajkr Differential Revision: D52665162 Pulled By: cbi42 fbshipit-source-id: e8e997aa25ec4ca40e347be89edc7e84a7a0edce --- db/db_impl/db_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 297c6aceb..78121f783 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -6215,7 +6215,7 @@ Status DBImpl::ClipColumnFamily(ColumnFamilyHandle* column_family, // last level to compact to and that range tombstones are not dropped // during non-bottommost compactions, calling CompactRange() on these two // ranges may not clear all range tombstones. - status = CompactRange(compact_options, nullptr, nullptr); + status = CompactRange(compact_options, column_family, nullptr, nullptr); } return status; } From ef430fc72407950f94ca2a4fbb2b15de7ae8ff4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Mon, 5 Feb 2024 11:05:57 -0800 Subject: [PATCH 339/386] Fix the problem that wrong Key may be passed when using CompactRange JAVA API (#12328) Summary: When using the RocksDB Java API, calling `db.compactRange(columnFamilyHandle, start, null)` means we want a range compaction over all keys greater than **start**, and
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12328

Reviewed By: jowlyzhang

Differential Revision: D53432749

Pulled By: cbi42

fbshipit-source-id: eeadd19d05667230568668946d2ef1d5b2568268
---
 java/rocksjni/rocksjni.cc | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/java/rocksjni/rocksjni.cc b/java/rocksjni/rocksjni.cc
index 97244dd5e..8b44a21dc 100644
--- a/java/rocksjni/rocksjni.cc
+++ b/java/rocksjni/rocksjni.cc
@@ -3055,17 +3055,20 @@ void Java_org_rocksdb_RocksDB_compactRange(JNIEnv* env, jobject,
   }

   ROCKSDB_NAMESPACE::Status s;
-  if (jbegin_len > 0 || jend_len > 0) {
-    const ROCKSDB_NAMESPACE::Slice begin(str_begin);
-    const ROCKSDB_NAMESPACE::Slice end(str_end);
-    s = db->CompactRange(*compact_range_opts, cf_handle, &begin, &end);
-  } else {
-    s = db->CompactRange(*compact_range_opts, cf_handle, nullptr, nullptr);
-  }
+  std::unique_ptr<ROCKSDB_NAMESPACE::Slice> begin;
+  std::unique_ptr<ROCKSDB_NAMESPACE::Slice> end;
+  if (jbegin_len > 0) {
+    begin.reset(new ROCKSDB_NAMESPACE::Slice(str_begin));
+  }
+  if (jend_len > 0) {
+    end.reset(new ROCKSDB_NAMESPACE::Slice(str_end));
+  }
+  s = db->CompactRange(*compact_range_opts, cf_handle, begin.get(), end.get());

   if (jcompact_range_opts_handle == 0) {
     delete compact_range_opts;
   }
+  ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s);
 }

From 5db6ad70c58c4eeaf55b83deaf3e1acf220d9c46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?=
Date: Tue, 13 Feb 2024 10:48:31 -0800
Subject: [PATCH 340/386] add unit test for compactRangeWithNullBoundaries java api (#12333)

Summary:
The purpose of this PR is to supplement a set of unit tests for https://github.com/facebook/rocksdb/pull/12328

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12333

Reviewed By: ltamasi

Differential Revision: D53553830

Pulled By: cbi42

fbshipit-source-id: d21490f7ce7b30f42807ee37eda455ca6abdd072
---
 .../test/java/org/rocksdb/RocksDBTest.java | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java
index 625b8e089..d6b00ed6a 100644
--- a/java/src/test/java/org/rocksdb/RocksDBTest.java
+++ b/java/src/test/java/org/rocksdb/RocksDBTest.java
@@ -1136,6 +1136,40 @@ public void compactRangeToLevelColumnFamily()
     }
   }

+  @Test
+  public void compactRangeWithNullBoundaries() throws RocksDBException {
+    try (final Options opt = new Options()
+                                 .setCreateIfMissing(true)
+                                 .setDisableAutoCompactions(true)
+                                 .setCompactionStyle(CompactionStyle.LEVEL)
+                                 .setNumLevels(4)
+                                 .setWriteBufferSize(100 << 10)
+                                 .setLevelZeroFileNumCompactionTrigger(3)
+                                 .setTargetFileSizeBase(200 << 10)
+                                 .setTargetFileSizeMultiplier(1)
+                                 .setMaxBytesForLevelBase(500 << 10)
+                                 .setMaxBytesForLevelMultiplier(1)
+                                 .setDisableAutoCompactions(true);
+         final FlushOptions flushOptions = new FlushOptions();
+         final RocksDB db = RocksDB.open(opt, dbFolder.getRoot().getAbsolutePath())) {
+      final byte[] b = new byte[10000];
+      // Create an SST containing key4, key5, and key6
+      db.put(("key4").getBytes(), b);
+      db.put(("key5").getBytes(), b);
+      db.put(("key6").getBytes(), b);
+      db.flush(flushOptions);
+      // Create a new SST that includes the tombstones of all keys
+
db.delete(("key4").getBytes()); + db.delete(("key5").getBytes()); + db.delete(("key6").getBytes()); + db.flush(flushOptions); + + db.compactRange(("key4").getBytes(), null); + List liveFilesMetaData = db.getLiveFilesMetaData(); + assertThat(liveFilesMetaData.size()).isEqualTo(0); + } + } + @Test public void continueBackgroundWorkAfterCancelAllBackgroundWork() throws RocksDBException { final int KEY_SIZE = 20; From d606c9450bef7d2a22c794f406d7940d9d2f29a4 Mon Sep 17 00:00:00 2001 From: azagrebin Date: Wed, 6 Feb 2019 15:38:57 +0100 Subject: [PATCH 341/386] [FLINK-10471] Add Apache Flink specific compaction filter to evict expired state which has time-to-live --- CMakeLists.txt | 2 + Makefile | 3 + TARGETS | 5 + Vagrantfile | 2 +- java/CMakeLists.txt | 3 + java/Makefile | 1 + java/crossbuild/Vagrantfile | 2 +- java/rocksjni/flink_compactionfilterjni.cc | 239 ++++++++++++ .../org/rocksdb/FlinkCompactionFilter.java | 177 +++++++++ .../src/test/java/org/rocksdb/FilterTest.java | 2 +- .../rocksdb/FlinkCompactionFilterTest.java | 356 ++++++++++++++++++ src.mk | 3 + utilities/flink/flink_compaction_filter.cc | 206 ++++++++++ utilities/flink/flink_compaction_filter.h | 191 ++++++++++ .../flink/flink_compaction_filter_test.cc | 226 +++++++++++ 15 files changed, 1415 insertions(+), 3 deletions(-) create mode 100644 java/rocksjni/flink_compactionfilterjni.cc create mode 100644 java/src/main/java/org/rocksdb/FlinkCompactionFilter.java create mode 100644 java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java create mode 100644 utilities/flink/flink_compaction_filter.cc create mode 100644 utilities/flink/flink_compaction_filter.h create mode 100644 utilities/flink/flink_compaction_filter_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 23a4014bc..4c11b4fe4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -894,6 +894,7 @@ set(SOURCES utilities/fault_injection_env.cc utilities/fault_injection_fs.cc utilities/fault_injection_secondary_cache.cc + utilities/flink/flink_compaction_filter.cc utilities/leveldb_options/leveldb_options.cc utilities/memory/memory_util.cc utilities/merge_operators.cc @@ -1441,6 +1442,7 @@ if(WITH_TESTS) utilities/cassandra/cassandra_format_test.cc utilities/cassandra/cassandra_row_merge_test.cc utilities/cassandra/cassandra_serialize_test.cc + utilities/flink/flink_compaction_filter_test.cc utilities/checkpoint/checkpoint_test.cc utilities/env_timed_test.cc utilities/memory/memory_test.cc diff --git a/Makefile b/Makefile index 8829be9d8..09900875b 100644 --- a/Makefile +++ b/Makefile @@ -1414,6 +1414,9 @@ histogram_test: $(OBJ_DIR)/monitoring/histogram_test.o $(TEST_LIBRARY) $(LIBRARY thread_local_test: $(OBJ_DIR)/util/thread_local_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) +flink_compaction_filter_test: $(OBJ_DIR)/utilities/flink/flink_compaction_filter_test.o $(TEST_LIBRARY) $(LIBRARY) + $(AM_LINK) + work_queue_test: $(OBJ_DIR)/util/work_queue_test.o $(TEST_LIBRARY) $(LIBRARY) $(AM_LINK) diff --git a/TARGETS b/TARGETS index e8aaf325d..a28dd0ae1 100644 --- a/TARGETS +++ b/TARGETS @@ -292,6 +292,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "utilities/fault_injection_env.cc", "utilities/fault_injection_fs.cc", "utilities/fault_injection_secondary_cache.cc", + "utilities/flink/flink_compaction_filter.cc", "utilities/leveldb_options/leveldb_options.cc", "utilities/memory/memory_util.cc", "utilities/merge_operators.cc", @@ -5081,6 +5082,10 @@ cpp_unittest_wrapper(name="filename_test", deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) 
+cpp_unittest_wrapper(name="flink_compaction_filter_test", + srcs=["utilities/flink/flink_compaction_filter_test.cc"], + deps=[":rocksdb_test_lib"], + extra_compiler_flags=[]) cpp_unittest_wrapper(name="flush_job_test", srcs=["db/flush_job_test.cc"], diff --git a/Vagrantfile b/Vagrantfile index 07f2e99fd..3dcedaf76 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -2,7 +2,7 @@ Vagrant.configure("2") do |config| config.vm.provider "virtualbox" do |v| - v.memory = 4096 + v.memory = 6096 v.cpus = 2 end diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index cf859ae95..f9fd9b564 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -41,6 +41,7 @@ set(JNI_NATIVE_SOURCES rocksjni/event_listener.cc rocksjni/event_listener_jnicallback.cc rocksjni/export_import_files_metadatajni.cc + rocksjni/flink_compactionfilterjni.cc rocksjni/filter.cc rocksjni/import_column_family_options.cc rocksjni/hyper_clock_cache.cc @@ -165,6 +166,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/Filter.java src/main/java/org/rocksdb/FilterPolicyType.java src/main/java/org/rocksdb/FileOperationInfo.java + src/main/java/org/rocksdb/FlinkCompactionFilter.java src/main/java/org/rocksdb/FlushJobInfo.java src/main/java/org/rocksdb/FlushReason.java src/main/java/org/rocksdb/FlushOptions.java @@ -685,6 +687,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.11.4") org.rocksdb.Env org.rocksdb.EnvOptions org.rocksdb.Filter + org.rocksdb.FlinkCompactionFilter org.rocksdb.FlushOptions org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig diff --git a/java/Makefile b/java/Makefile index a887a24b3..1a3dd3643 100644 --- a/java/Makefile +++ b/java/Makefile @@ -32,6 +32,7 @@ NATIVE_JAVA_CLASSES = \ org.rocksdb.DirectSlice\ org.rocksdb.Env\ org.rocksdb.EnvOptions\ + org.rocksdb.FlinkCompactionFilter\ org.rocksdb.FlushOptions\ org.rocksdb.Filter\ org.rocksdb.IngestExternalFileOptions\ diff --git a/java/crossbuild/Vagrantfile b/java/crossbuild/Vagrantfile index 0ee50de2c..a3035e683 100644 --- a/java/crossbuild/Vagrantfile +++ b/java/crossbuild/Vagrantfile @@ -33,7 +33,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| end config.vm.provider "virtualbox" do |v| - v.memory = 2048 + v.memory = 6048 v.cpus = 4 v.customize ["modifyvm", :id, "--nictype1", "virtio" ] end diff --git a/java/rocksjni/flink_compactionfilterjni.cc b/java/rocksjni/flink_compactionfilterjni.cc new file mode 100644 index 000000000..9f0954b43 --- /dev/null +++ b/java/rocksjni/flink_compactionfilterjni.cc @@ -0,0 +1,239 @@ +#include // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include +#include + +#include "include/org_rocksdb_FlinkCompactionFilter.h" +#include "loggerjnicallback.h" +#include "portal.h" +#include "rocksjni/jnicallback.h" +#include "utilities/flink/flink_compaction_filter.h" + +using namespace ROCKSDB_NAMESPACE::flink; + +class JniCallbackBase : public ROCKSDB_NAMESPACE::JniCallback { + public: + JniCallbackBase(JNIEnv* env, jobject jcallback_obj) + : JniCallback(env, jcallback_obj) {} + + protected: + inline void CheckAndRethrowException(JNIEnv* env) const { + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + env->Throw(env->ExceptionOccurred()); + } + } +}; + +// This list element filter operates on list state for which byte length of +// elements is unknown (variable), the list element serializer has to be used in +// this case to compute the offset of the next element. The filter wraps java +// object implenented in Flink. The java object holds element serializer and +// performs filtering. +class JavaListElementFilter + : public ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::ListElementFilter, + JniCallbackBase { + public: + JavaListElementFilter(JNIEnv* env, jobject jlist_filter) + : JniCallbackBase(env, jlist_filter) { + jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( + env, "org/rocksdb/FlinkCompactionFilter$ListElementFilter"); + if (jclazz == nullptr) { + // exception occurred accessing class + return; + } + m_jnext_unexpired_offset_methodid = + env->GetMethodID(jclazz, "nextUnexpiredOffset", "([BJJ)I"); + assert(m_jnext_unexpired_offset_methodid != nullptr); + } + + std::size_t NextUnexpiredOffset(const ROCKSDB_NAMESPACE::Slice& list, + int64_t ttl, + int64_t current_timestamp) const override { + jboolean attached_thread = JNI_FALSE; + JNIEnv* env = getJniEnv(&attached_thread); + jbyteArray jlist = ROCKSDB_NAMESPACE::JniUtil::copyBytes(env, list); + CheckAndRethrowException(env); + if (jlist == nullptr) { + return static_cast(-1); + } + auto jl_ttl = static_cast(ttl); + auto jl_current_timestamp = static_cast(current_timestamp); + jint next_offset = + env->CallIntMethod(m_jcallback_obj, m_jnext_unexpired_offset_methodid, + jlist, jl_ttl, jl_current_timestamp); + CheckAndRethrowException(env); + env->DeleteLocalRef(jlist); + releaseJniEnv(attached_thread); + return static_cast(next_offset); + }; + + private: + jmethodID m_jnext_unexpired_offset_methodid; +}; + +class JavaListElemenFilterFactory + : public ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter:: + ListElementFilterFactory, + JniCallbackBase { + public: + JavaListElemenFilterFactory(JNIEnv* env, jobject jlist_filter_factory) + : JniCallbackBase(env, jlist_filter_factory) { + jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( + env, "org/rocksdb/FlinkCompactionFilter$ListElementFilterFactory"); + if (jclazz == nullptr) { + // exception occurred accessing class + return; + } + m_jcreate_filter_methodid = env->GetMethodID( + jclazz, "createListElementFilter", + "()Lorg/rocksdb/FlinkCompactionFilter$ListElementFilter;"); + assert(m_jcreate_filter_methodid != nullptr); + } + + FlinkCompactionFilter::ListElementFilter* CreateListElementFilter( + std::shared_ptr /*logger*/) const override { + jboolean attached_thread = JNI_FALSE; + JNIEnv* env = getJniEnv(&attached_thread); + auto jlist_filter = + env->CallObjectMethod(m_jcallback_obj, m_jcreate_filter_methodid); + auto list_filter = new JavaListElementFilter(env, jlist_filter); + CheckAndRethrowException(env); + releaseJniEnv(attached_thread); + return list_filter; + }; + + private: + jmethodID 
m_jcreate_filter_methodid; +}; + +class JavaTimeProvider + : public ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::TimeProvider, + JniCallbackBase { + public: + JavaTimeProvider(JNIEnv* env, jobject jtime_provider) + : JniCallbackBase(env, jtime_provider) { + jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( + env, "org/rocksdb/FlinkCompactionFilter$TimeProvider"); + if (jclazz == nullptr) { + // exception occurred accessing class + return; + } + m_jcurrent_timestamp_methodid = + env->GetMethodID(jclazz, "currentTimestamp", "()J"); + assert(m_jcurrent_timestamp_methodid != nullptr); + } + + int64_t CurrentTimestamp() const override { + jboolean attached_thread = JNI_FALSE; + JNIEnv* env = getJniEnv(&attached_thread); + auto jtimestamp = + env->CallLongMethod(m_jcallback_obj, m_jcurrent_timestamp_methodid); + CheckAndRethrowException(env); + releaseJniEnv(attached_thread); + return static_cast(jtimestamp); + }; + + private: + jmethodID m_jcurrent_timestamp_methodid; +}; + +static FlinkCompactionFilter::ListElementFilterFactory* +createListElementFilterFactory(JNIEnv* env, jint ji_list_elem_len, + jobject jlist_filter_factory) { + FlinkCompactionFilter::ListElementFilterFactory* list_filter_factory = + nullptr; + if (ji_list_elem_len > 0) { + auto fixed_size = static_cast(ji_list_elem_len); + list_filter_factory = + new FlinkCompactionFilter::FixedListElementFilterFactory( + fixed_size, static_cast(0)); + } else if (jlist_filter_factory != nullptr) { + list_filter_factory = + new JavaListElemenFilterFactory(env, jlist_filter_factory); + } + return list_filter_factory; +} + +/*x + * Class: org_rocksdb_FlinkCompactionFilter + * Method: createNewFlinkCompactionFilterConfigHolder + * Signature: ()J + */ +jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfigHolder( + JNIEnv* /* env */, jclass /* jcls */) { + using namespace ROCKSDB_NAMESPACE::flink; + return reinterpret_cast( + new std::shared_ptr( + new FlinkCompactionFilter::ConfigHolder())); +} + +/* + * Class: org_rocksdb_FlinkCompactionFilter + * Method: disposeFlinkCompactionFilterConfigHolder + * Signature: (J)V + */ +void Java_org_rocksdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHolder( + JNIEnv* /* env */, jclass /* jcls */, jlong handle) { + using namespace ROCKSDB_NAMESPACE::flink; + auto* config_holder = + reinterpret_cast*>( + handle); + delete config_holder; +} + +/* + * Class: org_rocksdb_FlinkCompactionFilter + * Method: createNewFlinkCompactionFilter0 + * Signature: (JJJ)J + */ +jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( + JNIEnv* env, jclass /* jcls */, jlong config_holder_handle, + jobject jtime_provider, jlong logger_handle) { + using namespace ROCKSDB_NAMESPACE::flink; + auto config_holder = + *(reinterpret_cast*>( + config_holder_handle)); + auto time_provider = new JavaTimeProvider(env, jtime_provider); + auto logger = + logger_handle == 0 + ? 
nullptr + : *(reinterpret_cast< + std::shared_ptr*>( + logger_handle)); + return reinterpret_cast(new FlinkCompactionFilter( + config_holder, + std::unique_ptr(time_provider), + logger)); +} + +/* + * Class: org_rocksdb_FlinkCompactionFilter + * Method: configureFlinkCompactionFilter + * Signature: (JIIJJILorg/rocksdb/FlinkCompactionFilter$ListElementFilter;)Z + */ +jboolean Java_org_rocksdb_FlinkCompactionFilter_configureFlinkCompactionFilter( + JNIEnv* env, jclass /* jcls */, jlong handle, jint ji_state_type, + jint ji_timestamp_offset, jlong jl_ttl_milli, + jlong jquery_time_after_num_entries, jint ji_list_elem_len, + jobject jlist_filter_factory) { + auto state_type = + static_cast(ji_state_type); + auto timestamp_offset = static_cast(ji_timestamp_offset); + auto ttl = static_cast(jl_ttl_milli); + auto query_time_after_num_entries = + static_cast(jquery_time_after_num_entries); + auto config_holder = + *(reinterpret_cast*>( + handle)); + auto list_filter_factory = createListElementFilterFactory( + env, ji_list_elem_len, jlist_filter_factory); + auto config = new FlinkCompactionFilter::Config{ + state_type, timestamp_offset, ttl, query_time_after_num_entries, + std::unique_ptr( + list_filter_factory)}; + return static_cast(config_holder->Configure(config)); +} \ No newline at end of file diff --git a/java/src/main/java/org/rocksdb/FlinkCompactionFilter.java b/java/src/main/java/org/rocksdb/FlinkCompactionFilter.java new file mode 100644 index 000000000..ee575d5ba --- /dev/null +++ b/java/src/main/java/org/rocksdb/FlinkCompactionFilter.java @@ -0,0 +1,177 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +package org.rocksdb; + +/** + * Just a Java wrapper around FlinkCompactionFilter implemented in C++. + * + * Note: this compaction filter is a special implementation, designed for usage only in Apache Flink + * project. + */ +public class FlinkCompactionFilter extends AbstractCompactionFilter { + public enum StateType { + // WARNING!!! Do not change the order of enum entries as it is important for jni translation + Disabled, + Value, + List + } + + public FlinkCompactionFilter(ConfigHolder configHolder, TimeProvider timeProvider) { + this(configHolder, timeProvider, null); + } + + public FlinkCompactionFilter( + ConfigHolder configHolder, TimeProvider timeProvider, Logger logger) { + super(createNewFlinkCompactionFilter0( + configHolder.nativeHandle_, timeProvider, logger == null ? 0 : logger.nativeHandle_)); + } + + private native static long createNewFlinkCompactionFilter0( + long configHolderHandle, TimeProvider timeProvider, long loggerHandle); + private native static long createNewFlinkCompactionFilterConfigHolder(); + private native static void disposeFlinkCompactionFilterConfigHolder(long configHolderHandle); + private native static boolean configureFlinkCompactionFilter(long configHolderHandle, + int stateType, int timestampOffset, long ttl, long queryTimeAfterNumEntries, + int fixedElementLength, ListElementFilterFactory listElementFilterFactory); + + public interface ListElementFilter { + /** + * Gets offset of the first unexpired element in the list. + * + *

    Native code wraps this java object and calls it for list state + * for which element byte length is unknown and Flink custom type serializer has to be used + * to compute offset of the next element in serialized form. + * + * @param list serialised list of elements with timestamp + * @param ttl time-to-live of the list elements + * @param currentTimestamp current timestamp to check expiration against + * @return offset of the first unexpired element in the list + */ + @SuppressWarnings("unused") + int nextUnexpiredOffset(byte[] list, long ttl, long currentTimestamp); + } + + public interface ListElementFilterFactory { + @SuppressWarnings("unused") ListElementFilter createListElementFilter(); + } + + public static class Config { + final StateType stateType; + final int timestampOffset; + final long ttl; + /** + * Number of state entries to process by compaction filter before updating current timestamp. + */ + final long queryTimeAfterNumEntries; + final int fixedElementLength; + final ListElementFilterFactory listElementFilterFactory; + + private Config(StateType stateType, int timestampOffset, long ttl, + long queryTimeAfterNumEntries, int fixedElementLength, + ListElementFilterFactory listElementFilterFactory) { + this.stateType = stateType; + this.timestampOffset = timestampOffset; + this.ttl = ttl; + this.queryTimeAfterNumEntries = queryTimeAfterNumEntries; + this.fixedElementLength = fixedElementLength; + this.listElementFilterFactory = listElementFilterFactory; + } + + @SuppressWarnings("WeakerAccess") + public static Config createNotList( + StateType stateType, int timestampOffset, long ttl, long queryTimeAfterNumEntries) { + return new Config(stateType, timestampOffset, ttl, queryTimeAfterNumEntries, -1, null); + } + + @SuppressWarnings("unused") + public static Config createForValue(long ttl, long queryTimeAfterNumEntries) { + return createNotList(StateType.Value, 0, ttl, queryTimeAfterNumEntries); + } + + @SuppressWarnings("unused") + public static Config createForMap(long ttl, long queryTimeAfterNumEntries) { + return createNotList(StateType.Value, 1, ttl, queryTimeAfterNumEntries); + } + + @SuppressWarnings("WeakerAccess") + public static Config createForFixedElementList( + long ttl, long queryTimeAfterNumEntries, int fixedElementLength) { + return new Config(StateType.List, 0, ttl, queryTimeAfterNumEntries, fixedElementLength, null); + } + + @SuppressWarnings("WeakerAccess") + public static Config createForList(long ttl, long queryTimeAfterNumEntries, + ListElementFilterFactory listElementFilterFactory) { + return new Config( + StateType.List, 0, ttl, queryTimeAfterNumEntries, -1, listElementFilterFactory); + } + } + + private static class ConfigHolder extends RocksObject { + ConfigHolder() { + super(createNewFlinkCompactionFilterConfigHolder()); + } + + @Override + protected void disposeInternal(long handle) { + disposeFlinkCompactionFilterConfigHolder(handle); + } + } + + /** Provides current timestamp to check expiration, it must be thread safe. 
*/ + public interface TimeProvider { + long currentTimestamp(); + } + + public static class FlinkCompactionFilterFactory + extends AbstractCompactionFilterFactory { + private final ConfigHolder configHolder; + private final TimeProvider timeProvider; + private final Logger logger; + + @SuppressWarnings("unused") + public FlinkCompactionFilterFactory(TimeProvider timeProvider) { + this(timeProvider, null); + } + + @SuppressWarnings("WeakerAccess") + public FlinkCompactionFilterFactory(TimeProvider timeProvider, Logger logger) { + this.configHolder = new ConfigHolder(); + this.timeProvider = timeProvider; + this.logger = logger; + } + + @Override + public void close() { + super.close(); + configHolder.close(); + if (logger != null) { + logger.close(); + } + } + + @Override + public FlinkCompactionFilter createCompactionFilter(Context context) { + return new FlinkCompactionFilter(configHolder, timeProvider, logger); + } + + @Override + public String name() { + return "FlinkCompactionFilterFactory"; + } + + @SuppressWarnings("WeakerAccess") + public void configure(Config config) { + boolean already_configured = + !configureFlinkCompactionFilter(configHolder.nativeHandle_, config.stateType.ordinal(), + config.timestampOffset, config.ttl, config.queryTimeAfterNumEntries, + config.fixedElementLength, config.listElementFilterFactory); + if (already_configured) { + throw new IllegalStateException("Compaction filter is already configured"); + } + } + } +} diff --git a/java/src/test/java/org/rocksdb/FilterTest.java b/java/src/test/java/org/rocksdb/FilterTest.java index dc5c19fbc..e308ffefb 100644 --- a/java/src/test/java/org/rocksdb/FilterTest.java +++ b/java/src/test/java/org/rocksdb/FilterTest.java @@ -16,7 +16,7 @@ public class FilterTest { @Test public void filter() { - // new Bloom filter + // new Bloom filterFactory final BlockBasedTableConfig blockConfig = new BlockBasedTableConfig(); try(final Options options = new Options()) { diff --git a/java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java b/java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java new file mode 100644 index 000000000..40320e9d5 --- /dev/null +++ b/java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java @@ -0,0 +1,356 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.rocksdb; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.FlinkCompactionFilter.StateType; +import org.rocksdb.FlinkCompactionFilter.TimeProvider; + +public class FlinkCompactionFilterTest { + private static final int LONG_LENGTH = 8; + private static final int INT_LENGTH = 4; + private static final String MERGE_OPERATOR_NAME = "stringappendtest"; + private static final byte DELIMITER = ','; + private static final long TTL = 100; + private static final long QUERY_TIME_AFTER_NUM_ENTRIES = 100; + private static final int TEST_TIMESTAMP_OFFSET = 2; + private static final Random rnd = new Random(); + + private TestTimeProvider timeProvider; + private List stateContexts; + private List cfDescs; + private List cfHandles; + + @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); + + @Before + public void init() { + timeProvider = new TestTimeProvider(); + timeProvider.time = rnd.nextLong(); + stateContexts = + Arrays.asList(new StateContext(StateType.Value, timeProvider, TEST_TIMESTAMP_OFFSET), + new FixedElementListStateContext(timeProvider), + new NonFixedElementListStateContext(timeProvider)); + cfDescs = new ArrayList<>(); + cfHandles = new ArrayList<>(); + cfDescs.add(new ColumnFamilyDescriptor(RocksDB.DEFAULT_COLUMN_FAMILY)); + for (StateContext stateContext : stateContexts) { + cfDescs.add(stateContext.getCfDesc()); + } + } + + @After + public void cleanup() { + for (StateContext stateContext : stateContexts) { + stateContext.cfDesc.getOptions().close(); + stateContext.filterFactory.close(); + } + } + + @Test + public void checkStateTypeEnumOrder() { + // if the order changes it also needs to be adjusted + // in utilities/flink/flink_compaction_filter.h + // and in utilities/flink/flink_compaction_filter_test.cc + assertThat(StateType.Disabled.ordinal()).isEqualTo(0); + assertThat(StateType.Value.ordinal()).isEqualTo(1); + assertThat(StateType.List.ordinal()).isEqualTo(2); + } + + @Test + public void testCompactionFilter() throws RocksDBException { + try (DBOptions options = createDbOptions(); RocksDB rocksDb = setupDb(options)) { + try { + for (StateContext stateContext : stateContexts) { + stateContext.updateValueWithTimestamp(rocksDb); + stateContext.checkUnexpired(rocksDb); + rocksDb.compactRange(stateContext.columnFamilyHandle); + stateContext.checkUnexpired(rocksDb); + } + + timeProvider.time += TTL + TTL / 2; // expire state + + for (StateContext stateContext : stateContexts) { + stateContext.checkUnexpired(rocksDb); + rocksDb.compactRange(stateContext.columnFamilyHandle); + stateContext.checkExpired(rocksDb); + rocksDb.compactRange(stateContext.columnFamilyHandle); + } + } finally { + for (ColumnFamilyHandle cfHandle : cfHandles) { + cfHandle.close(); + } + } + } + } + + private static DBOptions createDbOptions() { + return new DBOptions().setCreateIfMissing(true).setCreateMissingColumnFamilies(true); + } + + private RocksDB setupDb(DBOptions options) throws RocksDBException { + RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath(), cfDescs, cfHandles); + for (int i = 0; i < stateContexts.size(); i++) { + stateContexts.get(i).columnFamilyHandle = cfHandles.get(i + 1); + } + return db; + } + 
+ private static class StateContext { + private final String cf; + final String key; + final ColumnFamilyDescriptor cfDesc; + final String userValue; + final long currentTime; + final FlinkCompactionFilter.FlinkCompactionFilterFactory filterFactory; + + ColumnFamilyHandle columnFamilyHandle; + + private StateContext(StateType type, TimeProvider timeProvider, int timestampOffset) { + this.currentTime = timeProvider.currentTimestamp(); + userValue = type.name() + "StateValue"; + cf = getClass().getSimpleName() + "StateCf"; + key = type.name() + "StateKey"; + filterFactory = + new FlinkCompactionFilter.FlinkCompactionFilterFactory(timeProvider, createLogger()); + filterFactory.configure(createConfig(type, timestampOffset)); + cfDesc = new ColumnFamilyDescriptor(getASCII(cf), getOptionsWithFilter(filterFactory)); + } + + private Logger createLogger() { + try (DBOptions opts = new DBOptions().setInfoLogLevel(InfoLogLevel.DEBUG_LEVEL)) { + return new Logger(opts) { + @Override + protected void log(InfoLogLevel infoLogLevel, String logMsg) { + System.out.println(infoLogLevel + ": " + logMsg); + } + }; + } + } + + FlinkCompactionFilter.Config createConfig(StateType type, int timestampOffset) { + return FlinkCompactionFilter.Config.createNotList( + type, timestampOffset, TTL, QUERY_TIME_AFTER_NUM_ENTRIES); + } + + private static ColumnFamilyOptions getOptionsWithFilter( + FlinkCompactionFilter.FlinkCompactionFilterFactory filterFactory) { + return new ColumnFamilyOptions() + .setCompactionFilterFactory(filterFactory) + .setMergeOperatorName(MERGE_OPERATOR_NAME); + } + + public String getKey() { + return key; + } + + ColumnFamilyDescriptor getCfDesc() { + return cfDesc; + } + + byte[] getValueWithTimestamp(RocksDB db) throws RocksDBException { + return db.get(columnFamilyHandle, getASCII(key)); + } + + void updateValueWithTimestamp(RocksDB db) throws RocksDBException { + db.put(columnFamilyHandle, getASCII(key), valueWithTimestamp()); + } + + byte[] valueWithTimestamp() { + return valueWithTimestamp(TEST_TIMESTAMP_OFFSET); + } + + byte[] valueWithTimestamp(@SuppressWarnings("SameParameterValue") int offset) { + return valueWithTimestamp(offset, currentTime); + } + + byte[] valueWithTimestamp(int offset, long timestamp) { + ByteBuffer buffer = getByteBuffer(offset); + buffer.put(new byte[offset]); + appendValueWithTimestamp(buffer, userValue, timestamp); + return buffer.array(); + } + + void appendValueWithTimestamp(ByteBuffer buffer, String value, long timestamp) { + buffer.putLong(timestamp); + buffer.putInt(value.length()); + buffer.put(getASCII(value)); + } + + ByteBuffer getByteBuffer(int offset) { + int length = offset + LONG_LENGTH + INT_LENGTH + userValue.length(); + return ByteBuffer.allocate(length); + } + + byte[] unexpiredValue() { + return valueWithTimestamp(); + } + + byte[] expiredValue() { + return null; + } + + void checkUnexpired(RocksDB db) throws RocksDBException { + assertThat(getValueWithTimestamp(db)).isEqualTo(unexpiredValue()); + } + + void checkExpired(RocksDB db) throws RocksDBException { + assertThat(getValueWithTimestamp(db)).isEqualTo(expiredValue()); + } + } + + private static class FixedElementListStateContext extends StateContext { + private FixedElementListStateContext(TimeProvider timeProvider) { + super(StateType.List, timeProvider, 0); + } + + @Override + FlinkCompactionFilter.Config createConfig(StateType type, int timestampOffset) { + // return FlinkCompactionFilter.Config.createForList(TTL, QUERY_TIME_AFTER_NUM_ENTRIES, + // ELEM_FILTER_FACTORY); + return 
FlinkCompactionFilter.Config.createForFixedElementList( + TTL, QUERY_TIME_AFTER_NUM_ENTRIES, 13 + userValue.getBytes().length); + } + + @Override + void updateValueWithTimestamp(RocksDB db) throws RocksDBException { + db.merge(columnFamilyHandle, getASCII(key), listExpired(3)); + db.merge(columnFamilyHandle, getASCII(key), mixedList(2, 3)); + db.merge(columnFamilyHandle, getASCII(key), listUnexpired(4)); + } + + @Override + byte[] unexpiredValue() { + return mixedList(5, 7); + } + + byte[] mergeBytes(byte[]... bytes) { + int length = 0; + for (byte[] a : bytes) { + length += a.length; + } + ByteBuffer buffer = ByteBuffer.allocate(length); + for (byte[] a : bytes) { + buffer.put(a); + } + return buffer.array(); + } + + @Override + byte[] expiredValue() { + return listUnexpired(7); + } + + private byte[] mixedList(int numberOfExpiredElements, int numberOfUnexpiredElements) { + assert numberOfExpiredElements > 0; + assert numberOfUnexpiredElements > 0; + return mergeBytes(listExpired(numberOfExpiredElements), new byte[] {DELIMITER}, + listUnexpired(numberOfUnexpiredElements)); + } + + private byte[] listExpired(int numberOfElements) { + return list(numberOfElements, currentTime); + } + + private byte[] listUnexpired(int numberOfElements) { + return list(numberOfElements, currentTime + TTL); + } + + private byte[] list(int numberOfElements, long timestamp) { + ByteBuffer buffer = getByteBufferForList(numberOfElements); + for (int i = 0; i < numberOfElements; i++) { + appendValueWithTimestamp(buffer, userValue, timestamp); + if (i < numberOfElements - 1) { + buffer.put(DELIMITER); + } + } + return buffer.array(); + } + + private ByteBuffer getByteBufferForList(int numberOfElements) { + int length = ((LONG_LENGTH + INT_LENGTH + userValue.length() + 1) * numberOfElements) - 1; + return ByteBuffer.allocate(length); + } + } + + private static class NonFixedElementListStateContext extends FixedElementListStateContext { + private static FlinkCompactionFilter.ListElementFilterFactory ELEM_FILTER_FACTORY = + new ListElementFilterFactory(); + + private NonFixedElementListStateContext(TimeProvider timeProvider) { + super(timeProvider); + } + + @Override + FlinkCompactionFilter.Config createConfig(StateType type, int timestampOffset) { + // return FlinkCompactionFilter.Config.createForList(TTL, QUERY_TIME_AFTER_NUM_ENTRIES, + // ELEM_FILTER_FACTORY); + return FlinkCompactionFilter.Config.createForList( + TTL, QUERY_TIME_AFTER_NUM_ENTRIES, ELEM_FILTER_FACTORY); + } + + private static class ListElementFilterFactory + implements FlinkCompactionFilter.ListElementFilterFactory { + @Override + public FlinkCompactionFilter.ListElementFilter createListElementFilter() { + return new FlinkCompactionFilter.ListElementFilter() { + @Override + public int nextUnexpiredOffset(byte[] list, long ttl, long currentTimestamp) { + int currentOffset = 0; + while (currentOffset < list.length) { + ByteBuffer bf = ByteBuffer.wrap(list, currentOffset, list.length - currentOffset); + long timestamp = bf.getLong(); + if (timestamp + ttl > currentTimestamp) { + break; + } + int elemLen = bf.getInt(8); + currentOffset += 13 + elemLen; + } + return currentOffset; + } + }; + } + } + } + + private static byte[] getASCII(String str) { + return str.getBytes(StandardCharsets.US_ASCII); + } + + private static class TestTimeProvider implements TimeProvider { + private long time; + + @Override + public long currentTimestamp() { + return time; + } + } +} \ No newline at end of file diff --git a/src.mk b/src.mk index a03a476ff..2e5253176 100644 
--- a/src.mk +++ b/src.mk @@ -280,6 +280,7 @@ LIB_SOURCES = \ utilities/fault_injection_env.cc \ utilities/fault_injection_fs.cc \ utilities/fault_injection_secondary_cache.cc \ + utilities/flink/flink_compaction_filter.cc \ utilities/leveldb_options/leveldb_options.cc \ utilities/memory/memory_util.cc \ utilities/merge_operators.cc \ @@ -612,6 +613,7 @@ TEST_MAIN_SOURCES = \ utilities/cassandra/cassandra_serialize_test.cc \ utilities/checkpoint/checkpoint_test.cc \ utilities/env_timed_test.cc \ + utilities/flink/flink_compaction_filter_test.cc \ utilities/memory/memory_test.cc \ utilities/merge_operators/string_append/stringappend_test.cc \ utilities/object_registry_test.cc \ @@ -668,6 +670,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/event_listener.cc \ java/rocksjni/event_listener_jnicallback.cc \ java/rocksjni/import_column_family_options.cc \ + java/rocksjni/flink_compactionfilterjni.cc \ java/rocksjni/ingest_external_file_options.cc \ java/rocksjni/filter.cc \ java/rocksjni/hyper_clock_cache.cc \ diff --git a/utilities/flink/flink_compaction_filter.cc b/utilities/flink/flink_compaction_filter.cc new file mode 100644 index 000000000..4cbdd7e7d --- /dev/null +++ b/utilities/flink/flink_compaction_filter.cc @@ -0,0 +1,206 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/flink/flink_compaction_filter.h" + +#include +#include + +namespace ROCKSDB_NAMESPACE { +namespace flink { + +int64_t DeserializeTimestamp(const char* src, std::size_t offset) { + uint64_t result = 0; + for (unsigned long i = 0; i < sizeof(uint64_t); i++) { + result |= static_cast(static_cast(src[offset + i])) + << ((sizeof(int64_t) - 1 - i) * BITS_PER_BYTE); + } + return static_cast(result); +} + +CompactionFilter::Decision Decide(const char* ts_bytes, const int64_t ttl, + const std::size_t timestamp_offset, + const int64_t current_timestamp, + const std::shared_ptr& logger) { + int64_t timestamp = DeserializeTimestamp(ts_bytes, timestamp_offset); + const int64_t ttlWithoutOverflow = + timestamp > 0 ? std::min(JAVA_MAX_LONG - timestamp, ttl) : ttl; + Debug(logger.get(), + "Last access timestamp: %" PRId64 " ms, ttlWithoutOverflow: %" PRId64 + " ms, Current timestamp: %" PRId64 " ms", + timestamp, ttlWithoutOverflow, current_timestamp); + return timestamp + ttlWithoutOverflow <= current_timestamp + ? 
CompactionFilter::Decision::kRemove + : CompactionFilter::Decision::kKeep; +} + +FlinkCompactionFilter::ConfigHolder::ConfigHolder() + : config_(const_cast(&DISABLED_CONFIG)){}; + +FlinkCompactionFilter::ConfigHolder::~ConfigHolder() { + Config* config = config_.load(); + if (config != &DISABLED_CONFIG) { + delete config; + } +} + +// at the moment Flink configures filters (can be already created) only once +// when user creates state otherwise it can lead to ListElementFilter leak in +// Config or race between its delete in Configure() and usage in FilterV2() the +// method returns true if it was configured before +bool FlinkCompactionFilter::ConfigHolder::Configure(Config* config) { + bool not_configured = GetConfig() == &DISABLED_CONFIG; + if (not_configured) { + assert(config->query_time_after_num_entries_ >= 0); + config_ = config; + } + return not_configured; +} + +FlinkCompactionFilter::Config* +FlinkCompactionFilter::ConfigHolder::GetConfig() { + return config_.load(); +} + +std::size_t FlinkCompactionFilter::FixedListElementFilter::NextUnexpiredOffset( + const Slice& list, int64_t ttl, int64_t current_timestamp) const { + std::size_t offset = 0; + while (offset < list.size()) { + Decision decision = Decide(list.data(), ttl, offset + timestamp_offset_, + current_timestamp, logger_); + if (decision != Decision::kKeep) { + std::size_t new_offset = offset + fixed_size_; + if (new_offset >= JAVA_MAX_SIZE || new_offset < offset) { + return JAVA_MAX_SIZE; + } + offset = new_offset; + } else { + break; + } + } + return offset; +} + +const char* FlinkCompactionFilter::Name() const { + return "FlinkCompactionFilter"; +} + +FlinkCompactionFilter::FlinkCompactionFilter( + std::shared_ptr config_holder, + std::unique_ptr time_provider) + : FlinkCompactionFilter(std::move(config_holder), std::move(time_provider), + nullptr){}; + +FlinkCompactionFilter::FlinkCompactionFilter( + std::shared_ptr config_holder, + std::unique_ptr time_provider, std::shared_ptr logger) + : config_holder_(std::move(config_holder)), + time_provider_(std::move(time_provider)), + logger_(std::move(logger)), + config_cached_(const_cast(&DISABLED_CONFIG)){}; + +inline void FlinkCompactionFilter::InitConfigIfNotYet() const { + const_cast(this)->config_cached_ = + config_cached_ == &DISABLED_CONFIG ? 
config_holder_->GetConfig() + : config_cached_; +} + +CompactionFilter::Decision FlinkCompactionFilter::FilterV2( + int /*level*/, const Slice& key, ValueType value_type, + const Slice& existing_value, std::string* new_value, + std::string* /*skip_until*/) const { + InitConfigIfNotYet(); + CreateListElementFilterIfNull(); + UpdateCurrentTimestampIfStale(); + + const char* data = existing_value.data(); + + Debug(logger_.get(), + "Call FlinkCompactionFilter::FilterV2 - Key: %s, Data: %s, Value type: " + "%d, " + "State type: %d, TTL: %" PRId64 " ms, timestamp_offset: %zu", + key.ToString().c_str(), existing_value.ToString(true).c_str(), + value_type, config_cached_->state_type_, config_cached_->ttl_, + config_cached_->timestamp_offset_); + + // too short value to have timestamp at all + const bool tooShortValue = + existing_value.size() < + config_cached_->timestamp_offset_ + TIMESTAMP_BYTE_SIZE; + + const StateType state_type = config_cached_->state_type_; + const bool value_or_merge = + value_type == ValueType::kValue || value_type == ValueType::kMergeOperand; + const bool value_state = + state_type == StateType::Value && value_type == ValueType::kValue; + const bool list_entry = state_type == StateType::List && value_or_merge; + const bool toDecide = value_state || list_entry; + const bool list_filter = list_entry && list_element_filter_; + + Decision decision = Decision::kKeep; + if (!tooShortValue && toDecide) { + decision = list_filter ? ListDecide(existing_value, new_value) + : Decide(data, config_cached_->ttl_, + config_cached_->timestamp_offset_, + current_timestamp_, logger_); + } + Debug(logger_.get(), "Decision: %d", static_cast(decision)); + return decision; +} + +CompactionFilter::Decision FlinkCompactionFilter::ListDecide( + const Slice& existing_value, std::string* new_value) const { + std::size_t offset = 0; + if (offset < existing_value.size()) { + Decision decision = Decide(existing_value.data(), config_cached_->ttl_, + offset + config_cached_->timestamp_offset_, + current_timestamp_, logger_); + if (decision != Decision::kKeep) { + offset = + ListNextUnexpiredOffset(existing_value, offset, config_cached_->ttl_); + if (offset >= JAVA_MAX_SIZE) { + return Decision::kKeep; + } + } + } + if (offset >= existing_value.size()) { + return Decision::kRemove; + } else if (offset > 0) { + SetUnexpiredListValue(existing_value, offset, new_value); + return Decision::kChangeValue; + } + return Decision::kKeep; +} + +std::size_t FlinkCompactionFilter::ListNextUnexpiredOffset( + const Slice& existing_value, size_t offset, int64_t ttl) const { + std::size_t new_offset = list_element_filter_->NextUnexpiredOffset( + existing_value, ttl, current_timestamp_); + if (new_offset >= JAVA_MAX_SIZE || new_offset < offset) { + Error(logger_.get(), "Wrong next offset in list filter: %zu -> %zu", offset, + new_offset); + new_offset = JAVA_MAX_SIZE; + } else { + Debug(logger_.get(), "Next unexpired offset: %zu -> %zu", offset, + new_offset); + } + return new_offset; +} + +void FlinkCompactionFilter::SetUnexpiredListValue( + const Slice& existing_value, std::size_t offset, + std::string* new_value) const { + new_value->clear(); + auto new_value_char = existing_value.data() + offset; + auto new_value_size = existing_value.size() - offset; + new_value->assign(new_value_char, new_value_size); + Logger* logger = logger_.get(); + if (logger && logger->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { + Slice new_value_slice = Slice(new_value_char, new_value_size); + Debug(logger, "New list value: %s", 
new_value_slice.ToString(true).c_str()); + } +} +} // namespace flink +} // namespace ROCKSDB_NAMESPACE diff --git a/utilities/flink/flink_compaction_filter.h b/utilities/flink/flink_compaction_filter.h new file mode 100644 index 000000000..3b3b651ea --- /dev/null +++ b/utilities/flink/flink_compaction_filter.h @@ -0,0 +1,191 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once +#include + +#include +#include +#include +#include +#include + +#include "rocksdb/compaction_filter.h" +#include "rocksdb/slice.h" + +namespace ROCKSDB_NAMESPACE { +namespace flink { + +static const std::size_t BITS_PER_BYTE = static_cast(8); +static const std::size_t TIMESTAMP_BYTE_SIZE = static_cast(8); +static const int64_t JAVA_MIN_LONG = static_cast(0x8000000000000000); +static const int64_t JAVA_MAX_LONG = static_cast(0x7fffffffffffffff); +static const std::size_t JAVA_MAX_SIZE = static_cast(0x7fffffff); + +/** + * Compaction filter for removing expired Flink state entries with ttl. + * + * Note: this compaction filter is a special implementation, designed for usage + * only in Apache Flink project. + */ +class FlinkCompactionFilter : public CompactionFilter { + public: + enum StateType { + // WARNING!!! Do not change the order of enum entries as it is important for + // jni translation + Disabled, + Value, + List + }; + + // Provides current timestamp to check expiration, it must thread safe. + class TimeProvider { + public: + virtual ~TimeProvider() = default; + virtual int64_t CurrentTimestamp() const = 0; + }; + + // accepts serialized list state and checks elements for expiration starting + // from the head stops upon discovery of unexpired element and returns its + // offset or returns offset greater or equal to list byte length. + class ListElementFilter { + public: + virtual ~ListElementFilter() = default; + virtual std::size_t NextUnexpiredOffset( + const Slice& list, int64_t ttl, int64_t current_timestamp) const = 0; + }; + + // this filter can operate directly on list state bytes + // because the byte length of list element and last acess timestamp position + // are known. 
+ class FixedListElementFilter : public ListElementFilter { + public: + explicit FixedListElementFilter(std::size_t fixed_size, + std::size_t timestamp_offset, + std::shared_ptr logger) + : fixed_size_(fixed_size), + timestamp_offset_(timestamp_offset), + logger_(std::move(logger)) {} + std::size_t NextUnexpiredOffset(const Slice& list, int64_t ttl, + int64_t current_timestamp) const override; + + private: + std::size_t fixed_size_; + std::size_t timestamp_offset_; + std::shared_ptr logger_; + }; + + // Factory is needed to create one filter per filter/thread + // and avoid concurrent access to the filter state + class ListElementFilterFactory { + public: + virtual ~ListElementFilterFactory() = default; + virtual ListElementFilter* CreateListElementFilter( + std::shared_ptr logger) const = 0; + }; + + class FixedListElementFilterFactory : public ListElementFilterFactory { + public: + explicit FixedListElementFilterFactory(std::size_t fixed_size, + std::size_t timestamp_offset) + : fixed_size_(fixed_size), timestamp_offset_(timestamp_offset) {} + FixedListElementFilter* CreateListElementFilter( + std::shared_ptr logger) const override { + return new FixedListElementFilter(fixed_size_, timestamp_offset_, logger); + }; + + private: + std::size_t fixed_size_; + std::size_t timestamp_offset_; + }; + + struct Config { + StateType state_type_; + std::size_t timestamp_offset_; + int64_t ttl_; + // Number of state entries to process by compaction filter before updating + // current timestamp. + int64_t query_time_after_num_entries_; + std::unique_ptr list_element_filter_factory_; + }; + + // Allows to configure at once all FlinkCompactionFilters created by the + // factory. The ConfigHolder holds the shared Config. + class ConfigHolder { + public: + explicit ConfigHolder(); + ~ConfigHolder(); + bool Configure(Config* config); + Config* GetConfig(); + + private: + std::atomic config_; + }; + + explicit FlinkCompactionFilter(std::shared_ptr config_holder, + std::unique_ptr time_provider); + + explicit FlinkCompactionFilter(std::shared_ptr config_holder, + std::unique_ptr time_provider, + std::shared_ptr logger); + + const char* Name() const override; + Decision FilterV2(int level, const Slice& key, ValueType value_type, + const Slice& existing_value, std::string* new_value, + std::string* skip_until) const override; + + bool IgnoreSnapshots() const override { return true; } + + private: + inline void InitConfigIfNotYet() const; + + Decision ListDecide(const Slice& existing_value, + std::string* new_value) const; + + inline std::size_t ListNextUnexpiredOffset(const Slice& existing_value, + std::size_t offset, + int64_t ttl) const; + + inline void SetUnexpiredListValue(const Slice& existing_value, + std::size_t offset, + std::string* new_value) const; + + inline void CreateListElementFilterIfNull() const { + if (!list_element_filter_ && config_cached_->list_element_filter_factory_) { + const_cast(this)->list_element_filter_ = + std::unique_ptr( + config_cached_->list_element_filter_factory_ + ->CreateListElementFilter(logger_)); + } + } + + inline void UpdateCurrentTimestampIfStale() const { + bool is_stale = + record_counter_ >= config_cached_->query_time_after_num_entries_; + if (is_stale) { + const_cast(this)->record_counter_ = 0; + const_cast(this)->current_timestamp_ = + time_provider_->CurrentTimestamp(); + } + const_cast(this)->record_counter_ = + record_counter_ + 1; + } + + std::shared_ptr config_holder_; + std::unique_ptr time_provider_; + std::shared_ptr logger_; + Config* config_cached_; + 
std::unique_ptr list_element_filter_; + int64_t current_timestamp_ = std::numeric_limits::max(); + int64_t record_counter_ = std::numeric_limits::max(); +}; + +static const FlinkCompactionFilter::Config DISABLED_CONFIG = + FlinkCompactionFilter::Config{FlinkCompactionFilter::StateType::Disabled, 0, + std::numeric_limits::max(), + std::numeric_limits::max(), nullptr}; + +} // namespace flink +} // namespace ROCKSDB_NAMESPACE diff --git a/utilities/flink/flink_compaction_filter_test.cc b/utilities/flink/flink_compaction_filter_test.cc new file mode 100644 index 000000000..26613ae68 --- /dev/null +++ b/utilities/flink/flink_compaction_filter_test.cc @@ -0,0 +1,226 @@ +// Copyright (c) 2017-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "utilities/flink/flink_compaction_filter.h" + +#include + +#include "test_util/testharness.h" + +namespace ROCKSDB_NAMESPACE { +namespace flink { + +#define DISABLED FlinkCompactionFilter::StateType::Disabled +#define VALUE FlinkCompactionFilter::StateType::Value +#define LIST FlinkCompactionFilter::StateType::List + +#define KVALUE CompactionFilter::ValueType::kValue +#define KMERGE CompactionFilter::ValueType::kMergeOperand +#define KBLOB CompactionFilter::ValueType::kBlobIndex + +#define KKEEP CompactionFilter::Decision::kKeep +#define KREMOVE CompactionFilter::Decision::kRemove +#define KCHANGE CompactionFilter::Decision::kChangeValue + +#define EXPIRE (time += ttl + 20) + +#define EXPECT_ARR_EQ(arr1, arr2, num) \ + EXPECT_TRUE(0 == memcmp(arr1, arr2, num)); + +static const std::size_t TEST_TIMESTAMP_OFFSET = static_cast(2); + +static const std::size_t LIST_ELEM_FIXED_LEN = static_cast(8 + 4); + +static const int64_t QUERY_TIME_AFTER_NUM_ENTRIES = static_cast(10); + +class ConsoleLogger : public Logger { + public: + using Logger::Logv; + ConsoleLogger() : Logger(InfoLogLevel::DEBUG_LEVEL) {} + + void Logv(const char* format, va_list ap) override { + vprintf(format, ap); + printf("\n"); + } +}; + +int64_t time = 0; + +class TestTimeProvider : public FlinkCompactionFilter::TimeProvider { + public: + int64_t CurrentTimestamp() const override { return time; } +}; + +std::random_device rd; // NOLINT +std::mt19937 mt(rd()); // NOLINT +std::uniform_int_distribution rnd(JAVA_MIN_LONG, + JAVA_MAX_LONG); // NOLINT + +int64_t ttl = 100; + +Slice key = Slice("key"); // NOLINT +char data[24]; +std::string new_list = ""; // NOLINT +std::string stub = ""; // NOLINT + +FlinkCompactionFilter::StateType state_type; +CompactionFilter::ValueType value_type; +FlinkCompactionFilter* filter; // NOLINT + +void SetTimestamp(int64_t timestamp, size_t offset = 0, char* value = data) { + for (unsigned long i = 0; i < sizeof(uint64_t); i++) { + value[offset + i] = + static_cast(static_cast(timestamp) >> + ((sizeof(int64_t) - 1 - i) * BITS_PER_BYTE)); + } +} + +CompactionFilter::Decision decide(size_t data_size = sizeof(data)) { + return filter->FilterV2(0, key, value_type, Slice(data, data_size), &new_list, + &stub); +} + +void Init( + FlinkCompactionFilter::StateType stype, CompactionFilter::ValueType vtype, + FlinkCompactionFilter::ListElementFilterFactory* fixed_len_filter_factory, + size_t timestamp_offset, bool expired = false) { + time = expired ? 
time + ttl + 20 : time; + state_type = stype; + value_type = vtype; + + auto config_holder = std::make_shared(); + auto time_provider = new TestTimeProvider(); + auto logger = std::make_shared(); + + filter = new FlinkCompactionFilter( + config_holder, + std::unique_ptr(time_provider), + logger); + auto config = new FlinkCompactionFilter::Config{ + state_type, timestamp_offset, ttl, QUERY_TIME_AFTER_NUM_ENTRIES, + std::unique_ptr( + fixed_len_filter_factory)}; + EXPECT_EQ(decide(), KKEEP); // test disabled config + EXPECT_TRUE(config_holder->Configure(config)); + EXPECT_FALSE(config_holder->Configure(config)); +} + +void InitValue(FlinkCompactionFilter::StateType stype, + CompactionFilter::ValueType vtype, bool expired = false, + size_t timestamp_offset = TEST_TIMESTAMP_OFFSET) { + time = rnd(mt); + SetTimestamp(time, timestamp_offset); + Init(stype, vtype, nullptr, timestamp_offset, expired); +} + +void InitList(CompactionFilter::ValueType vtype, bool all_expired = false, + bool first_elem_expired = false, size_t timestamp_offset = 0) { + time = rnd(mt); + SetTimestamp(first_elem_expired ? time - ttl - 20 : time, + timestamp_offset); // elem 1 ts + SetTimestamp(time, LIST_ELEM_FIXED_LEN + timestamp_offset); // elem 2 ts + auto fixed_len_filter_factory = + new FlinkCompactionFilter::FixedListElementFilterFactory( + LIST_ELEM_FIXED_LEN, static_cast(0)); + Init(LIST, vtype, fixed_len_filter_factory, timestamp_offset, all_expired); +} + +void Deinit() { delete filter; } + +TEST(FlinkStateTtlTest, CheckStateTypeEnumOrder) { // NOLINT + // if the order changes it also needs to be adjusted in Java client: + // in org.rocksdb.FlinkCompactionFilter + // and in org.rocksdb.FlinkCompactionFilterTest + EXPECT_EQ(DISABLED, 0); + EXPECT_EQ(VALUE, 1); + EXPECT_EQ(LIST, 2); +} + +TEST(FlinkStateTtlTest, SkipShortDataWithoutTimestamp) { // NOLINT + InitValue(VALUE, KVALUE, true); + EXPECT_EQ(decide(TIMESTAMP_BYTE_SIZE - 1), KKEEP); + Deinit(); +} + +TEST(FlinkValueStateTtlTest, Unexpired) { // NOLINT + InitValue(VALUE, KVALUE); + EXPECT_EQ(decide(), KKEEP); + Deinit(); +} + +TEST(FlinkValueStateTtlTest, Expired) { // NOLINT + InitValue(VALUE, KVALUE, true); + EXPECT_EQ(decide(), KREMOVE); + Deinit(); +} + +TEST(FlinkValueStateTtlTest, CachedTimeUpdate) { // NOLINT + InitValue(VALUE, KVALUE); + EXPECT_EQ(decide(), KKEEP); // also implicitly cache current timestamp + EXPIRE; // advance current timestamp to expire but cached should be used + // QUERY_TIME_AFTER_NUM_ENTRIES - 2: + // -1 -> for decide disabled in InitValue + // and -1 -> for decide right after InitValue + for (int64_t i = 0; i < QUERY_TIME_AFTER_NUM_ENTRIES - 2; i++) { + EXPECT_EQ(decide(), KKEEP); + } + EXPECT_EQ(decide(), KREMOVE); // advanced current timestamp should be updated + // in cache and expire state + Deinit(); +} + +TEST(FlinkValueStateTtlTest, WrongFilterValueType) { // NOLINT + InitValue(VALUE, KMERGE, true); + EXPECT_EQ(decide(), KKEEP); + Deinit(); +} + +TEST(FlinkListStateTtlTest, Unexpired) { // NOLINT + InitList(KMERGE); + EXPECT_EQ(decide(), KKEEP); + Deinit(); + + InitList(KVALUE); + EXPECT_EQ(decide(), KKEEP); + Deinit(); +} + +TEST(FlinkListStateTtlTest, Expired) { // NOLINT + InitList(KMERGE, true); + EXPECT_EQ(decide(), KREMOVE); + Deinit(); + + InitList(KVALUE, true); + EXPECT_EQ(decide(), KREMOVE); + Deinit(); +} + +TEST(FlinkListStateTtlTest, HalfExpired) { // NOLINT + InitList(KMERGE, false, true); + EXPECT_EQ(decide(), KCHANGE); + EXPECT_ARR_EQ(new_list.data(), data + LIST_ELEM_FIXED_LEN, + LIST_ELEM_FIXED_LEN); 
+ Deinit(); + + InitList(KVALUE, false, true); + EXPECT_EQ(decide(), KCHANGE); + EXPECT_ARR_EQ(new_list.data(), data + LIST_ELEM_FIXED_LEN, + LIST_ELEM_FIXED_LEN); + Deinit(); +} + +TEST(FlinkListStateTtlTest, WrongFilterValueType) { // NOLINT + InitList(KBLOB, true); + EXPECT_EQ(decide(), KKEEP); + Deinit(); +} + +} // namespace flink +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 927b17e10d2112270ac30c4566238950baba4b7b Mon Sep 17 00:00:00 2001 From: Robin Cassan Date: Mon, 21 Nov 2022 17:25:58 +0100 Subject: [PATCH 342/386] fix(CompactionFilter): avoid expensive `ToString` call when not in Debug` --- utilities/flink/flink_compaction_filter.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utilities/flink/flink_compaction_filter.cc b/utilities/flink/flink_compaction_filter.cc index 4cbdd7e7d..1239e6935 100644 --- a/utilities/flink/flink_compaction_filter.cc +++ b/utilities/flink/flink_compaction_filter.cc @@ -117,13 +117,16 @@ CompactionFilter::Decision FlinkCompactionFilter::FilterV2( const char* data = existing_value.data(); - Debug(logger_.get(), + if (logger_ && logger_->GetInfoLogLevel() <= InfoLogLevel::DEBUG_LEVEL) { + Debug( + logger_.get(), "Call FlinkCompactionFilter::FilterV2 - Key: %s, Data: %s, Value type: " "%d, " "State type: %d, TTL: %" PRId64 " ms, timestamp_offset: %zu", key.ToString().c_str(), existing_value.ToString(true).c_str(), value_type, config_cached_->state_type_, config_cached_->ttl_, config_cached_->timestamp_offset_); + } // too short value to have timestamp at all const bool tooShortValue = From b58ba05a380d9bf0c223bc707f14897ce392ce1b Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Mon, 9 Aug 2021 20:50:13 +0800 Subject: [PATCH 343/386] FRocksDB release guide and helping scripts --- FROCKSDB-RELEASE.md | 113 ++++++++++++++++++++++++++++++++++ HISTORY.md | 6 ++ Makefile | 36 ++++++++++- java/crossbuild/build-win.bat | 16 +++++ java/deploysettings.xml | 12 ++++ java/pom.xml.template | 19 +++--- java/publish-frocksdbjni.sh | 44 +++++++++++++ 7 files changed, 233 insertions(+), 13 deletions(-) create mode 100644 FROCKSDB-RELEASE.md create mode 100644 java/crossbuild/build-win.bat create mode 100644 java/deploysettings.xml create mode 100644 java/publish-frocksdbjni.sh diff --git a/FROCKSDB-RELEASE.md b/FROCKSDB-RELEASE.md new file mode 100644 index 000000000..bd5d0b2cc --- /dev/null +++ b/FROCKSDB-RELEASE.md @@ -0,0 +1,113 @@ +## Summary +For FRocksDB-6.x, we need to release jar package which contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files for Mac OSX, and a .dll for Windows x64. + +## Build in Windows + +Use Windows 64 bit machine (e.g. base AWS Windows instance: 4 cores, 16GB RAM, 40GB storage for build). + +Install: + * git + * java 8 + * maven + * Visual Studio Community 15 (2017) + +With [chocolatey](https://chocolatey.org/install): + + choco install git.install jdk8 maven visualstudio2017community + +Optionally: + + choco install intellijidea-community vscode + +Open developer command prompt for vs 2017 and run commands: + + git clone git@github.com:ververica/frocksdb.git + cd frocksdb + git checkout FRocksDB-6.20.3 # release branch + java\crossbuild\build-win.bat + +The result native library is `build\java\Release\rocksdbjni-shared.dll` and you can also find it under project foler with name `librocksdbjni-win64.dll` +The result windows jar is `build\java\rocksdbjni_classes.jar`. 
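As an optional sanity check (not part of the official steps, and assuming the VS 2017 developer command prompt is still open so that `dumpbin` and `jar` are on the PATH), you can confirm the native library targets x64 and peek into the jar contents:

    dumpbin /headers librocksdbjni-win64.dll | findstr machine
    jar tf build\java\rocksdbjni_classes.jar

The first command should report `machine (x64)`.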
+ +There is also a how-to in CMakeLists.txt. + +## Build in PPC64LE + +### Build on a powerPC machine +Strongly suggest you to build on native powerPC machine: + + +### Build within a docker machine via QEMU +** warning ** It would be extremely slow to build within a docker machine via QEMU, from my experiense, it might need at least 8 hours to build FRocksDB once. + +Use Ubuntu 16.04 (e.g. AWS instance 4 cores, 16GB RAM, 40GB storage for build). +Install git if not installed. If docker is installed, it might need to be removed. + +Setup ppc64le docker machine ([source](https://developer.ibm.com/linuxonpower/2017/06/08/build-test-ppc64le-docker-images-intel/)): + + wget http://ftp.unicamp.br/pub/ppc64el/boot2docker/install.sh && chmod +x ./install.sh && ./install.sh -s + docker-machine create -d qemu \ + --qemu-boot2docker-url=/home/ubuntu/.docker/machine/boot2docker.iso \ + --qemu-memory 8192 \ + --qemu-cpu-count 4 \ + --qemu-cache-mode none \ + --qemu-arch ppc64le \ + vm-ppc64le + +Regenerate certs as suggested if it did not work at once. + +Prepare docker machine to run rocksdbjni docker image for ppc64le build: + + eval $(docker-machine env vm-ppc64le) + git clone git@github.com:ververica/frocksdb.git + cd frocksdb + git checkout FRocksDB-6.20.3 # release branch + docker-machine ssh vm-ppc64le mkdir -p `pwd` + docker-machine scp -r . vm-ppc64le:`pwd` + +Build frocksdb: + + make jclean clean rocksdbjavastaticdockerppc64le + docker-machine scp vm-ppc64le:`pwd`/java/target/librocksdbjni-linux-ppc64le.so java/target/. + make jclean clean rocksdbjavastaticdockerppc64lemusl + docker-machine scp vm-ppc64le:`pwd`/java/target/librocksdbjni-linux-ppc64le.so java/target/. + +The result native libraries are `java/target/librocksdbjni-linux-ppc64le.so` and `java/target/librocksdbjni-linux-ppc64le-musl.so`. + +## Final crossbuild in Mac OSX + +Read how to Build cross jar for Mac OSX and linux as described in java/RELEASE.md but do not run it yet. + +Run commands: + + make jclean clean + mkdir -p java/target + cp /librocksdbjni-win64.dll java/target/librocksdbjni-win64.dll + cp /librocksdbjni-linux-ppc64le.so java/target/librocksdbjni-linux-ppc64le.so + cp /librocksdbjni-linux-ppc64le-musl.so java/target/librocksdbjni-linux-ppc64le-musl.so + FROCKSDB_VERSION=1.0 PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true make clean frocksdbjavastaticreleasedocker + +* Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787 + +## Push to maven central + +Run: +```bash +VERSION= \ +USER= \ +PASSWORD= \ +KEYNAME= \ +PASSPHRASE= \ +java/publish-frocksdbjni.sh +``` + +Go to the staging repositories on Sonatype: + +https://oss.sonatype.org/#stagingRepositories + +Select the open staging repository and click on "Close". + +Test the files in the staging repository +which will look something like this `https://oss.sonatype.org/content/repositories/xxxx-1020`. + +Press the "Release" button (WARNING: this can not be undone). \ No newline at end of file diff --git a/HISTORY.md b/HISTORY.md index 0d7c284b2..f4d0ea3f4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,9 @@ +# FRocksdb Change Log +## 6.20.2-ververica-1.0 (08/09/2021) +### Improvement +* [Flink TTL] compaction filter for background cleanup of state with time-to-live +* [FLINK-19710] Revert implementation of PerfContext back to __thread to avoid performance regression + # Rocksdb Change Log > NOTE: Entries for next release do not go here. 
Follow instructions in `unreleased_history/README.txt` diff --git a/Makefile b/Makefile index 09900875b..96d69282b 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,8 @@ #----------------------------------------------- +FROCKSDB_VERSION ?= 1.0 + BASH_EXISTS := $(shell which bash) SHELL := $(shell which bash) include common.mk @@ -2325,10 +2327,42 @@ rocksdbjavastaticrelease: rocksdbjavastaticosx rocksdbjava_javadocs_jar rocksdbj rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib + jar -uf java/target/$(ROCKSDB_JAR_ALL) HISTORY*.md + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib librocksdbjni-win64.dll cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 +frocksdbjavastaticreleasedocker: rocksdbjavastaticreleasedocker + # update apache license + mkdir -p java/target/META-INF + cp LICENSE.Apache java/target/META-INF/LICENSE + cd java/target;jar -uf $(ROCKSDB_JAR_ALL) META-INF/LICENSE + + # jars to be released + $(eval JAR_PREF=rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)) + $(eval JAR_DOCS=$(JAR_PREF)-javadoc.jar) + $(eval JAR_SOURCES=$(JAR_PREF)-sources.jar) + + # update docs and sources jars + cd java/target;jar -uf $(JAR_DOCS) META-INF/LICENSE + cd java/target;jar -uf $(JAR_SOURCES) META-INF/LICENSE + + # prepare frocksdb release + cd java/target;mkdir -p frocksdb-release + + $(eval FROCKSDB_JAVA_VERSION=$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-ververica-$(FROCKSDB_VERSION)) + $(eval FJAR_PREF=frocksdbjni-$(FROCKSDB_JAVA_VERSION)) + $(eval FJAR=$(FJAR_PREF).jar) + $(eval FJAR_DOCS=$(FJAR_PREF)-javadoc.jar) + $(eval FJAR_SOURCES=$(FJAR_PREF)-sources.jar) + + cd java/target;cp $(ROCKSDB_JAR_ALL) frocksdb-release/$(FJAR) + cd java/target;cp $(JAR_DOCS) frocksdb-release/$(FJAR_DOCS) + cd java/target;cp $(JAR_SOURCES) frocksdb-release/$(FJAR_SOURCES) + openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 + cd java;cat pom.xml.template | sed 's/\$${FROCKSDB_JAVA_VERSION}/$(FROCKSDB_JAVA_VERSION)/' > pom.xml + cd java;cp pom.xml target/frocksdb-release/$(FJAR_PREF).pom + rocksdbjavastaticdockerx86: mkdir -p java/target docker run --rm --name rocksdb_linux_x86-be --platform linux/386 --attach stdin --attach stdout --attach stderr --volume $(HOME)/.m2:/root/.m2:ro --volume `pwd`:/rocksdb-host:ro --volume /rocksdb-local-build --volume `pwd`/java/target:/rocksdb-java-target --env DEBUG_LEVEL=$(DEBUG_LEVEL) evolvedbinary/rocksjava:centos6_x86-be /rocksdb-host/java/crossbuild/docker-build-linux-centos.sh diff --git a/java/crossbuild/build-win.bat b/java/crossbuild/build-win.bat new file mode 100644 index 000000000..2925ec19a --- /dev/null +++ b/java/crossbuild/build-win.bat @@ -0,0 +1,16 @@ +:: install git, java 8, maven, visual studio community 15 (2017) + +set MSBUILD=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe + +if exist build rd /s /q build +if exist librocksdbjni-win64.dll del librocksdbjni-win64.dll +mkdir build && cd build + +cmake 
-G "Visual Studio 15 Win64" -DWITH_JNI=1 .. + +"%MSBUILD%" rocksdb.sln /p:Configuration=Release /m + +cd .. + +copy build\java\Release\rocksdbjni-shared.dll librocksdbjni-win64.dll +echo Result is in librocksdbjni-win64.dll \ No newline at end of file diff --git a/java/deploysettings.xml b/java/deploysettings.xml new file mode 100644 index 000000000..7b73248e0 --- /dev/null +++ b/java/deploysettings.xml @@ -0,0 +1,12 @@ + + + + sonatype-nexus-staging + ${sonatype_user} + ${sonatype_pw} + + + \ No newline at end of file diff --git a/java/pom.xml.template b/java/pom.xml.template index 9dd9c74f3..bd882ec3a 100644 --- a/java/pom.xml.template +++ b/java/pom.xml.template @@ -2,12 +2,12 @@ 4.0.0 - org.rocksdb - rocksdbjni - ${ROCKSDB_JAVA_VERSION} + com.ververica + frocksdbjni + ${FROCKSDB_JAVA_VERSION} RocksDB JNI - RocksDB fat jar that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files + RocksDB fat jar with modifications specific for Apache Flink that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files for Mac OSX, and a .dll for Windows x64. https://rocksdb.org @@ -19,17 +19,12 @@ http://www.apache.org/licenses/LICENSE-2.0.html repo - - GNU General Public License, version 2 - http://www.gnu.org/licenses/gpl-2.0.html - repo - - scm:git:https://github.com/facebook/rocksdb.git - scm:git:https://github.com/facebook/rocksdb.git - scm:git:https://github.com/facebook/rocksdb.git + scm:git:https://github.com/ververica/frocksdb.git + scm:git:https://github.com/ververica/frocksdb.git + scm:git:https://github.com/ververica/frocksdb.git diff --git a/java/publish-frocksdbjni.sh b/java/publish-frocksdbjni.sh new file mode 100644 index 000000000..474c62e50 --- /dev/null +++ b/java/publish-frocksdbjni.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +# fail on errors +set -e + +PREFIX=java/target/frocksdb-release/frocksdbjni-${VERSION} + +function deploy() { + FILE=$1 + CLASSIFIER=$2 + echo "Deploying file=${FILE} with classifier=${CLASSIFIER} to sonatype with prefix=${PREFIX}" + sonatype_user=${USER} sonatype_pw=${PASSWORD} mvn gpg:sign-and-deploy-file \ + --settings java/deploysettings.xml \ + -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ \ + -DrepositoryId=sonatype-nexus-staging \ + -DpomFile=${PREFIX}.pom \ + -Dfile=$FILE \ + -Dclassifier=$CLASSIFIER \ + -Dgpg.keyname=${KEYNAME} \ + -Dgpg.passphrase=${PASSPHRASE} +} + +PREFIX=java/target/frocksdb-release/frocksdbjni-${VERSION} + +deploy ${PREFIX}-sources.jar sources +deploy ${PREFIX}-javadoc.jar javadoc +deploy ${PREFIX}.jar \ No newline at end of file From 6cae002662a45131a0cd90dd84f5d3d3cb958713 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Wed, 11 Aug 2021 14:52:31 +0800 Subject: [PATCH 344/386] Add content related to ARM building in the FROCKSDB-RELEASE documentation --- FROCKSDB-RELEASE.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/FROCKSDB-RELEASE.md b/FROCKSDB-RELEASE.md index bd5d0b2cc..9538817f7 100644 --- a/FROCKSDB-RELEASE.md +++ b/FROCKSDB-RELEASE.md @@ -32,13 +32,17 @@ The result windows jar is `build\java\rocksdbjni_classes.jar`. There is also a how-to in CMakeLists.txt. ## Build in PPC64LE +We can build binaries on ppc64le platform in two ways: build on a powerPC machine directly or build within a docker machine via QEMU. +I strongly suggest to build on a powerPC machine. ### Build on a powerPC machine -Strongly suggest you to build on native powerPC machine: +Run commands separately to get binaries. + make jclean clean rocksdbjavastaticdockerppc64le + make jclean clean rocksdbjavastaticdockerppc64lemusl ### Build within a docker machine via QEMU -** warning ** It would be extremely slow to build within a docker machine via QEMU, from my experiense, it might need at least 8 hours to build FRocksDB once. +**warning** It would be extremely slow to build within a docker machine via QEMU, from my experiense, it might need at least 8 hours to build FRocksDB once. Use Ubuntu 16.04 (e.g. AWS instance 4 cores, 16GB RAM, 40GB storage for build). Install git if not installed. If docker is installed, it might need to be removed. @@ -74,6 +78,12 @@ Build frocksdb: The result native libraries are `java/target/librocksdbjni-linux-ppc64le.so` and `java/target/librocksdbjni-linux-ppc64le-musl.so`. +## Build on ARM machines +Run commands separately to get binaries. + + make jclaen clean rocksdbjavastaticdockerarm64v8 + make jclaen clean rocksdbjavastaticdockerarm64v8musl + ## Final crossbuild in Mac OSX Read how to Build cross jar for Mac OSX and linux as described in java/RELEASE.md but do not run it yet. 
@@ -85,6 +95,8 @@ Run commands: cp /librocksdbjni-win64.dll java/target/librocksdbjni-win64.dll cp /librocksdbjni-linux-ppc64le.so java/target/librocksdbjni-linux-ppc64le.so cp /librocksdbjni-linux-ppc64le-musl.so java/target/librocksdbjni-linux-ppc64le-musl.so + cp /librocksdbjni-linux-aarch64.so java/target/librocksdbjni-linux-aarch64.so + cp /librocksdbjni-linux-aarch64-musl.so java/target/librocksdbjni-linux-aarch64-musl.so FROCKSDB_VERSION=1.0 PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true make clean frocksdbjavastaticreleasedocker * Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787 From bac6aeb6e012e19d9d5e3a5ee22b84c1e4a1559c Mon Sep 17 00:00:00 2001 From: Mika Naylor Date: Mon, 16 Aug 2021 17:28:15 +0200 Subject: [PATCH 345/386] [FLINK-23756] Update FrocksDB release document with more info Also make some slight improvements to the Maven upload script. --- FROCKSDB-RELEASE.md | 244 +++++++++++++++++++++++++++--------- java/publish-frocksdbjni.sh | 8 +- 2 files changed, 188 insertions(+), 64 deletions(-) diff --git a/FROCKSDB-RELEASE.md b/FROCKSDB-RELEASE.md index 9538817f7..3ec3c2724 100644 --- a/FROCKSDB-RELEASE.md +++ b/FROCKSDB-RELEASE.md @@ -1,94 +1,190 @@ +# FRocksDB Release Process + ## Summary -For FRocksDB-6.x, we need to release jar package which contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files for Mac OSX, and a .dll for Windows x64. -## Build in Windows +FrocksDB-6.x releases are a fat jar file that contain the following binaries: +* .so files for linux32 (glibc and musl-libc) +* .so files for linux64 (glibc and musl-libc) +* .so files for linux [aarch64](https://en.wikipedia.org/wiki/AArch64) (glibc and musl-libc) +* .so files for linux [ppc64le](https://en.wikipedia.org/wiki/Ppc64le) (glibc and musl-libc) +* .jnilib file for Mac OSX +* .dll for Windows x64 -Use Windows 64 bit machine (e.g. base AWS Windows instance: 4 cores, 16GB RAM, 40GB storage for build). +To build the binaries for a FrocksDB release, building on native architectures is advised. Building the binaries for ppc64le and aarch64 *can* be done using QEMU, but you may run into emulation bugs and the build times will be dramatically slower (up to x20). -Install: - * git - * java 8 - * maven - * Visual Studio Community 15 (2017) +We recommend building the binaries on environments with at least 4 cores, 16GB RAM and 40GB of storage. The following environments are recommended for use in the build process: +* Windows x64 +* Linux aarch64 +* Linux ppc64le +* Mac OSX -With [chocolatey](https://chocolatey.org/install): +## Build for Windows - choco install git.install jdk8 maven visualstudio2017community +For the Windows binary build, we recommend using a base [AWS Windows EC2 instance](https://aws.amazon.com/windows/products/ec2/) with 4 cores, 16GB RAM, 40GB storage for the build. -Optionally: +Firstly, install [chocolatey](https://chocolatey.org/install). 
Once installed, the following required components can be installed using Powershell: - choco install intellijidea-community vscode + choco install git.install jdk8 maven visualstudio2017community visualstudio2017-workload-nativedesktop -Open developer command prompt for vs 2017 and run commands: +Open the "Developer Command Prompt for VS 2017" and run the following commands: git clone git@github.com:ververica/frocksdb.git cd frocksdb git checkout FRocksDB-6.20.3 # release branch java\crossbuild\build-win.bat -The result native library is `build\java\Release\rocksdbjni-shared.dll` and you can also find it under project foler with name `librocksdbjni-win64.dll` +The resulting native binary will be built and available at `build\java\Release\rocksdbjni-shared.dll`. You can also find it under project folder with name `librocksdbjni-win64.dll`. The result windows jar is `build\java\rocksdbjni_classes.jar`. There is also a how-to in CMakeLists.txt. +**Once finished, extract the `librocksdbjni-win64.dll` from the build environment. You will need this .dll in the final crossbuild.** + +## Build for aarch64 + +For the Linux aarch64 binary build, we recommend using a base [AWS Ubuntu Server 20.04 LTS EC2](https://aws.amazon.com/windows/products/ec2/) with a 4 core Arm processor, 16GB RAM, 40GB storage for the build. You can also attempt to build with QEMU on a non-aarch64 processor, but you may run into emulation bugs and very long build times. + +### Building in aarch64 environment + +First, install the required packages such as Java 8 and make: + + sudo apt-get update + sudo apt-get install build-essential openjdk-8-jdk + +then, install and setup [Docker](https://docs.docker.com/engine/install/ubuntu/): + + sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release + + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=arm64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get install docker-ce docker-ce-cli containerd.io + + sudo groupadd docker + sudo usermod -aG docker $USER + newgrp docker + +Then, clone the FrocksDB repo: + + git clone https://github.com/ververica/frocksdb.git + cd frocksdb + git checkout FRocksDB-6.20.3 # release branch + + +First, build the glibc binary: + + make jclean clean rocksdbjavastaticdockerarm64v8 + +**Once finished, extract the `java/target/librocksdbjni-linux-aarch64.so` from the build environment. You will need this .so in the final crossbuild.** + +Next, build the musl-libc binary: + + make jclean clean rocksdbjavastaticdockerarm64v8musl + +**Once finished, extract the `java/target/librocksdbjni-linux-aarch64-musl.so` from the build environment. You will need this .so in the final crossbuild.** + +### Building via QEMU + +You can use QEMU on, for example, an `x86_64` system to build the aarch64 binaries. To set this up on an Ubuntu envirnment: + + sudo apt-get install qemu binfmt-support qemu-user-static + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + +To verify that you can now run aarch64 docker images: + + docker run --rm -t arm64v8/ubuntu uname -m + > aarch64 + +You can now attempt to build the aarch64 binaries as in the previous section. 
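Whichever route you take (native aarch64 hardware or QEMU), an optional quick check that the produced libraries really target aarch64 (assuming the standard `file` utility is installed):

    file java/target/librocksdbjni-linux-aarch64.so
    file java/target/librocksdbjni-linux-aarch64-musl.so

The output should mention `ELF 64-bit LSB shared object, ARM aarch64`.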
+ ## Build in PPC64LE -We can build binaries on ppc64le platform in two ways: build on a powerPC machine directly or build within a docker machine via QEMU. -I strongly suggest to build on a powerPC machine. -### Build on a powerPC machine -Run commands separately to get binaries. +For the ppc64le binaries, we recommend building on a PowerPC machine if possible, as it can be tricky to spin up a ppc64le cloud environment. However, if a PowerPC machine is not available, [Travis-CI](https://www.travis-ci.com/) offers ppc64le build environments that work perfectly for building these binaries. If neither a machine or Travis are an option, you can use QEMU but the build may take a very long time and be prone to emulation errors. + +### Building in ppc64le environment + +As with the aarch64 environment, the ppc64le environment will require Java 8, Docker and build-essentials installed. Once installed, you can build the 2 binaries: make jclean clean rocksdbjavastaticdockerppc64le + +**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le.so` from the build environment. You will need this .so in the final crossbuild.** + make jclean clean rocksdbjavastaticdockerppc64lemusl -### Build within a docker machine via QEMU -**warning** It would be extremely slow to build within a docker machine via QEMU, from my experiense, it might need at least 8 hours to build FRocksDB once. +**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le-musl.so` from the build environment. You will need this .so in the final crossbuild.** -Use Ubuntu 16.04 (e.g. AWS instance 4 cores, 16GB RAM, 40GB storage for build). -Install git if not installed. If docker is installed, it might need to be removed. +### Building via Travis -Setup ppc64le docker machine ([source](https://developer.ibm.com/linuxonpower/2017/06/08/build-test-ppc64le-docker-images-intel/)): +Travis-CI supports ppc64le build environments, and this can be a convienient way of building in the absence of a PowerPC machine. Assuming that you have an S3 bucket called **my-frocksdb-release-artifacts**, the following Travis configuration will build the release artifacts and push them to the S3 bucket: - wget http://ftp.unicamp.br/pub/ppc64el/boot2docker/install.sh && chmod +x ./install.sh && ./install.sh -s - docker-machine create -d qemu \ - --qemu-boot2docker-url=/home/ubuntu/.docker/machine/boot2docker.iso \ - --qemu-memory 8192 \ - --qemu-cpu-count 4 \ - --qemu-cache-mode none \ - --qemu-arch ppc64le \ - vm-ppc64le +``` +dist: xenial +language: cpp +os: + - linux +arch: + - ppc64le + +services: + - docker +addons: + artifacts: + paths: + - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le-musl.so + - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le.so + +env: + global: + - ARTIFACTS_BUCKET=my-rocksdb-release-artifacts + jobs: + - CMD=rocksdbjavastaticdockerppc64le + - CMD=rocksdbjavastaticdockerppc64lemusl + +install: + - sudo apt-get install -y openjdk-8-jdk || exit $? + - export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH + - export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture) + - echo "JAVA_HOME=${JAVA_HOME}" + - which java && java -version + - which javac && javac -version + +script: + - make jclean clean $CMD +``` -Regenerate certs as suggested if it did not work at once. 
+**Make sure to set the `ARTIFACTS_KEY` and `ARTIFACTS_SECRET` environment variables in the Travis Job with valid AWS credentials to access the S3 bucket you defined.** -Prepare docker machine to run rocksdbjni docker image for ppc64le build: +**Once finished, the`librocksdbjni-linux-ppce64le.so` and `librocksdbjni-linux-ppce64le-musl.so` binaries will be in the S3 bucket. You will need these .so binaries in the final crossbuild.** - eval $(docker-machine env vm-ppc64le) - git clone git@github.com:ververica/frocksdb.git - cd frocksdb - git checkout FRocksDB-6.20.3 # release branch - docker-machine ssh vm-ppc64le mkdir -p `pwd` - docker-machine scp -r . vm-ppc64le:`pwd` -Build frocksdb: +### Building via QEMU - make jclean clean rocksdbjavastaticdockerppc64le - docker-machine scp vm-ppc64le:`pwd`/java/target/librocksdbjni-linux-ppc64le.so java/target/. - make jclean clean rocksdbjavastaticdockerppc64lemusl - docker-machine scp vm-ppc64le:`pwd`/java/target/librocksdbjni-linux-ppc64le.so java/target/. +You can use QEMU on, for example, an `x86_64` system to build the ppc64le binaries. To set this up on an Ubuntu envirnment: + + sudo apt-get install qemu binfmt-support qemu-user-static + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -The result native libraries are `java/target/librocksdbjni-linux-ppc64le.so` and `java/target/librocksdbjni-linux-ppc64le-musl.so`. +To verify that you can now run ppc64le docker images: -## Build on ARM machines -Run commands separately to get binaries. + docker run --rm -t ppc64le/ubuntu uname -m + > ppc64le - make jclaen clean rocksdbjavastaticdockerarm64v8 - make jclaen clean rocksdbjavastaticdockerarm64v8musl +You can now attempt to build the ppc64le binaries as in the previous section. ## Final crossbuild in Mac OSX -Read how to Build cross jar for Mac OSX and linux as described in java/RELEASE.md but do not run it yet. +Documentation for the final crossbuild for Mac OSX and Linux is described in [java/RELEASE.md](java/RELEASE.md) as has information on dependencies that should be installed. As above, this tends to be Java 8, build-essentials and Docker. -Run commands: +Before you run this step, you should have 5 binaries from the previous build steps: + + 1. `librocksdbjni-win64.dll` from the Windows build step. + 2. `librocksdbjni-linux-aarch64.so` from the aarch64 build step. + 3. `librocksdbjni-linux-aarch64-musl.so` from the aarch64 build step. + 3. `librocksdbjni-linux-ppc64le.so` from the ppc64le build step. + 4. `librocksdbjni-linux-ppc64le-musl.so` from the ppc64le build step. + +To start the crossbuild within a Mac OSX environment: make jclean clean mkdir -p java/target @@ -97,19 +193,48 @@ Run commands: cp /librocksdbjni-linux-ppc64le-musl.so java/target/librocksdbjni-linux-ppc64le-musl.so cp /librocksdbjni-linux-aarch64.so java/target/librocksdbjni-linux-aarch64.so cp /librocksdbjni-linux-aarch64-musl.so java/target/librocksdbjni-linux-aarch64-musl.so - FROCKSDB_VERSION=1.0 PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true make clean frocksdbjavastaticreleasedocker + FROCKSDB_VERSION=1.0 PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true DEBUG_LEVEL=0 make frocksdbjavastaticreleasedocker + +*Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787*. + +Once finished, there should be a directory at `java/target/frocksdb-release` with the FRocksDB jar, javadoc jar, sources jar and pom in it. 
You can inspect the jar file and ensure that contains the binaries, history file, etc: + +``` +$ jar tf frocksdbjni-6.20.3-ververica-1.0.jar +META-INF/ +META-INF/MANIFEST.MF +HISTORY-JAVA.md +HISTORY.md +librocksdbjni-linux-aarch64-musl.so +librocksdbjni-linux-aarch64.so +librocksdbjni-linux-ppc64le-musl.so +librocksdbjni-linux-ppc64le.so +librocksdbjni-linux32-musl.so +librocksdbjni-linux32.so +librocksdbjni-linux64-musl.so +librocksdbjni-linux64.so +librocksdbjni-osx.jnilib +librocksdbjni-win64.dl +... +``` + +*Note that it contains linux32/64.so binaries as well as librocksdbjni-osx.jnilib*. + +## Push to Maven Central -* Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787 +For this step, you will need the following: -## Push to maven central +- The OSX Crossbuild artifacts built in `java/target/frocksdb-release` as above. +- A Sonatype account with access to the staging repository. If you do not have permission, open a ticket with Sonatype, [such as this one](https://issues.sonatype.org/browse/OSSRH-72185). +- A GPG key to sign the release, with your public key available for verification (for example, by uploading it to https://keys.openpgp.org/) -Run: +To upload the release to the Sonatype staging repository: ```bash VERSION= \ USER= \ PASSWORD= \ -KEYNAME= \ -PASSPHRASE= \ +KEYNAME= \ +PASSPHRASE= \ java/publish-frocksdbjni.sh ``` @@ -119,7 +244,6 @@ https://oss.sonatype.org/#stagingRepositories Select the open staging repository and click on "Close". -Test the files in the staging repository -which will look something like this `https://oss.sonatype.org/content/repositories/xxxx-1020`. +The staging repository will look something like `https://oss.sonatype.org/content/repositories/xxxx-1020`. You can use this staged release to test the artifacts and ensure they are correct. -Press the "Release" button (WARNING: this can not be undone). \ No newline at end of file +Once you have verified the artifacts are correct, press the "Release" button. **WARNING: this can not be undone**. Within 24-48 hours, the artifact will be available on Maven Central for use. 
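One optional way to test the staged artifacts before pressing "Release" (a suggestion rather than a required step; it assumes you have downloaded a jar and its detached `.asc` signature from the staging repository URL, and that the key fingerprint below is the one you uploaded to the key server):

```bash
# fetch the public key used for signing, if it is not already in the local keyring
gpg --keyserver keys.openpgp.org --recv-keys <key id or fingerprint>
# verify the signature that the staging repository serves next to each artifact
gpg --verify frocksdbjni-6.20.3-ververica-1.0.jar.asc frocksdbjni-6.20.3-ververica-1.0.jar
```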
diff --git a/java/publish-frocksdbjni.sh b/java/publish-frocksdbjni.sh index 474c62e50..2a6bd2865 100644 --- a/java/publish-frocksdbjni.sh +++ b/java/publish-frocksdbjni.sh @@ -26,19 +26,19 @@ function deploy() { FILE=$1 CLASSIFIER=$2 echo "Deploying file=${FILE} with classifier=${CLASSIFIER} to sonatype with prefix=${PREFIX}" - sonatype_user=${USER} sonatype_pw=${PASSWORD} mvn gpg:sign-and-deploy-file \ + sonatype_user="${USER}" sonatype_pw="${PASSWORD}" mvn gpg:sign-and-deploy-file \ --settings java/deploysettings.xml \ -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ \ -DrepositoryId=sonatype-nexus-staging \ -DpomFile=${PREFIX}.pom \ -Dfile=$FILE \ -Dclassifier=$CLASSIFIER \ - -Dgpg.keyname=${KEYNAME} \ - -Dgpg.passphrase=${PASSPHRASE} + -Dgpg.keyname="${KEYNAME}" \ + -Dgpg.passphrase="${PASSPHRASE}" } PREFIX=java/target/frocksdb-release/frocksdbjni-${VERSION} deploy ${PREFIX}-sources.jar sources deploy ${PREFIX}-javadoc.jar javadoc -deploy ${PREFIX}.jar \ No newline at end of file +deploy ${PREFIX}.jar From 174639cf1e6080a8f8f37aec132b3a500428f913 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Mon, 5 Dec 2022 12:45:09 +0800 Subject: [PATCH 346/386] [hotfix] Change the resource request when running CI --- .circleci/config.yml | 62 +++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 370ad301a..4d4f15512 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -237,14 +237,16 @@ jobs: build-macos: macos: xcode: 14.3.1 - resource_class: macos.m1.medium.gen1 + resource_class: medium environment: ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now steps: - increase-max-open-files-on-macos - install-gflags-on-macos - pre-steps-macos - - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=16 -j16 all + - run: + command: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=16 -j16 all + no_output_timeout: 30m - post-steps build-macos-cmake: @@ -284,7 +286,7 @@ jobs: build-linux: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: make V=1 J=32 -j32 check @@ -292,7 +294,7 @@ jobs: build-linux-encrypted_env-no_compression: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check @@ -302,7 +304,7 @@ jobs: build-linux-static_lib-alt_namespace-status_checked: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 check @@ -310,7 +312,7 @@ jobs: build-linux-release: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - checkout # check out the code in the project directory - run: make V=1 -j32 LIB_MODE=shared release @@ -353,7 +355,7 @@ jobs: build-linux-clang10-asan: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out @@ -361,7 +363,7 @@ jobs: build-linux-clang10-mini-tsan: executor: linux-docker - resource_class: 2xlarge+ + resource_class: xlarge+ steps: 
- pre-steps - run: COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check @@ -369,7 +371,7 @@ jobs: build-linux-clang10-ubsan: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out @@ -377,7 +379,7 @@ jobs: build-linux-valgrind: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: PORTABLE=1 make V=1 -j32 valgrind_test @@ -385,7 +387,7 @@ jobs: build-linux-clang10-clang-analyze: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path. @@ -413,7 +415,7 @@ jobs: build-linux-cmake-with-folly: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - setup-folly @@ -423,7 +425,7 @@ jobs: build-linux-cmake-with-folly-lite-no-test: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - setup-folly @@ -432,7 +434,7 @@ jobs: build-linux-cmake-with-benchmark: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20 @@ -456,7 +458,7 @@ jobs: build-linux-gcc-7-with-folly: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - setup-folly @@ -466,7 +468,7 @@ jobs: build-linux-gcc-7-with-folly-lite-no-test: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - setup-folly @@ -475,7 +477,7 @@ jobs: build-linux-gcc-8-no_test_run: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: CC=gcc-8 CXX=g++-8 V=1 make -j32 all @@ -483,7 +485,7 @@ jobs: build-linux-cmake-with-folly-coroutines: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge environment: CC: gcc-10 CXX: g++-10 @@ -496,7 +498,7 @@ jobs: build-linux-gcc-10-cxx20-no_test_run: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j32 all @@ -504,7 +506,7 @@ jobs: build-linux-gcc-11-no_test_run: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j32 all microbench # TODO: LIB_MODE only to work around unresolved linker failures @@ -512,7 +514,7 @@ jobs: build-linux-clang-13-no_test_run: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j32 all microbench @@ -521,7 +523,7 @@ jobs: # Ensure ASAN+UBSAN with folly, and full testsuite with clang 13 build-linux-clang-13-asan-ubsan-with-folly: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - setup-folly @@ -532,7 +534,7 @@ jobs: # This job is only to make sure the 
microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing. build-linux-run-microbench: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: DEBUG_LEVEL=0 make -j32 run_microbench @@ -548,7 +550,7 @@ jobs: build-linux-crashtest-tiered-storage-bb: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: @@ -559,7 +561,7 @@ jobs: build-linux-crashtest-tiered-storage-wb: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: @@ -571,7 +573,7 @@ jobs: build-windows-vs2022-avx2: executor: name: win/server-2022 - size: 2xlarge + size: xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Program Files/CMake @@ -589,7 +591,7 @@ jobs: build-windows-vs2022: executor: name: win/server-2022 - size: 2xlarge + size: xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Program Files/CMake @@ -607,7 +609,7 @@ jobs: build-windows-vs2019: executor: name: win/server-2019 - size: 2xlarge + size: xlarge environment: THIRDPARTY_HOME: C:/Users/circleci/thirdparty CMAKE_HOME: C:/Program Files/CMake @@ -781,7 +783,7 @@ jobs: build-linux-non-shm: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge environment: TEST_TMPDIR: /tmp/rocksdb_test_tmp steps: @@ -844,7 +846,7 @@ jobs: build-format-compatible: executor: linux-docker - resource_class: 2xlarge + resource_class: xlarge steps: - pre-steps - run: From c71fb2ad010aee210f807e259b90d05d13a07977 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Fri, 20 Jan 2023 19:46:52 +0800 Subject: [PATCH 347/386] [hotfix] Add docs of how to upload ppc64le artifacts to s3 --- FROCKSDB-RELEASE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FROCKSDB-RELEASE.md b/FROCKSDB-RELEASE.md index 3ec3c2724..2cd092d88 100644 --- a/FROCKSDB-RELEASE.md +++ b/FROCKSDB-RELEASE.md @@ -155,6 +155,8 @@ script: **Make sure to set the `ARTIFACTS_KEY` and `ARTIFACTS_SECRET` environment variables in the Travis Job with valid AWS credentials to access the S3 bucket you defined.** +**Make sure to avoid signatureV4-only S3 regions to store the uploaded artifacts (due to unresolved https://github.com/travis-ci/artifacts/issues/57). You can just choose the S3 bucket of `us-east-1` region for 100% compatibility.** + **Once finished, the`librocksdbjni-linux-ppce64le.so` and `librocksdbjni-linux-ppce64le-musl.so` binaries will be in the S3 bucket. 
You will need these .so binaries in the final crossbuild.** From 8fff7bb9947f9036021f99e3463c9657e80b71ae Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Sat, 3 Dec 2022 13:24:00 +0800 Subject: [PATCH 348/386] [hotfix] Use zlib's fossils page to replace web.archive --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96d69282b..332db623b 100644 --- a/Makefile +++ b/Makefile @@ -2101,7 +2101,7 @@ SHA256_CMD = sha256sum ZLIB_VER ?= 1.3 ZLIB_SHA256 ?= ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e -ZLIB_DOWNLOAD_BASE ?= http://zlib.net +ZLIB_DOWNLOAD_BASE ?= https://zlib.net/fossils BZIP2_VER ?= 1.0.8 BZIP2_SHA256 ?= ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269 BZIP2_DOWNLOAD_BASE ?= http://sourceware.org/pub/bzip2 From 971d425ef2dd34a5d9d7ee9683ad0af9b4c472ac Mon Sep 17 00:00:00 2001 From: Roman Khachatryan Date: Thu, 22 Feb 2024 19:39:30 +0100 Subject: [PATCH 349/386] [hotfix] Regenerate TARGETS --- TARGETS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/TARGETS b/TARGETS index a28dd0ae1..afad2672b 100644 --- a/TARGETS +++ b/TARGETS @@ -5082,11 +5082,13 @@ cpp_unittest_wrapper(name="filename_test", deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) + cpp_unittest_wrapper(name="flink_compaction_filter_test", srcs=["utilities/flink/flink_compaction_filter_test.cc"], deps=[":rocksdb_test_lib"], extra_compiler_flags=[]) + cpp_unittest_wrapper(name="flush_job_test", srcs=["db/flush_job_test.cc"], deps=[":rocksdb_test_lib"], From 96705d2143d350d42325c63f4bfed7caf14c156c Mon Sep 17 00:00:00 2001 From: Roman Khachatryan Date: Thu, 22 Feb 2024 21:26:06 +0100 Subject: [PATCH 350/386] [hotfix] Fix namespaces in flink compaction filter --- java/rocksjni/flink_compactionfilterjni.cc | 62 +++++++++++----------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/java/rocksjni/flink_compactionfilterjni.cc b/java/rocksjni/flink_compactionfilterjni.cc index 9f0954b43..a45f12d7b 100644 --- a/java/rocksjni/flink_compactionfilterjni.cc +++ b/java/rocksjni/flink_compactionfilterjni.cc @@ -12,8 +12,6 @@ #include "rocksjni/jnicallback.h" #include "utilities/flink/flink_compaction_filter.h" -using namespace ROCKSDB_NAMESPACE::flink; - class JniCallbackBase : public ROCKSDB_NAMESPACE::JniCallback { public: JniCallbackBase(JNIEnv* env, jobject jcallback_obj) @@ -94,7 +92,8 @@ class JavaListElemenFilterFactory assert(m_jcreate_filter_methodid != nullptr); } - FlinkCompactionFilter::ListElementFilter* CreateListElementFilter( + ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::ListElementFilter* + CreateListElementFilter( std::shared_ptr /*logger*/) const override { jboolean attached_thread = JNI_FALSE; JNIEnv* env = getJniEnv(&attached_thread); @@ -141,16 +140,16 @@ class JavaTimeProvider jmethodID m_jcurrent_timestamp_methodid; }; -static FlinkCompactionFilter::ListElementFilterFactory* -createListElementFilterFactory(JNIEnv* env, jint ji_list_elem_len, - jobject jlist_filter_factory) { - FlinkCompactionFilter::ListElementFilterFactory* list_filter_factory = - nullptr; +static ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter:: + ListElementFilterFactory* + createListElementFilterFactory(JNIEnv* env, jint ji_list_elem_len, + jobject jlist_filter_factory) { + ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::ListElementFilterFactory* + list_filter_factory = nullptr; if (ji_list_elem_len > 0) { auto fixed_size = static_cast(ji_list_elem_len); - list_filter_factory = - new 
FlinkCompactionFilter::FixedListElementFilterFactory( - fixed_size, static_cast(0)); + list_filter_factory = new ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter:: + FixedListElementFilterFactory(fixed_size, static_cast(0)); } else if (jlist_filter_factory != nullptr) { list_filter_factory = new JavaListElemenFilterFactory(env, jlist_filter_factory); @@ -165,10 +164,10 @@ createListElementFilterFactory(JNIEnv* env, jint ji_list_elem_len, */ jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfigHolder( JNIEnv* /* env */, jclass /* jcls */) { - using namespace ROCKSDB_NAMESPACE::flink; return reinterpret_cast( - new std::shared_ptr( - new FlinkCompactionFilter::ConfigHolder())); + new std::shared_ptr< + ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::ConfigHolder>( + new ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::ConfigHolder())); } /* @@ -178,10 +177,8 @@ jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfi */ void Java_org_rocksdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHolder( JNIEnv* /* env */, jclass /* jcls */, jlong handle) { - using namespace ROCKSDB_NAMESPACE::flink; - auto* config_holder = - reinterpret_cast*>( - handle); + auto* config_holder = reinterpret_cast*>(handle); delete config_holder; } @@ -193,9 +190,9 @@ void Java_org_rocksdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHo jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( JNIEnv* env, jclass /* jcls */, jlong config_holder_handle, jobject jtime_provider, jlong logger_handle) { - using namespace ROCKSDB_NAMESPACE::flink; auto config_holder = - *(reinterpret_cast*>( + *(reinterpret_cast*>( config_holder_handle)); auto time_provider = new JavaTimeProvider(env, jtime_provider); auto logger = @@ -204,10 +201,13 @@ jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( : *(reinterpret_cast< std::shared_ptr*>( logger_handle)); - return reinterpret_cast(new FlinkCompactionFilter( - config_holder, - std::unique_ptr(time_provider), - logger)); + return reinterpret_cast( + new ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter( + config_holder, + std::unique_ptr< + ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::TimeProvider>( + time_provider), + logger)); } /* @@ -221,19 +221,21 @@ jboolean Java_org_rocksdb_FlinkCompactionFilter_configureFlinkCompactionFilter( jlong jquery_time_after_num_entries, jint ji_list_elem_len, jobject jlist_filter_factory) { auto state_type = - static_cast(ji_state_type); + static_cast( + ji_state_type); auto timestamp_offset = static_cast(ji_timestamp_offset); auto ttl = static_cast(jl_ttl_milli); auto query_time_after_num_entries = static_cast(jquery_time_after_num_entries); auto config_holder = - *(reinterpret_cast*>( + *(reinterpret_cast*>( handle)); auto list_filter_factory = createListElementFilterFactory( env, ji_list_elem_len, jlist_filter_factory); - auto config = new FlinkCompactionFilter::Config{ + auto config = new ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter::Config{ state_type, timestamp_offset, ttl, query_time_after_num_entries, - std::unique_ptr( - list_filter_factory)}; + std::unique_ptr(list_filter_factory)}; return static_cast(config_holder->Configure(config)); -} \ No newline at end of file +} From 09e645b803036b23bc5c947b0925b17317e99cab Mon Sep 17 00:00:00 2001 From: Zakelly Date: Thu, 14 Dec 2023 16:31:24 +0800 Subject: [PATCH 351/386] [FLINK-23346] Avoid core dump when rethrowing the exception --- java/rocksjni/flink_compactionfilterjni.cc | 3 
++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/rocksjni/flink_compactionfilterjni.cc b/java/rocksjni/flink_compactionfilterjni.cc index a45f12d7b..cd3e88027 100644 --- a/java/rocksjni/flink_compactionfilterjni.cc +++ b/java/rocksjni/flink_compactionfilterjni.cc @@ -20,8 +20,9 @@ class JniCallbackBase : public ROCKSDB_NAMESPACE::JniCallback { protected: inline void CheckAndRethrowException(JNIEnv* env) const { if (env->ExceptionCheck()) { + jthrowable obj = env->ExceptionOccurred(); env->ExceptionDescribe(); - env->Throw(env->ExceptionOccurred()); + env->Throw(obj); } } }; From b162cb4e84252d1347f7666df1b7e441bf0d69e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Mon, 17 Jun 2024 20:09:31 +0800 Subject: [PATCH 352/386] [FLINK-35577] Fix the broken CI --- .circleci/config.yml | 119 ++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4d4f15512..84b39239a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -129,6 +129,17 @@ commands: sudo apt remove --purge cmake sudo snap install cmake --classic + install-clang-10: + steps: + - run: + name: Install Clang 10 + command: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list + echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list + echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable + sudo apt-get update -y && sudo apt-get install -y clang-10 + install-gflags: steps: - run: @@ -237,7 +248,7 @@ jobs: build-macos: macos: xcode: 14.3.1 - resource_class: medium + resource_class: macos.m1.medium.gen1 environment: ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now steps: @@ -289,7 +300,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: make V=1 J=32 -j32 check + - run: make V=1 J=8 -j8 check - post-steps build-linux-encrypted_env-no_compression: @@ -297,7 +308,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check + - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=8 -j8 check - run: | ./sst_dump --help | grep -E -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression - post-steps @@ -307,7 +318,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j24 check + - run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=static OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j8 check - post-steps build-linux-release: @@ -315,20 +326,20 @@ jobs: resource_class: xlarge steps: - checkout # check out the code in the project directory - - run: make V=1 -j32 LIB_MODE=shared release + - run: make V=1 -j8 LIB_MODE=shared release - run: ls librocksdb.so # ensure shared lib built - run: ./db_stress --version # ensure with gflags - run: make clean - - run: make V=1 -j32 release + - run: make V=1 -j8 release - run: ls librocksdb.a # ensure static lib built - run: 
./db_stress --version # ensure with gflags - run: make clean - run: apt-get remove -y libgflags-dev - - run: make V=1 -j32 LIB_MODE=shared release + - run: make V=1 -j8 LIB_MODE=shared release - run: ls librocksdb.so # ensure shared lib built - run: if ./db_stress --version; then false; else true; fi # ensure without gflags - run: make clean - - run: make V=1 -j32 release + - run: make V=1 -j8 release - run: ls librocksdb.a # ensure static lib built - run: if ./db_stress --version; then false; else true; fi # ensure without gflags - post-steps @@ -354,19 +365,25 @@ jobs: - post-steps build-linux-clang10-asan: - executor: linux-docker + machine: + image: ubuntu-2004:202111-02 resource_class: xlarge steps: - pre-steps + - install-gflags + - install-clang-10 - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out - post-steps build-linux-clang10-mini-tsan: - executor: linux-docker - resource_class: xlarge+ + machine: + image: ubuntu-2004:202111-02 + resource_class: xlarge steps: - pre-steps - - run: COMPILE_WITH_TSAN=1 CC=clang-13 CXX=clang++-13 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check + - install-gflags + - install-clang-10 + - run: COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out. - post-steps build-linux-clang10-ubsan: @@ -374,7 +391,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out + - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j8 ubsan_check # aligned new doesn't work for reason we haven't figured out - post-steps build-linux-valgrind: @@ -382,7 +399,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: PORTABLE=1 make V=1 -j32 valgrind_test + - run: PORTABLE=1 make V=1 -j8 valgrind_test - post-steps build-linux-clang10-clang-analyze: @@ -390,7 +407,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path. + - run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j8 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path. - post-steps - run: name: "compress test report" @@ -420,7 +437,7 @@ jobs: - pre-steps - setup-folly - build-folly - - run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20) + - run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. 
&& make V=1 -j8 && ctest -j8) - post-steps build-linux-cmake-with-folly-lite-no-test: @@ -429,7 +446,7 @@ jobs: steps: - pre-steps - setup-folly - - run: (mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j20) + - run: (mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j8) - post-steps build-linux-cmake-with-benchmark: @@ -437,7 +454,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20 + - run: mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j8 && ctest -j8 - post-steps build-linux-unity-and-headers: @@ -463,7 +480,7 @@ jobs: - pre-steps - setup-folly - build-folly - - run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures + - run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j8 check # TODO: LIB_MODE only to work around unresolved linker failures - post-steps build-linux-gcc-7-with-folly-lite-no-test: @@ -472,7 +489,7 @@ jobs: steps: - pre-steps - setup-folly - - run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 all + - run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j8 all - post-steps build-linux-gcc-8-no_test_run: @@ -480,7 +497,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: CC=gcc-8 CXX=g++-8 V=1 make -j32 all + - run: CC=gcc-8 CXX=g++-8 V=1 make -j8 all - post-steps build-linux-cmake-with-folly-coroutines: @@ -493,7 +510,7 @@ jobs: - pre-steps - setup-folly - build-folly - - run: (mkdir build && cd build && cmake -DUSE_COROUTINES=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j20 && ctest -j20) + - run: (mkdir build && cd build && cmake -DUSE_COROUTINES=1 -DWITH_GFLAGS=1 -DROCKSDB_BUILD_SHARED=0 .. && make V=1 -j8 && ctest -j8) - post-steps build-linux-gcc-10-cxx20-no_test_run: @@ -501,7 +518,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j32 all + - run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j8 all - post-steps build-linux-gcc-11-no_test_run: @@ -509,7 +526,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j32 all microbench # TODO: LIB_MODE only to work around unresolved linker failures + - run: LIB_MODE=static CC=gcc-11 CXX=g++-11 V=1 make -j8 all microbench # TODO: LIB_MODE only to work around unresolved linker failures - post-steps build-linux-clang-13-no_test_run: @@ -517,7 +534,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j32 all microbench + - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j8 all microbench - post-steps # Ensure ASAN+UBSAN with folly, and full testsuite with clang 13 @@ -528,7 +545,7 @@ jobs: - pre-steps - setup-folly - build-folly - - run: CC=clang-13 CXX=clang++-13 LIB_MODE=static USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check # TODO: LIB_MODE only to work around unresolved linker failures + - run: CC=clang-13 CXX=clang++-13 LIB_MODE=static USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j8 check # TODO: LIB_MODE only to work around unresolved linker failures - post-steps # This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing. 
@@ -537,7 +554,7 @@ jobs: resource_class: xlarge steps: - pre-steps - - run: DEBUG_LEVEL=0 make -j32 run_microbench + - run: DEBUG_LEVEL=0 make -j8 run_microbench - post-steps build-linux-mini-crashtest: @@ -555,7 +572,7 @@ jobs: - pre-steps - run: name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' blackbox_crash_test_with_tiered_storage + command: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' blackbox_crash_test_with_tiered_storage no_output_timeout: 100m - post-steps @@ -566,7 +583,7 @@ jobs: - pre-steps - run: name: "run crashtest" - command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' whitebox_crash_test_with_tiered_storage + command: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS='--duration=10800 --use_io_uring=0' whitebox_crash_test_with_tiered_storage no_output_timeout: 100m - post-steps @@ -641,26 +658,26 @@ jobs: command: make V=1 J=8 -j8 jtest - post-steps - build-linux-java-pmd: - machine: - image: ubuntu-2004:202111-02 - resource_class: large - environment: - JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 - steps: - - install-maven - - pre-steps - - run: - name: "Set Java Environment" - command: | - echo "JAVA_HOME=${JAVA_HOME}" - echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV - which java && java -version - which javac && javac -version - - run: - name: "PMD RocksDBJava" - command: make V=1 J=8 -j8 jpmd - - post-pmd-steps +# build-linux-java-pmd: +# machine: +# image: ubuntu-2004:202111-02 +# resource_class: large +# environment: +# JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64 +# steps: +# - install-maven +# - pre-steps +# - run: +# name: "Set Java Environment" +# command: | +# echo "JAVA_HOME=${JAVA_HOME}" +# echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV +# which java && java -version +# which javac && javac -version +# - run: +# name: "PMD RocksDBJava" +# command: make V=1 J=8 -j8 jpmd +# - post-pmd-steps build-linux-java-static: executor: linux-java-docker @@ -788,7 +805,7 @@ jobs: TEST_TMPDIR: /tmp/rocksdb_test_tmp steps: - pre-steps - - run: make V=1 -j32 check + - run: make V=1 -j8 check - post-steps build-linux-arm-test-full: @@ -925,7 +942,7 @@ workflows: - build-macos-java - build-macos-java-static - build-macos-java-static-universal - - build-linux-java-pmd +# - build-linux-java-pmd jobs-macos: jobs: - build-macos From ed46268ea99b85370e9a8098be16806860aa6201 Mon Sep 17 00:00:00 2001 From: Vershinin Maxim 00873208 Date: Thu, 11 Apr 2024 21:54:51 -0700 Subject: [PATCH 353/386] Fix error for CF smallest and largest keys computation in ImportColumnFamilyJob::Prepare (#12526) Summary: This PR fixes error for CF smallest and largest keys computation in ImportColumnFamilyJob::Prepare. Before this fix smallest and largest keys for CF were computed incorrectly, and ImportColumnFamilyJob::Prepare function might not have detect overlaps between CFs. I added test to detect this error. 
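To make the intended computation concrete, here is a minimal standalone sketch of the corrected aggregation behind the sign flips in the diff below. It uses a hypothetical `FileRange` struct and plain lexicographic `std::string` comparison in place of RocksDB's `InternalKey` and `InternalKeyComparator`, so it illustrates the direction of the comparisons rather than the actual implementation:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Hypothetical stand-in for a file's key range (the real code works on
// internal keys via the CF's InternalKeyComparator).
struct FileRange {
  std::string smallest;
  std::string largest;
};

// The CF-wide range must be *widened* as files are scanned: keep the
// minimum of the smallest keys and the maximum of the largest keys.
// The bug fixed here had both comparisons inverted, which shrank the
// range and could hide overlaps between CFs.
FileRange ComputeCfRange(const std::vector<FileRange>& files) {
  assert(!files.empty());
  FileRange cf = files[0];
  for (size_t i = 1; i < files.size(); ++i) {
    if (cf.smallest.compare(files[i].smallest) > 0) {
      cf.smallest = files[i].smallest;  // found a smaller "smallest"
    }
    if (cf.largest.compare(files[i].largest) < 0) {
      cf.largest = files[i].largest;  // found a larger "largest"
    }
  }
  return cf;
}
```

Prepare() then compares these per-CF ranges against each other to detect overlaps between the CFs being imported.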
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12526 Reviewed By: hx235 Differential Revision: D56046044 Pulled By: ajkr fbshipit-source-id: d562fbfc9cc2d9624372d24d34a649198a960691 (cherry picked from commit 70d3fc3b6f0bebc3f45e34cc7c3f9fa8ab064fdb) --- db/import_column_family_job.cc | 4 +- db/import_column_family_test.cc | 65 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index f7b8a50ae..0e97c721d 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -66,11 +66,11 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, largest = file_to_import.largest_internal_key; } else { if (cfd_->internal_comparator().Compare( - smallest, file_to_import.smallest_internal_key) < 0) { + smallest, file_to_import.smallest_internal_key) > 0) { smallest = file_to_import.smallest_internal_key; } if (cfd_->internal_comparator().Compare( - largest, file_to_import.largest_internal_key) > 0) { + largest, file_to_import.largest_internal_key) < 0) { largest = file_to_import.largest_internal_key; } } diff --git a/db/import_column_family_test.cc b/db/import_column_family_test.cc index 89586bcd1..e3a36073f 100644 --- a/db/import_column_family_test.cc +++ b/db/import_column_family_test.cc @@ -881,6 +881,71 @@ TEST_F(ImportColumnFamilyTest, ImportMultiColumnFamilyWithOverlap) { delete db_copy; ASSERT_OK(DestroyDir(env_, dbname_ + "/db_copy")); } + +TEST_F(ImportColumnFamilyTest, ImportMultiColumnFamilySeveralFilesWithOverlap) { + Options options = CurrentOptions(); + CreateAndReopenWithCF({"koko"}, options); + + SstFileWriter sfw_cf1(EnvOptions(), options, handles_[1]); + const std::string file1_sst_name = "file1.sst"; + const std::string file1_sst = sst_files_dir_ + file1_sst_name; + ASSERT_OK(sfw_cf1.Open(file1_sst)); + ASSERT_OK(sfw_cf1.Put("K1", "V1")); + ASSERT_OK(sfw_cf1.Put("K2", "V2")); + ASSERT_OK(sfw_cf1.Finish()); + + SstFileWriter sfw_cf2(EnvOptions(), options, handles_[1]); + const std::string file2_sst_name = "file2.sst"; + const std::string file2_sst = sst_files_dir_ + file2_sst_name; + ASSERT_OK(sfw_cf2.Open(file2_sst)); + ASSERT_OK(sfw_cf2.Put("K2", "V2")); + ASSERT_OK(sfw_cf2.Put("K3", "V3")); + ASSERT_OK(sfw_cf2.Finish()); + + ColumnFamilyHandle* second_cfh = nullptr; + ASSERT_OK(db_->CreateColumnFamily(options, "toto", &second_cfh)); + + SstFileWriter sfw_cf3(EnvOptions(), options, second_cfh); + const std::string file3_sst_name = "file3.sst"; + const std::string file3_sst = sst_files_dir_ + file3_sst_name; + ASSERT_OK(sfw_cf3.Open(file3_sst)); + ASSERT_OK(sfw_cf3.Put("K3", "V3")); + ASSERT_OK(sfw_cf3.Put("K4", "V4")); + ASSERT_OK(sfw_cf3.Finish()); + + SstFileWriter sfw_cf4(EnvOptions(), options, second_cfh); + const std::string file4_sst_name = "file4.sst"; + const std::string file4_sst = sst_files_dir_ + file4_sst_name; + ASSERT_OK(sfw_cf4.Open(file4_sst)); + ASSERT_OK(sfw_cf4.Put("K4", "V4")); + ASSERT_OK(sfw_cf4.Put("K5", "V5")); + ASSERT_OK(sfw_cf4.Finish()); + + ExportImportFilesMetaData metadata1, metadata2; + metadata1.files.push_back( + LiveFileMetaDataInit(file1_sst_name, sst_files_dir_, 1, 1, 2)); + metadata1.files.push_back( + LiveFileMetaDataInit(file2_sst_name, sst_files_dir_, 1, 3, 4)); + metadata1.db_comparator_name = options.comparator->Name(); + metadata2.files.push_back( + LiveFileMetaDataInit(file3_sst_name, sst_files_dir_, 1, 1, 2)); + metadata2.files.push_back( + LiveFileMetaDataInit(file4_sst_name, 
sst_files_dir_, 1, 3, 4)); + metadata2.db_comparator_name = options.comparator->Name(); + + std::vector metadatas{&metadata1, + &metadata2}; + + ASSERT_EQ(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo", + ImportColumnFamilyOptions(), + metadatas, &import_cfh_), + Status::InvalidArgument("CFs have overlapping ranges")); + ASSERT_EQ(import_cfh_, nullptr); + + ASSERT_OK(db_->DropColumnFamily(second_cfh)); + ASSERT_OK(db_->DestroyColumnFamilyHandle(second_cfh)); +} + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { From 051f508a0815dbd8deb75a15d5b29e015537fc9d Mon Sep 17 00:00:00 2001 From: Changyu Bi Date: Thu, 14 Dec 2023 02:37:27 +0800 Subject: [PATCH 354/386] Fix a corruption bug in `CreateColumnFamilyWithImport()` (#12602) Summary: when importing files from multiple CFs into a new CF, we were reusing the epoch numbers assigned by the original CFs. This means L0 files in the new CF can have the same epoch number (assigned originally by different CFs). While CreateColumnFamilyWithImport() requires each original CF to have disjoint key range, after an intra-l0 compaction, we still can end up with L0 files with the same epoch number but overlapping key range. This PR attempt to fix this by reassigning epoch numbers when importing multiple CFs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12602 Test Plan: a new repro unit test. Before this PR, it fails with ``` [ RUN ] ImportColumnFamilyTest.AssignEpochNumberToMultipleCF db/import_column_family_test.cc:1048: Failure db_->WaitForCompact(o) Corruption: force_consistency_checks(DEBUG): VersionBuilder: L0 files of same epoch number but overlapping range https://github.com/facebook/rocksdb/issues/44 , smallest key: '6B6579303030303030' seq:511, type:1 , largest key: '6B6579303031303239' seq:510, type:1 , epoch number: 3 vs. 
file https://github.com/facebook/rocksdb/issues/36 , smallest key: '6B6579303030313030' seq:401, type:1 , largest key: '6B6579303030313939' seq:500, type:1 , epoch number: 3 ``` Reviewed By: hx235 Differential Revision: D56851808 Pulled By: cbi42 fbshipit-source-id: 01b8c790c9f1f2a168047ead670e73633f705b84 (cherry picked from commit 6fdc4c52823d0b32bb18321b0d4b14ab70d09e92) --- db/import_column_family_job.cc | 90 ++++++++------- db/import_column_family_test.cc | 105 +++++++++++++++++- db/version_set.cc | 54 ++++----- db/version_set.h | 10 +- .../bug_fixes/fix-import-epoch.md | 1 + 5 files changed, 189 insertions(+), 71 deletions(-) create mode 100644 unreleased_history/bug_fixes/fix-import-epoch.md diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 0e97c721d..4ef12d8fa 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -175,22 +175,29 @@ Status ImportColumnFamilyJob::Run() { static_cast(temp_current_time); } - // Recover files' epoch number using dummy VersionStorageInfo - VersionBuilder dummy_version_builder( - cfd_->current()->version_set()->file_options(), cfd_->ioptions(), - cfd_->table_cache(), cfd_->current()->storage_info(), - cfd_->current()->version_set(), - cfd_->GetFileMetadataCacheReservationManager()); - VersionStorageInfo dummy_vstorage( - &cfd_->internal_comparator(), cfd_->user_comparator(), - cfd_->NumberLevels(), cfd_->ioptions()->compaction_style, - nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks, - EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock, - cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay, - cfd_->current()->version_set()->offpeak_time_option()); Status s; - + // When importing multiple CFs, we should not reuse epoch number from ingested + // files. Since these epoch numbers were assigned by different CFs, there may + // be different files from different CFs with the same epoch number. With a + // subsequent intra-L0 compaction we may end up with files with overlapping + // key range but the same epoch number. Here we will create a dummy + // VersionStorageInfo per CF being imported. Each CF's files will be assigned + // increasing epoch numbers to avoid duplicated epoch number. This is done by + // only resetting epoch number of the new CF in the first call to + // RecoverEpochNumbers() below. 
for (size_t i = 0; s.ok() && i < files_to_import_.size(); ++i) { + VersionBuilder dummy_version_builder( + cfd_->current()->version_set()->file_options(), cfd_->ioptions(), + cfd_->table_cache(), cfd_->current()->storage_info(), + cfd_->current()->version_set(), + cfd_->GetFileMetadataCacheReservationManager()); + VersionStorageInfo dummy_vstorage( + &cfd_->internal_comparator(), cfd_->user_comparator(), + cfd_->NumberLevels(), cfd_->ioptions()->compaction_style, + nullptr /* src_vstorage */, cfd_->ioptions()->force_consistency_checks, + EpochNumberRequirement::kMightMissing, cfd_->ioptions()->clock, + cfd_->GetLatestMutableCFOptions()->bottommost_file_compaction_delay, + cfd_->current()->version_set()->offpeak_time_option()); for (size_t j = 0; s.ok() && j < files_to_import_[i].size(); ++j) { const auto& f = files_to_import_[i][j]; const auto& file_metadata = *metadatas_[i][j]; @@ -218,42 +225,39 @@ Status ImportColumnFamilyJob::Run() { f.table_properties.user_defined_timestamps_persisted)); s = dummy_version_builder.Apply(&dummy_version_edit); } - } - - if (s.ok()) { - s = dummy_version_builder.SaveTo(&dummy_vstorage); - } - if (s.ok()) { - dummy_vstorage.RecoverEpochNumbers(cfd_); - } - - // Record changes from this CF import in VersionEdit, including files with - // recovered epoch numbers - if (s.ok()) { - edit_.SetColumnFamily(cfd_->GetID()); - + if (s.ok()) { + s = dummy_version_builder.SaveTo(&dummy_vstorage); + } + if (s.ok()) { + // force resetting epoch number for each file + dummy_vstorage.RecoverEpochNumbers(cfd_, /*restart_epoch=*/i == 0, + /*force=*/true); + edit_.SetColumnFamily(cfd_->GetID()); + + for (int level = 0; level < dummy_vstorage.num_levels(); level++) { + for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) { + edit_.AddFile(level, *file_meta); + // If incoming sequence number is higher, update local sequence + // number. + if (file_meta->fd.largest_seqno > versions_->LastSequence()) { + versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno); + versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno); + versions_->SetLastSequence(file_meta->fd.largest_seqno); + } + } + } + } + // Release resources occupied by the dummy VersionStorageInfo for (int level = 0; level < dummy_vstorage.num_levels(); level++) { for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) { - edit_.AddFile(level, *file_meta); - // If incoming sequence number is higher, update local sequence number. 
- if (file_meta->fd.largest_seqno > versions_->LastSequence()) { - versions_->SetLastAllocatedSequence(file_meta->fd.largest_seqno); - versions_->SetLastPublishedSequence(file_meta->fd.largest_seqno); - versions_->SetLastSequence(file_meta->fd.largest_seqno); + file_meta->refs--; + if (file_meta->refs <= 0) { + delete file_meta; } } } } - // Release resources occupied by the dummy VersionStorageInfo - for (int level = 0; level < dummy_vstorage.num_levels(); level++) { - for (FileMetaData* file_meta : dummy_vstorage.LevelFiles(level)) { - file_meta->refs--; - if (file_meta->refs <= 0) { - delete file_meta; - } - } - } return s; } diff --git a/db/import_column_family_test.cc b/db/import_column_family_test.cc index e3a36073f..15cae03ed 100644 --- a/db/import_column_family_test.cc +++ b/db/import_column_family_test.cc @@ -946,11 +946,112 @@ TEST_F(ImportColumnFamilyTest, ImportMultiColumnFamilySeveralFilesWithOverlap) { ASSERT_OK(db_->DestroyColumnFamilyHandle(second_cfh)); } +TEST_F(ImportColumnFamilyTest, AssignEpochNumberToMultipleCF) { + // Test ingesting CFs where L0 files could have the same epoch number. + Options options = CurrentOptions(); + options.level_compaction_dynamic_level_bytes = true; + options.max_background_jobs = 8; + env_->SetBackgroundThreads(2, Env::LOW); + env_->SetBackgroundThreads(0, Env::BOTTOM); + CreateAndReopenWithCF({"CF1", "CF2"}, options); + + // CF1: + // L6: [0, 99], [100, 199] + // CF2: + // L6: [1000, 1099], [1100, 1199] + for (int i = 100; i < 200; ++i) { + ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); + ASSERT_OK(Put(2, Key(1000 + i), Key(1000 + i) + "_val")); + } + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(2)); + for (int i = 0; i < 100; ++i) { + ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); + ASSERT_OK(Put(2, Key(1000 + i), Key(1000 + i) + "_val")); + } + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(2)); + MoveFilesToLevel(6, 1); + MoveFilesToLevel(6, 2); + + // CF1: + // level 0 epoch: 5 file num 30 smallest key000010 - key000019 + // level 0 epoch: 4 file num 27 smallest key000000 - key000009 + // level 0 epoch: 3 file num 23 smallest key000100 - key000199 + // level 6 epoch: 2 file num 20 smallest key000000 - key000099 + // level 6 epoch: 1 file num 17 smallest key000100 - key000199 + // CF2: + // level 0 epoch: 5 file num 31 smallest key001010 - key001019 + // level 0 epoch: 4 file num 28 smallest key001000 - key001009 + // level 0 epoch: 3 file num 25 smallest key001020 - key001029 + // level 6 epoch: 2 file num 21 smallest key001000 - key001099 + // level 6 epoch: 1 file num 18 smallest key001100 - key001199 + for (int i = 100; i < 200; ++i) { + ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); + } + ASSERT_OK(Flush(1)); + for (int i = 20; i < 30; ++i) { + ASSERT_OK(Put(2, Key(i + 1000), Key(i + 1000) + "_val")); + } + ASSERT_OK(Flush(2)); + + for (int i = 0; i < 20; ++i) { + ASSERT_OK(Put(1, Key(i), Key(i) + "_val")); + ASSERT_OK(Put(2, Key(i + 1000), Key(i + 1000) + "_val")); + if (i % 10 == 9) { + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(2)); + } + } + ASSERT_OK(Flush(1)); + ASSERT_OK(Flush(2)); + + // Create a CF by importing these two CF1 and CF2. + // Then two compactions will be triggerred, one to compact from L0 + // to L6 (files #23 and #17), and another to do intra-L0 compaction + // for the rest of the L0 files. Before a bug fix, we used to + // directly use the epoch numbers from the ingested files in the new CF. + // This means different files from different CFs can have the same epoch + // number. 
If the intra-L0 compaction finishes first, it can cause a + // corruption where two L0 files can have the same epoch number but + // with overlapping key range. + Checkpoint* checkpoint1; + ASSERT_OK(Checkpoint::Create(db_, &checkpoint1)); + ASSERT_OK(checkpoint1->ExportColumnFamily(handles_[1], export_files_dir_, + &metadata_ptr_)); + ASSERT_OK(checkpoint1->ExportColumnFamily(handles_[2], export_files_dir2_, + &metadata_ptr2_)); + ASSERT_NE(metadata_ptr_, nullptr); + ASSERT_NE(metadata_ptr2_, nullptr); + + std::atomic_int compaction_counter = 0; + SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:NonTrivial:BeforeRun", + [&compaction_counter](void*) { + compaction_counter++; + if (compaction_counter == 1) { + // Wait for the next compaction to finish + TEST_SYNC_POINT("WaitForSecondCompaction"); + } + }); + SyncPoint::GetInstance()->LoadDependency( + {{"DBImpl::BackgroundCompaction:AfterCompaction", + "WaitForSecondCompaction"}}); + SyncPoint::GetInstance()->EnableProcessing(); + ImportColumnFamilyOptions import_options; + import_options.move_files = false; + std::vector metadatas = {metadata_ptr_, + metadata_ptr2_}; + ASSERT_OK(db_->CreateColumnFamilyWithImport(options, "CF3", import_options, + metadatas, &import_cfh_)); + WaitForCompactOptions o; + ASSERT_OK(db_->WaitForCompact(o)); + delete checkpoint1; +} } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} - +} \ No newline at end of file diff --git a/db/version_set.cc b/db/version_set.cc index 335d492bc..181c5e356 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4582,25 +4582,27 @@ uint64_t VersionStorageInfo::GetMaxEpochNumberOfFiles() const { return max_epoch_number; } -void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) { - cfd->ResetNextEpochNumber(); - - bool reserve_epoch_num_for_file_ingested_behind = - cfd->ioptions()->allow_ingest_behind; - if (reserve_epoch_num_for_file_ingested_behind) { - uint64_t reserved_epoch_number = cfd->NewEpochNumber(); - assert(reserved_epoch_number == kReservedEpochNumberForFileIngestedBehind); - ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(), - "[%s]CF has reserved epoch number %" PRIu64 - " for files ingested " - "behind since `Options::allow_ingest_behind` is true", - cfd->GetName().c_str(), reserved_epoch_number); - } - - if (HasMissingEpochNumber()) { - assert(epoch_number_requirement_ == EpochNumberRequirement::kMightMissing); - assert(num_levels_ >= 1); - +void VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd, + bool restart_epoch, bool force) { + if (restart_epoch) { + cfd->ResetNextEpochNumber(); + + bool reserve_epoch_num_for_file_ingested_behind = + cfd->ioptions()->allow_ingest_behind; + if (reserve_epoch_num_for_file_ingested_behind) { + uint64_t reserved_epoch_number = cfd->NewEpochNumber(); + assert(reserved_epoch_number == + kReservedEpochNumberForFileIngestedBehind); + ROCKS_LOG_INFO(cfd->ioptions()->info_log.get(), + "[%s]CF has reserved epoch number %" PRIu64 + " for files ingested " + "behind since `Options::allow_ingest_behind` is true", + cfd->GetName().c_str(), reserved_epoch_number); + } + } + + bool missing_epoch_number = HasMissingEpochNumber(); + if (missing_epoch_number || force) { for (int level = num_levels_ - 1; level >= 1; --level) { auto& files_at_level = files_[level]; if (files_at_level.empty()) { @@ -4611,17 +4613,19 @@ void 
VersionStorageInfo::RecoverEpochNumbers(ColumnFamilyData* cfd) { f->epoch_number = next_epoch_number; } } - for (auto file_meta_iter = files_[0].rbegin(); file_meta_iter != files_[0].rend(); file_meta_iter++) { FileMetaData* f = *file_meta_iter; f->epoch_number = cfd->NewEpochNumber(); } - - ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(), - "[%s]CF's epoch numbers are inferred based on seqno", - cfd->GetName().c_str()); - epoch_number_requirement_ = EpochNumberRequirement::kMustPresent; + if (missing_epoch_number) { + assert(epoch_number_requirement_ == + EpochNumberRequirement::kMightMissing); + ROCKS_LOG_WARN(cfd->ioptions()->info_log.get(), + "[%s]CF's epoch numbers are inferred based on seqno", + cfd->GetName().c_str()); + epoch_number_requirement_ = EpochNumberRequirement::kMustPresent; + } } else { assert(epoch_number_requirement_ == EpochNumberRequirement::kMustPresent); cfd->SetNextEpochNumber( diff --git a/db/version_set.h b/db/version_set.h index 5ccb69771..1ef256a7b 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -341,7 +341,15 @@ class VersionStorageInfo { EpochNumberRequirement epoch_number_requirement) { epoch_number_requirement_ = epoch_number_requirement; } - void RecoverEpochNumbers(ColumnFamilyData* cfd); + // Ensure all files have epoch number set. + // If there is a file missing epoch number, all files' epoch number will be + // reset according to CF's epoch number. Otherwise, the CF will be updated + // with the max epoch number of the files. + // + // @param restart_epoch This CF's epoch number will be reset to start from 0. + // @param force Force resetting all files' epoch number. + void RecoverEpochNumbers(ColumnFamilyData* cfd, bool restart_epoch = true, + bool force = false); class FileLocation { public: diff --git a/unreleased_history/bug_fixes/fix-import-epoch.md b/unreleased_history/bug_fixes/fix-import-epoch.md new file mode 100644 index 000000000..97df6276f --- /dev/null +++ b/unreleased_history/bug_fixes/fix-import-epoch.md @@ -0,0 +1 @@ +* Fix a bug in CreateColumnFamilyWithImport() where if multiple CFs are imported, we were not resetting files' epoch number and L0 files can have overlapping key range but the same epoch number. \ No newline at end of file From d313386cadebf60ae1252dee98e94e1cd5a39999 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Wed, 13 Dec 2023 10:37:27 -0800 Subject: [PATCH 355/386] Speedup based on pending compaction bytes relative to data size (#12130) Summary: RocksDB self throttles per-DB compaction parallelism until it detects compaction pressure. The pressure detection based on pending compaction bytes was only comparing against the slowdown trigger (`soft_pending_compaction_bytes_limit`). Online services tend to set that extremely high to avoid stalling at all costs. Perhaps they should have set it to zero, but we never documented that zero disables stalling so I have been telling everyone to increase it for years. This PR adds pressure detection based on pending compaction bytes relative to the size of bottommost data. 
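For context, here is a minimal sketch of the detection rule in the shape it takes after the follow-up tuning in the next commit (#12236). The slowdown fraction is assumed to come from `soft_pending_compaction_bytes_limit`, and edge cases handled by the real `GetPendingCompactionBytesForCompactionSpeedup()` (such as an empty bottommost level) are omitted:

```cpp
#include <algorithm>
#include <cstdint>

// Sketch only: returns the pending-compaction-bytes level at which per-DB
// compaction parallelism is unlocked. Pressure is detected once compaction
// debt reaches either 1/8 of the stable (bottommost) data size or 1/4 of
// the soft_pending_compaction_bytes_limit slowdown trigger, whichever is
// smaller.
uint64_t PendingBytesThresholdForSpeedup(uint64_t bottommost_files_size,
                                         uint64_t soft_pending_limit) {
  const uint64_t kBottommostSizeDivisor = 8;   // debt vs. bottommost size
  const uint64_t kSlowdownTriggerDivisor = 4;  // progress toward slowdown
  const uint64_t slowdown_threshold =
      soft_pending_limit / kSlowdownTriggerDivisor;
  const uint64_t size_threshold =
      bottommost_files_size / kBottommostSizeDivisor;
  return std::min(size_threshold, slowdown_threshold);
}
```

Per the logging in `db/column_family.cc` further down, estimated pending compaction bytes exceeding this threshold is what grabs a compaction pressure token and allows extra compaction threads to be scheduled.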
The size of bottommost data should be fairly stable and proportional to the logical data size Pull Request resolved: https://github.com/facebook/rocksdb/pull/12130 Reviewed By: hx235 Differential Revision: D52000746 Pulled By: ajkr fbshipit-source-id: 7e1fd170901a74c2d4a69266285e3edf6e7631c7 (cherry picked from commit d8e47620d7e0aa6ecc939eb55e2f2518a7f030b8) --- unreleased_history/behavior_changes/debt_based_speedup.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 unreleased_history/behavior_changes/debt_based_speedup.md diff --git a/unreleased_history/behavior_changes/debt_based_speedup.md b/unreleased_history/behavior_changes/debt_based_speedup.md new file mode 100644 index 000000000..8db8b6688 --- /dev/null +++ b/unreleased_history/behavior_changes/debt_based_speedup.md @@ -0,0 +1 @@ +Compactions can be scheduled in parallel in an additional scenario: high compaction debt relative to the data size From a826affcaa0dfe7120f1fc41d3ef7ae1052655c7 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 15 Jan 2024 22:41:18 -0800 Subject: [PATCH 356/386] Detect compaction pressure at lower debt ratios (#12236) Summary: This PR significantly reduces the compaction pressure threshold introduced in https://github.com/facebook/rocksdb/issues/12130 by a factor of 64x. The original number was too high to trigger in scenarios where compaction parallelism was needed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12236 Reviewed By: cbi42 Differential Revision: D52765685 Pulled By: ajkr fbshipit-source-id: 8298e966933b485de24f63165a00e672cb9db6c4 (cherry picked from commit 2dda7a0dd2f2866b85bbbe48a57406b79d7ceb4c) --- db/column_family.cc | 5 +- db/db_compaction_test.cc | 54 ++++++++++++------- db/db_test2.cc | 11 ++++ .../more_debt_based_speedup.md | 1 + 4 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 unreleased_history/behavior_changes/more_debt_based_speedup.md diff --git a/db/column_family.cc b/db/column_family.cc index 9782cd31a..e0a9c6a27 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -870,7 +870,7 @@ uint64_t GetPendingCompactionBytesForCompactionSpeedup( const VersionStorageInfo* vstorage) { // Compaction debt relatively large compared to the stable (bottommost) data // size indicates compaction fell behind. - const uint64_t kBottommostSizeMultiplier = 8; + const uint64_t kBottommostSizeDivisor = 8; // Meaningful progress toward the slowdown trigger is another good indication. 
const uint64_t kSlowdownTriggerDivisor = 4; @@ -890,8 +890,7 @@ uint64_t GetPendingCompactionBytesForCompactionSpeedup( return slowdown_threshold; } - uint64_t size_threshold = - MultiplyCheckOverflow(bottommost_files_size, kBottommostSizeMultiplier); + uint64_t size_threshold = bottommost_files_size / kBottommostSizeDivisor; return std::min(size_threshold, slowdown_threshold); } } // anonymous namespace diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 2d7123117..be88e198f 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -76,7 +76,7 @@ class CompactionStatsCollector : public EventListener { class DBCompactionTest : public DBTestBase { public: DBCompactionTest() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {} + : DBTestBase("db_compaction_test", /*env_do_fsync=*/false) {} protected: /* @@ -121,7 +121,7 @@ class DBCompactionTestWithParam public testing::WithParamInterface> { public: DBCompactionTestWithParam() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { + : DBTestBase("db_compaction_test", /*env_do_fsync=*/false) { max_subcompactions_ = std::get<0>(GetParam()); exclusive_manual_compaction_ = std::get<1>(GetParam()); } @@ -140,7 +140,7 @@ class DBCompactionTestWithBottommostParam std::tuple> { public: DBCompactionTestWithBottommostParam() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { + : DBTestBase("db_compaction_test", /*env_do_fsync=*/false) { bottommost_level_compaction_ = std::get<0>(GetParam()); } @@ -160,7 +160,7 @@ class DBCompactionWaitForCompactTest std::tuple> { public: DBCompactionWaitForCompactTest() - : DBTestBase("db_compaction_test", /*env_do_fsync=*/true) { + : DBTestBase("db_compaction_test", /*env_do_fsync=*/false) { abort_on_pause_ = std::get<0>(GetParam()); flush_ = std::get<1>(GetParam()); close_db_ = std::get<2>(GetParam()); @@ -845,6 +845,20 @@ TEST_F(DBCompactionTest, BGCompactionsAllowed) { options.memtable_factory.reset( test::NewSpecialSkipListFactory(kNumKeysPerFile)); + CreateAndReopenWithCF({"one", "two", "three"}, options); + + Random rnd(301); + for (int cf = 0; cf < 4; cf++) { + // Make a trivial L1 for L0 to compact into. L2 will be large so debt ratio + // will not cause compaction pressure. + ASSERT_OK(Put(cf, Key(0), rnd.RandomString(102400))); + ASSERT_OK(Flush(cf)); + MoveFilesToLevel(2, cf); + ASSERT_OK(Put(cf, Key(0), "")); + ASSERT_OK(Flush(cf)); + MoveFilesToLevel(1, cf); + } + // Block all threads in thread pool. 
const size_t kTotalTasks = 4; env_->SetBackgroundThreads(4, Env::LOW); @@ -855,9 +869,6 @@ TEST_F(DBCompactionTest, BGCompactionsAllowed) { sleeping_tasks[i].WaitUntilSleeping(); } - CreateAndReopenWithCF({"one", "two", "three"}, options); - - Random rnd(301); for (int cf = 0; cf < 4; cf++) { for (int num = 0; num < options.level0_file_num_compaction_trigger; num++) { for (int i = 0; i < kNumKeysPerFile; i++) { @@ -6150,7 +6161,7 @@ class CompactionPriTest : public DBTestBase, public testing::WithParamInterface { public: CompactionPriTest() - : DBTestBase("compaction_pri_test", /*env_do_fsync=*/true) { + : DBTestBase("compaction_pri_test", /*env_do_fsync=*/false) { compaction_pri_ = GetParam(); } @@ -6270,26 +6281,30 @@ TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) { TEST_P(RoundRobinSubcompactionsAgainstPressureToken, PressureTokenTest) { const int kKeysPerBuffer = 100; + const int kNumSubcompactions = 2; + const int kFilesPerLevel = 50; Options options = CurrentOptions(); - options.num_levels = 4; + options.num_levels = 3; options.max_bytes_for_level_multiplier = 2; options.level0_file_num_compaction_trigger = 4; options.target_file_size_base = kKeysPerBuffer * 1024; options.compaction_pri = CompactionPri::kRoundRobin; - options.max_bytes_for_level_base = 8 * kKeysPerBuffer * 1024; + // Target size is chosen so that filling the level with `kFilesPerLevel` files + // will make it oversized by `kNumSubcompactions` files. + options.max_bytes_for_level_base = + (kFilesPerLevel - kNumSubcompactions) * kKeysPerBuffer * 1024; options.disable_auto_compactions = true; - // Setup 7 threads but limited subcompactions so that - // RoundRobin requires extra compactions from reserved threads + // Setup `kNumSubcompactions` threads but limited subcompactions so + // that RoundRobin requires extra compactions from reserved threads options.max_subcompactions = 1; - options.max_background_compactions = 7; + options.max_background_compactions = kNumSubcompactions; options.max_compaction_bytes = 100000000; DestroyAndReopen(options); - env_->SetBackgroundThreads(7, Env::LOW); + env_->SetBackgroundThreads(kNumSubcompactions, Env::LOW); Random rnd(301); - const std::vector files_per_level = {0, 15, 25}; for (int lvl = 2; lvl > 0; lvl--) { - for (int i = 0; i < files_per_level[lvl]; i++) { + for (int i = 0; i < kFilesPerLevel; i++) { for (int j = 0; j < kKeysPerBuffer; j++) { // Add (lvl-1) to ensure nearly equivallent number of files // in L2 are overlapped with fils selected to compact from @@ -6300,9 +6315,8 @@ TEST_P(RoundRobinSubcompactionsAgainstPressureToken, PressureTokenTest) { ASSERT_OK(Flush()); } MoveFilesToLevel(lvl); - ASSERT_EQ(files_per_level[lvl], NumTableFilesAtLevel(lvl, 0)); + ASSERT_EQ(kFilesPerLevel, NumTableFilesAtLevel(lvl, 0)); } - // 15 files in L1; 25 files in L2 // This is a variable for making sure the following callback is called // and the assertions in it are indeed excuted. @@ -6311,10 +6325,10 @@ TEST_P(RoundRobinSubcompactionsAgainstPressureToken, PressureTokenTest) { "CompactionJob::GenSubcompactionBoundaries:0", [&](void* arg) { uint64_t num_planned_subcompactions = *(static_cast(arg)); if (grab_pressure_token_) { - // 7 files are selected for round-robin under auto + // `kNumSubcompactions` files are selected for round-robin under auto // compaction. 
The number of planned subcompaction is restricted by // the limited number of max_background_compactions - ASSERT_EQ(num_planned_subcompactions, 7); + ASSERT_EQ(num_planned_subcompactions, kNumSubcompactions); } else { ASSERT_EQ(num_planned_subcompactions, 1); } diff --git a/db/db_test2.cc b/db/db_test2.cc index e471685b2..db879268c 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -3861,6 +3861,17 @@ TEST_F(DBTest2, LowPriWrite) { int64_t* rate_bytes_per_sec = static_cast(arg); ASSERT_EQ(1024 * 1024, *rate_bytes_per_sec); }); + + // Make a trivial L5 for L0 to compact into. L6 will be large so debt ratio + // will not cause compaction pressure. + Random rnd(301); + ASSERT_OK(Put("", rnd.RandomString(102400))); + ASSERT_OK(Flush()); + MoveFilesToLevel(6); + ASSERT_OK(Put("", "")); + ASSERT_OK(Flush()); + MoveFilesToLevel(5); + // Block compaction ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({ {"DBTest.LowPriWrite:0", "DBImpl::BGWorkCompaction"}, diff --git a/unreleased_history/behavior_changes/more_debt_based_speedup.md b/unreleased_history/behavior_changes/more_debt_based_speedup.md new file mode 100644 index 000000000..c10f0d7b3 --- /dev/null +++ b/unreleased_history/behavior_changes/more_debt_based_speedup.md @@ -0,0 +1 @@ +Reduced the compaction debt ratio trigger for scheduling parallel compactions From 69ba29adfa57bfa25a927a821d021b588fdcbb85 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 29 Jan 2024 17:29:04 -0800 Subject: [PATCH 357/386] Speedup based on number of files marked for compaction (#12306) Summary: RocksDB self throttles per-DB compaction parallelism until it detects compaction pressure. This PR adds pressure detection based on the number of files marked for compaction. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12306 Reviewed By: cbi42 Differential Revision: D53200559 Pulled By: ajkr fbshipit-source-id: 63402ee336881a4539204d255960f04338ab7a0e (cherry picked from commit aacf60dda2a138f9d3826c25818a3bcf250859fd) --- db/column_family.cc | 18 +++++ db/column_family_test.cc | 69 +++++++++++++++++++ .../behavior_changes/marking_based_speedup.md | 1 + 3 files changed, 88 insertions(+) create mode 100644 unreleased_history/behavior_changes/marking_based_speedup.md diff --git a/db/column_family.cc b/db/column_family.cc index e0a9c6a27..2be522341 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -893,6 +893,14 @@ uint64_t GetPendingCompactionBytesForCompactionSpeedup( uint64_t size_threshold = bottommost_files_size / kBottommostSizeDivisor; return std::min(size_threshold, slowdown_threshold); } + +uint64_t GetMarkedFileCountForCompactionSpeedup() { + // When just one file is marked, it is not clear that parallel compaction will + // help the compaction that the user nicely requested to happen sooner. When + // multiple files are marked, however, it is pretty clearly helpful, except + // for the rare case in which a single compaction grabs all the marked files. 
+ return 2; +} } // anonymous namespace std::pair @@ -1074,6 +1082,16 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions( "compaction " "bytes %" PRIu64, name_.c_str(), vstorage->estimated_compaction_needed_bytes()); + } else if (uint64_t(vstorage->FilesMarkedForCompaction().size()) >= + GetMarkedFileCountForCompactionSpeedup()) { + write_controller_token_ = + write_controller->GetCompactionPressureToken(); + ROCKS_LOG_INFO( + ioptions_.logger, + "[%s] Increasing compaction threads because we have %" PRIu64 + " files marked for compaction", + name_.c_str(), + uint64_t(vstorage->FilesMarkedForCompaction().size())); } else { write_controller_token_.reset(); } diff --git a/db/column_family_test.cc b/db/column_family_test.cc index edadfebf1..cf7327287 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -3008,6 +3008,75 @@ TEST_P(ColumnFamilyTest, CompactionSpeedupForCompactionDebt) { } } +TEST_P(ColumnFamilyTest, CompactionSpeedupForMarkedFiles) { + const int kParallelismLimit = 3; + class AlwaysCompactTpc : public TablePropertiesCollector { + public: + Status Finish(UserCollectedProperties* /* properties */) override { + return Status::OK(); + } + + UserCollectedProperties GetReadableProperties() const override { + return UserCollectedProperties{}; + } + + const char* Name() const override { return "AlwaysCompactTpc"; } + + bool NeedCompact() const override { return true; } + }; + + class AlwaysCompactTpcf : public TablePropertiesCollectorFactory { + public: + TablePropertiesCollector* CreateTablePropertiesCollector( + TablePropertiesCollectorFactory::Context /* context */) override { + return new AlwaysCompactTpc(); + } + + const char* Name() const override { return "AlwaysCompactTpcf"; } + }; + + column_family_options_.num_levels = 2; + column_family_options_.table_properties_collector_factories.emplace_back( + std::make_shared()); + db_options_.max_background_compactions = kParallelismLimit; + Open(); + + // Make a nonempty last level. Only marked files in upper levels count. + ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Flush(FlushOptions())); + WaitForCompaction(); + AssertFilesPerLevel("0,1", 0 /* cf */); + + // Block the compaction thread pool so marked files accumulate in L0. + test::SleepingBackgroundTask sleeping_tasks[kParallelismLimit]; + for (int i = 0; i < kParallelismLimit; i++) { + env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, + &sleeping_tasks[i], Env::Priority::LOW); + sleeping_tasks[i].WaitUntilSleeping(); + } + + // Zero marked upper-level files. No speedup. + ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); + AssertFilesPerLevel("0,1", 0 /* cf */); + + // One marked upper-level file. No speedup. + ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_EQ(1, dbfull()->TEST_BGCompactionsAllowed()); + AssertFilesPerLevel("1,1", 0 /* cf */); + + // Two marked upper-level files. Speedup. 
+ ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_EQ(kParallelismLimit, dbfull()->TEST_BGCompactionsAllowed()); + AssertFilesPerLevel("2,1", 0 /* cf */); + + for (int i = 0; i < kParallelismLimit; i++) { + sleeping_tasks[i].WakeUp(); + sleeping_tasks[i].WaitUntilDone(); + } +} + TEST_P(ColumnFamilyTest, CreateAndDestroyOptions) { std::unique_ptr cfo(new ColumnFamilyOptions()); ColumnFamilyHandle* cfh; diff --git a/unreleased_history/behavior_changes/marking_based_speedup.md b/unreleased_history/behavior_changes/marking_based_speedup.md new file mode 100644 index 000000000..cf01b8f58 --- /dev/null +++ b/unreleased_history/behavior_changes/marking_based_speedup.md @@ -0,0 +1 @@ +Compactions can be scheduled in parallel in an additional scenario: multiple files are marked for compaction within a single column family From ee8b7d126088934206b5df89e150cb10e330466d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A9=AC=E8=B6=8A?= Date: Fri, 5 Jul 2024 10:40:53 +0800 Subject: [PATCH 358/386] Fix folly build (#12795) Summary: - Updated pinned folly version to the latest - gcc/g++ 10 is required since https://github.com/facebook/folly/commit/2c1c617e9e so we had to modify the tests using gcc/g++ 7 - libsodium 1.0.17 is no longer downloadable from GitHub so I found it elsewhere. I will submit a PR for that upstream to folly - USE_FOLLY_LITE changes - added boost header dependency instead of commenting out the `#include`s since that approach stopped working - added "folly/lang/Exception.cpp" to the compilation Pull Request resolved: https://github.com/facebook/rocksdb/pull/12795 Reviewed By: hx235 Differential Revision: D58916693 Pulled By: ajkr fbshipit-source-id: b5f9bca2d929825846ac898b785972b071db62b1 (cherry picked from commit 40944cbbdbdcfac694fc3b291ba1838e943a789b) --- .circleci/config.yml | 20 +++++++++++++------- CMakeLists.txt | 9 ++++++++- Makefile | 23 +++++++++++++++++------ build_tools/build_detect_platform | 8 +++++++- src.mk | 1 + 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 84b39239a..fbeb41143 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -473,23 +473,29 @@ jobs: - run: make V=1 -j8 -k check-headers # could be moved to a different build - post-steps - build-linux-gcc-7-with-folly: + build-linux-make-with-folly: executor: linux-docker resource_class: xlarge + environment: + CC: gcc-10 + CXX: g++-10 steps: - pre-steps - setup-folly - build-folly - - run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j8 check # TODO: LIB_MODE only to work around unresolved linker failures + - run: USE_FOLLY=1 LIB_MODE=static V=1 make -j8 check # TODO: LIB_MODE only to work around unresolved linker failures - post-steps - build-linux-gcc-7-with-folly-lite-no-test: + build-linux-make-with-folly-lite-no-test: executor: linux-docker resource_class: xlarge + environment: + CC: gcc-10 + CXX: g++-10 steps: - pre-steps - setup-folly - - run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j8 all + - run: USE_FOLLY_LITE=1 V=1 make -j8 all - post-steps build-linux-gcc-8-no_test_run: @@ -903,8 +909,8 @@ workflows: - build-linux - build-linux-cmake-with-folly - build-linux-cmake-with-folly-lite-no-test - - build-linux-gcc-7-with-folly - - build-linux-gcc-7-with-folly-lite-no-test + - build-linux-make-with-folly + - build-linux-make-with-folly-lite-no-test - build-linux-cmake-with-folly-coroutines - build-linux-cmake-with-benchmark - 
build-linux-encrypted_env-no_compression @@ -979,7 +985,7 @@ workflows: - build-linux-arm-test-full - build-linux-run-microbench - build-linux-non-shm - - build-linux-clang-13-asan-ubsan-with-folly +# - build-linux-clang-13-asan-ubsan-with-folly - build-linux-valgrind - build-windows-vs2022-avx2 - build-windows-vs2022 diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c11b4fe4..5fcd9b7cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -596,7 +596,7 @@ if(USE_FOLLY) FMT_INST_PATH) exec_program(ls ARGS -d ${FOLLY_INST_PATH}/../gflags* OUTPUT_VARIABLE GFLAGS_INST_PATH) - set(Boost_DIR ${BOOST_INST_PATH}/lib/cmake/Boost-1.78.0) + set(Boost_DIR ${BOOST_INST_PATH}/lib/cmake/Boost-1.83.0) if(EXISTS ${FMT_INST_PATH}/lib64) set(fmt_DIR ${FMT_INST_PATH}/lib64/cmake/fmt) else() @@ -1026,6 +1026,7 @@ if(USE_FOLLY_LITE) list(APPEND SOURCES third-party/folly/folly/container/detail/F14Table.cpp third-party/folly/folly/detail/Futex.cpp + third-party/folly/folly/lang/Exception.cpp third-party/folly/folly/lang/SafeAssert.cpp third-party/folly/folly/lang/ToAscii.cpp third-party/folly/folly/ScopeGuard.cpp @@ -1033,6 +1034,12 @@ if(USE_FOLLY_LITE) third-party/folly/folly/synchronization/DistributedMutex.cpp third-party/folly/folly/synchronization/ParkingLot.cpp) include_directories(${PROJECT_SOURCE_DIR}/third-party/folly) + exec_program(python3 ${PROJECT_SOURCE_DIR}/third-party/folly ARGS + build/fbcode_builder/getdeps.py show-source-dir boost OUTPUT_VARIABLE + BOOST_SOURCE_PATH) + exec_program(ls ARGS -d ${BOOST_SOURCE_PATH}/boost* OUTPUT_VARIABLE + BOOST_INCLUDE_DIR) + include_directories(${BOOST_INCLUDE_DIR}) add_definitions(-DUSE_FOLLY -DFOLLY_NO_CONFIG) list(APPEND THIRDPARTY_LIBS glog) endif() diff --git a/Makefile b/Makefile index 332db623b..fc726dcd7 100644 --- a/Makefile +++ b/Makefile @@ -105,6 +105,7 @@ dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \ export LIB_MODE="$(LIB_MODE)"; \ export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \ export USE_FOLLY="$(USE_FOLLY)"; \ + export USE_FOLLY_LITE="$(USE_FOLLY_LITE)"; \ "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk")) # this file is generated by the previous line to set build flags and sources include make_config.mk @@ -502,6 +503,17 @@ endif ifeq ($(USE_FOLLY_LITE),1) # Path to the Folly source code and include files FOLLY_DIR = ./third-party/folly +ifneq ($(strip $(BOOST_SOURCE_PATH)),) + BOOST_INCLUDE = $(shell (ls -d $(BOOST_SOURCE_PATH)/boost*/)) + # AIX: pre-defined system headers are surrounded by an extern "C" block + ifeq ($(PLATFORM), OS_AIX) + PLATFORM_CCFLAGS += -I$(BOOST_INCLUDE) + PLATFORM_CXXFLAGS += -I$(BOOST_INCLUDE) + else + PLATFORM_CCFLAGS += -isystem $(BOOST_INCLUDE) + PLATFORM_CXXFLAGS += -isystem $(BOOST_INCLUDE) + endif +endif # BOOST_SOURCE_PATH # AIX: pre-defined system headers are surrounded by an extern "C" block ifeq ($(PLATFORM), OS_AIX) PLATFORM_CCFLAGS += -I$(FOLLY_DIR) @@ -2482,14 +2494,15 @@ checkout_folly: fi @# Pin to a particular version for public CI, so that PR authors don't @# need to worry about folly breaking our integration. Update periodically - cd third-party/folly && git reset --hard beacd86d63cd71c904632262e6c36f60874d78ba - @# A hack to remove boost dependency. 
- @# NOTE: this hack is only needed if building using USE_FOLLY_LITE - perl -pi -e 's/^(#include .)/__cpp_rtti && $$1/' third-party/folly/folly/memory/MemoryResource.h + @# NOTE: this hack is only needed if building target build_folly in the future + perl -pi -e 's,https://github.com/jedisct1/libsodium/releases/download/1.0.17/libsodium-1.0.17.tar.gz,https://download.libsodium.org/libsodium/releases/old/unsupported/libsodium-1.0.17.tar.gz,' third-party/folly/build/fbcode_builder/manifests/libsodium + @# NOTE: boost source will be needed for any build including `USE_FOLLY_LITE` builds as those depend on boost headers + cd third-party/folly && $(PYTHON) build/fbcode_builder/getdeps.py fetch boost CXX_M_FLAGS = $(filter -m%, $(CXXFLAGS)) @@ -2501,8 +2514,6 @@ build_folly: echo "Please run checkout_folly first"; \ false; \ fi - # Restore the original version of Invoke.h with boost dependency - cd third-party/folly && ${GIT_COMMAND} checkout folly/functional/Invoke.h cd third-party/folly && \ CXXFLAGS=" $(CXX_M_FLAGS) -DHAVE_CXX11_ATOMIC " $(PYTHON) build/fbcode_builder/getdeps.py build --no-tests diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index a5e2b5aa2..87932ba7a 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -603,7 +603,7 @@ EOF PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark" fi fi - if test $USE_FOLLY; then + if test $USE_FOLLY || test $USE_FOLLY_LITE; then # Test whether libfolly library is installed $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < @@ -751,6 +751,11 @@ if [ "$USE_FOLLY" ]; then FOLLY_PATH=`cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-inst-dir folly` fi fi +if [ "$USE_FOLLY_LITE" ]; then + if [ "$FOLLY_DIR" ]; then + BOOST_SOURCE_PATH=`cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-source-dir boost` + fi +fi PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" @@ -792,6 +797,7 @@ echo "PROFILING_FLAGS=$PROFILING_FLAGS" >> "$OUTPUT" echo "FIND=$FIND" >> "$OUTPUT" echo "WATCH=$WATCH" >> "$OUTPUT" echo "FOLLY_PATH=$FOLLY_PATH" >> "$OUTPUT" +echo "BOOST_SOURCE_PATH=$BOOST_SOURCE_PATH" >> "$OUTPUT" # This will enable some related identifiers for the preprocessor if test -n "$JEMALLOC"; then diff --git a/src.mk b/src.mk index 2e5253176..dc3289b00 100644 --- a/src.mk +++ b/src.mk @@ -401,6 +401,7 @@ TEST_LIB_SOURCES = \ FOLLY_SOURCES = \ $(FOLLY_DIR)/folly/container/detail/F14Table.cpp \ $(FOLLY_DIR)/folly/detail/Futex.cpp \ + $(FOLLY_DIR)/folly/lang/Exception.cpp \ $(FOLLY_DIR)/folly/lang/SafeAssert.cpp \ $(FOLLY_DIR)/folly/lang/ToAscii.cpp \ $(FOLLY_DIR)/folly/ScopeGuard.cpp \ From 9a10201138756a01b0954342bfb18a05723ebf72 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 24 Jun 2024 10:46:29 -0700 Subject: [PATCH 359/386] Update pinned folly version (#12801) Summary: https://github.com/facebook/folly/commit/843fd576793d91c4c55fa3495b1694e5d708c54b fixed the URL for libsodium. Updated folly version to latest, which includes that commit. I am not sure the URL will be stable, but it still seems better than substituting the URL. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12801 Reviewed By: cbi42 Differential Revision: D58921033 Pulled By: ajkr fbshipit-source-id: 442ea3ff83ced2679ea9bfd04945e9449ce2ff96 (cherry picked from commit 13549817afd97ce29705c42e87fa945056fd2d11) --- .circleci/config.yml | 6 ++++++ Makefile | 4 +--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fbeb41143..1c1feced2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -433,6 +433,9 @@ jobs: build-linux-cmake-with-folly: executor: linux-docker resource_class: xlarge + environment: + CC: gcc-10 + CXX: g++-10 steps: - pre-steps - setup-folly @@ -443,6 +446,9 @@ jobs: build-linux-cmake-with-folly-lite-no-test: executor: linux-docker resource_class: xlarge + environment: + CC: gcc-10 + CXX: g++-10 steps: - pre-steps - setup-folly diff --git a/Makefile b/Makefile index fc726dcd7..cafd48529 100644 --- a/Makefile +++ b/Makefile @@ -2494,13 +2494,11 @@ checkout_folly: fi @# Pin to a particular version for public CI, so that PR authors don't @# need to worry about folly breaking our integration. Update periodically - cd third-party/folly && git reset --hard 17054c85c2dee0e84cbee7ef28d7bae7d0b1630e + cd third-party/folly && git reset --hard c48fdd205c1c291651749d532b8055fe822bba25 @# NOTE: this hack is required for clang in some cases perl -pi -e 's/int rv = syscall/int rv = (int)syscall/' third-party/folly/folly/detail/Futex.cpp @# NOTE: this hack is required for gcc in some cases perl -pi -e 's/(__has_include..)/__cpp_rtti && $$1/' third-party/folly/folly/memory/MemoryResource.h - @# NOTE: this hack is only needed if building target build_folly in the future - perl -pi -e 's,https://github.com/jedisct1/libsodium/releases/download/1.0.17/libsodium-1.0.17.tar.gz,https://download.libsodium.org/libsodium/releases/old/unsupported/libsodium-1.0.17.tar.gz,' third-party/folly/build/fbcode_builder/manifests/libsodium @# NOTE: boost source will be needed for any build including `USE_FOLLY_LITE` builds as those depend on boost headers cd third-party/folly && $(PYTHON) build/fbcode_builder/getdeps.py fetch boost From 8fe04d84e96af2053b948e7f357bea76de64f271 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Wed, 24 Jan 2024 11:21:05 -0800 Subject: [PATCH 360/386] Fix bug of newer ingested data assigned with an older seqno (#12257) Summary: **Context:** We found an edge case where newer ingested data is assigned with an older seqno. This causes older data of that key to be returned for read. Consider the following lsm shape: ![image](https://github.com/facebook/rocksdb/assets/83968999/973fd160-5065-49cd-8b7b-b6ab4badae23) Then ingest a file to L5 containing new data of key_overlap. Because of [this](https://l.facebook.com/l.php?u=https%3A%2F%2Fgithub.com%2Ffacebook%2Frocksdb%2Fblob%2F5a26f392ca640818da0b8590be6119699e852b07%2Fdb%2Fexternal_sst_file_ingestion_job.cc%3Ffbclid%3DIwAR10clXxpUSrt6sYg12sUMeHfShS7XigFrsJHvZoUDroQpbj_Sb3dG_JZFc%23L951-L956&h=AT0m56P7O0ZML7jk1sdjgnZZyGPMXg9HkKvBEb8mE9ZM3fpJjPrArAMsaHWZQPt9Ki-Pn7lv7x-RT9NEd_202Y6D2juIVHOIt3EjCZptDKBLRBMG49F8iBUSM9ypiKe8XCfM-FNW2Hl4KbVq2e3nZRbMvUM), the file is assigned with seqno 2, older than the old data's seqno 4. After just another compaction, we will drop the new_v for key_overlap because of the seqno and cause older data to be returned. 
![image](https://github.com/facebook/rocksdb/assets/83968999/a3ef95e4-e7ae-4c30-8d03-955cd4b5ed42) **Summary:** This PR removes the incorrect seqno assignment Pull Request resolved: https://github.com/facebook/rocksdb/pull/12257 Test Plan: - New unit test failed before the fix but passes after - python3 tools/db_crashtest.py --compaction_style=1 --ingest_external_file_one_in=10 --preclude_last_level_data_seconds=36000 --compact_files_one_in=10 --enable_blob_files=0 blackbox` - Rehearsal stress test Reviewed By: cbi42 Differential Revision: D52926092 Pulled By: hx235 fbshipit-source-id: 9e4dade0f6cc44e548db8fca27ccbc81a621cd6f (cherry picked from commit 1b2b16b38ef760252d61b123e7e39c26306cd1c7) --- db/external_sst_file_basic_test.cc | 76 +++++++++++++++++++ db/external_sst_file_ingestion_job.cc | 20 ----- .../new_ingested_data_with_old_seqno.md | 1 + 3 files changed, 77 insertions(+), 20 deletions(-) create mode 100644 unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 749a172ac..41ac48a07 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -9,6 +9,8 @@ #include "db/version_edit.h" #include "port/port.h" #include "port/stack_trace.h" +#include "rocksdb/advanced_options.h" +#include "rocksdb/options.h" #include "rocksdb/sst_file_writer.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -1292,6 +1294,80 @@ TEST_F(ExternalSSTFileBasicTest, VerifyChecksumReadahead) { Destroy(options); } +TEST_F(ExternalSSTFileBasicTest, ReadOldValueOfIngestedKeyBug) { + Options options = CurrentOptions(); + options.compaction_style = kCompactionStyleUniversal; + options.disable_auto_compactions = true; + options.num_levels = 3; + options.preserve_internal_time_seconds = 36000; + DestroyAndReopen(options); + + // To create the following LSM tree to trigger the bug: + // L0 + // L1 with seqno [1, 2] + // L2 with seqno [3, 4] + + // To create L1 shape + ASSERT_OK( + db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k1", "seqno1")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_OK( + db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k1", "seqno2")); + ASSERT_OK(db_->Flush(FlushOptions())); + ColumnFamilyMetaData meta_1; + db_->GetColumnFamilyMetaData(&meta_1); + auto& files_1 = meta_1.levels[0].files; + ASSERT_EQ(files_1.size(), 2); + std::string file1 = files_1[0].db_path + files_1[0].name; + std::string file2 = files_1[1].db_path + files_1[1].name; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), {file1, file2}, 1)); + // To confirm L1 shape + ColumnFamilyMetaData meta_2; + db_->GetColumnFamilyMetaData(&meta_2); + ASSERT_EQ(meta_2.levels[0].files.size(), 0); + ASSERT_EQ(meta_2.levels[1].files.size(), 1); + // Seqno starts from non-zero due to seqno reservation for + // preserve_internal_time_seconds greater than 0; + ASSERT_EQ(meta_2.levels[1].files[0].largest_seqno, 102); + ASSERT_EQ(meta_2.levels[2].files.size(), 0); + // To create L2 shape + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k2overlap", + "old_value")); + ASSERT_OK(db_->Flush(FlushOptions())); + ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), "k2overlap", + "old_value")); + ASSERT_OK(db_->Flush(FlushOptions())); + ColumnFamilyMetaData meta_3; + db_->GetColumnFamilyMetaData(&meta_3); + auto& files_3 = meta_3.levels[0].files; + std::string file3 = files_3[0].db_path + files_3[0].name; + std::string file4 = files_3[1].db_path + 
files_3[1].name; + ASSERT_OK(db_->CompactFiles(CompactionOptions(), {file3, file4}, 2)); + // To confirm L2 shape + ColumnFamilyMetaData meta_4; + db_->GetColumnFamilyMetaData(&meta_4); + ASSERT_EQ(meta_4.levels[0].files.size(), 0); + ASSERT_EQ(meta_4.levels[1].files.size(), 1); + ASSERT_EQ(meta_4.levels[2].files.size(), 1); + ASSERT_EQ(meta_4.levels[2].files[0].largest_seqno, 104); + + // Ingest a file with new value of the key "k2overlap" + SstFileWriter sst_file_writer(EnvOptions(), options); + std::string f = sst_files_dir_ + "f.sst"; + ASSERT_OK(sst_file_writer.Open(f)); + ASSERT_OK(sst_file_writer.Put("k2overlap", "new_value")); + ExternalSstFileInfo f_info; + ASSERT_OK(sst_file_writer.Finish(&f_info)); + ASSERT_OK(db_->IngestExternalFile({f}, IngestExternalFileOptions())); + + // To verify new value of the key "k2overlap" is correctly returned + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + std::string value; + ASSERT_OK(db_->Get(ReadOptions(), "k2overlap", &value)); + // Before the fix, the value would be "old_value" and assertion failed + ASSERT_EQ(value, "new_value"); +} + TEST_F(ExternalSSTFileBasicTest, IngestRangeDeletionTombstoneWithGlobalSeqno) { for (int i = 5; i < 25; i++) { ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i), diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index a4a194714..778e054c7 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -937,26 +937,6 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( overlap_with_db = true; break; } - - if (compaction_style == kCompactionStyleUniversal && lvl != 0) { - const std::vector& level_files = - vstorage->LevelFiles(lvl); - const SequenceNumber level_largest_seqno = - (*std::max_element(level_files.begin(), level_files.end(), - [](FileMetaData* f1, FileMetaData* f2) { - return f1->fd.largest_seqno < - f2->fd.largest_seqno; - })) - ->fd.largest_seqno; - // should only assign seqno to current level's largest seqno when - // the file fits - if (level_largest_seqno != 0 && - IngestedFileFitInLevel(file_to_ingest, lvl)) { - *assigned_seqno = level_largest_seqno; - } else { - continue; - } - } } else if (compaction_style == kCompactionStyleUniversal) { continue; } diff --git a/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md b/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md new file mode 100644 index 000000000..8d0f32b4b --- /dev/null +++ b/unreleased_history/bug_fixes/new_ingested_data_with_old_seqno.md @@ -0,0 +1 @@ +Fix a bug where older data of an ingested key can be returned for read when universal compaction is used From 703d00a69de8a6aa79e4d402023fe676845989e8 Mon Sep 17 00:00:00 2001 From: Nicolas Pepin-Perreault Date: Tue, 26 Dec 2023 14:02:36 -0800 Subject: [PATCH 361/386] Access SST full file checksum via RocksDB#getLiveFilesMetadata (#11770) Summary: **Description** This PR passes along the native `LiveFileMetaData#file_checksum` field from the C++ class to the Java API as a copied byte array. If there is no file checksum generator factory set beforehand, then the array will empty. Please advise if you'd rather it be null - an empty array means one extra allocation, but it avoids possible null pointer exceptions. 
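For reference, this is roughly what the underlying native data looks like. The following is a hedged C++ sketch (with a made-up DB path), not the new Java API itself, showing the `LiveFileMetaData::file_checksum` field that the JNI layer copies into the Java byte array; it stays empty when no checksum generator factory is configured:

```cpp
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/metadata.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Without a checksum generator factory, file_checksum stays empty,
  // which is why the Java side returns an empty byte[] in that case.
  options.file_checksum_gen_factory =
      rocksdb::GetFileChecksumGenCrc32cFactory();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/checksum_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  std::vector<rocksdb::LiveFileMetaData> files;
  db->GetLiveFilesMetaData(&files);
  for (const auto& f : files) {
    // f.file_checksum holds the raw checksum bytes copied to Java;
    // f.file_checksum_func_name names the generator that produced them.
    (void)f.file_checksum;
    (void)f.file_checksum_func_name;
  }
  delete db;
  return 0;
}
```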
> **Note** > This functionality complements but does not supersede https://github.com/facebook/rocksdb/issues/11736 It's outside the scope here to add support for Java based `FileChecksumGenFactory` implementations. As a workaround, users can already use the built-in one by creating their initial `DBOptions` via properties: ```java final Properties props = new Properties(); props.put("file_checksum_gen_factory", "FileChecksumGenCrc32cFactory"); try (final DBOptions dbOptions = DBOptions.getDBOptionsFromProps(props); final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(); final Options options = new Options(dbOptions, cfOptions).setCreateIfMissing(true)) { // do stuff } ``` I wanted to add a better test, but unfortunately there's no available CRC32C implementation available in Java 8 without adding a dependency or adding a JNI helper for RocksDB's own implementation (or bumping the minimum version for tests to Java 9). That said, I understand the test is rather poor, so happy to change it to whatever you'd like. **Context** To give some context, we replicate RocksDB checkpoints to other nodes. Part of this is verifying the integrity of each file during replication. With a large enough RocksDB, computing the checksum ourselves is prohibitively expensive. Since SST files comprise the bulk of the data, we'd much rather delegate this to RocksDB on file write, and read it back after to compare. It's likely we will provide a follow up to read the file checksum list directly from the manifest without having to open the DB, but this was the easiest first step to get it working for us. Pull Request resolved: https://github.com/facebook/rocksdb/pull/11770 Reviewed By: hx235 Differential Revision: D52420729 Pulled By: ajkr fbshipit-source-id: a873de35a48aaf315e125733091cd221a97b9073 (cherry picked from commit 5b073a7daa1c2949cd188ca981104f174ddc61af) --- java/rocksjni/portal.h | 18 ++++++++++-- .../java/org/rocksdb/LiveFileMetaData.java | 23 +++++---------- .../java/org/rocksdb/SstFileMetaData.java | 15 +++++++++- .../test/java/org/rocksdb/RocksDBTest.java | 28 +++++++++++++++++++ 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 45d0c184c..3edff81aa 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -7447,7 +7447,7 @@ class LiveFileMetaDataJni : public JavaClass { jmethodID mid = env->GetMethodID( jclazz, "", - "([BILjava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); + "([BILjava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ[B)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -7498,6 +7498,18 @@ class LiveFileMetaDataJni : public JavaClass { return nullptr; } + jbyteArray jfile_checksum = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, live_file_meta_data->file_checksum); + if (env->ExceptionCheck()) { + // exception occurred creating java string + env->DeleteLocalRef(jcolumn_family_name); + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + return nullptr; + } + jobject jlive_file_meta_data = env->NewObject( jclazz, mid, jcolumn_family_name, static_cast(live_file_meta_data->level), jfile_name, jpath, @@ -7508,7 +7520,7 @@ class LiveFileMetaDataJni : public JavaClass { static_cast(live_file_meta_data->num_reads_sampled), static_cast(live_file_meta_data->being_compacted), static_cast(live_file_meta_data->num_entries), - static_cast(live_file_meta_data->num_deletions)); 
+ static_cast(live_file_meta_data->num_deletions), jfile_checksum); if (env->ExceptionCheck()) { env->DeleteLocalRef(jcolumn_family_name); @@ -7516,6 +7528,7 @@ class LiveFileMetaDataJni : public JavaClass { env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); + env->DeleteLocalRef(jfile_checksum); return nullptr; } @@ -7525,6 +7538,7 @@ class LiveFileMetaDataJni : public JavaClass { env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); + env->DeleteLocalRef(jfile_checksum); return jlive_file_meta_data; } diff --git a/java/src/main/java/org/rocksdb/LiveFileMetaData.java b/java/src/main/java/org/rocksdb/LiveFileMetaData.java index cb0f1a302..5242496a3 100644 --- a/java/src/main/java/org/rocksdb/LiveFileMetaData.java +++ b/java/src/main/java/org/rocksdb/LiveFileMetaData.java @@ -16,22 +16,13 @@ public class LiveFileMetaData extends SstFileMetaData { /** * Called from JNI C++ */ - private LiveFileMetaData( - final byte[] columnFamilyName, - final int level, - final String fileName, - final String path, - final long size, - final long smallestSeqno, - final long largestSeqno, - final byte[] smallestKey, - final byte[] largestKey, - final long numReadsSampled, - final boolean beingCompacted, - final long numEntries, - final long numDeletions) { - super(fileName, path, size, smallestSeqno, largestSeqno, smallestKey, - largestKey, numReadsSampled, beingCompacted, numEntries, numDeletions); + private LiveFileMetaData(final byte[] columnFamilyName, final int level, final String fileName, + final String path, final long size, final long smallestSeqno, final long largestSeqno, + final byte[] smallestKey, final byte[] largestKey, final long numReadsSampled, + final boolean beingCompacted, final long numEntries, final long numDeletions, + final byte[] fileChecksum) { + super(fileName, path, size, smallestSeqno, largestSeqno, smallestKey, largestKey, + numReadsSampled, beingCompacted, numEntries, numDeletions, fileChecksum); this.columnFamilyName = columnFamilyName; this.level = level; } diff --git a/java/src/main/java/org/rocksdb/SstFileMetaData.java b/java/src/main/java/org/rocksdb/SstFileMetaData.java index 88ea8152a..6025d0b42 100644 --- a/java/src/main/java/org/rocksdb/SstFileMetaData.java +++ b/java/src/main/java/org/rocksdb/SstFileMetaData.java @@ -20,6 +20,7 @@ public class SstFileMetaData { private final boolean beingCompacted; private final long numEntries; private final long numDeletions; + private final byte[] fileChecksum; /** * Called from JNI C++ @@ -35,12 +36,13 @@ public class SstFileMetaData { * @param beingCompacted true if the file is being compacted, false otherwise * @param numEntries the number of entries * @param numDeletions the number of deletions + * @param fileChecksum the full file checksum (if enabled) */ @SuppressWarnings("PMD.ArrayIsStoredDirectly") protected SstFileMetaData(final String fileName, final String path, final long size, final long smallestSeqno, final long largestSeqno, final byte[] smallestKey, final byte[] largestKey, final long numReadsSampled, final boolean beingCompacted, - final long numEntries, final long numDeletions) { + final long numEntries, final long numDeletions, final byte[] fileChecksum) { this.fileName = fileName; this.path = path; this.size = size; @@ -52,6 +54,7 @@ protected SstFileMetaData(final String fileName, final String path, final long s this.beingCompacted = beingCompacted; this.numEntries = numEntries; this.numDeletions = numDeletions; + 
this.fileChecksum = fileChecksum; } /** @@ -154,4 +157,14 @@ public long numEntries() { public long numDeletions() { return numDeletions; } + + /** + * Get the full file checksum iff full file checksum is enabled. + * + * @return the file's checksum + */ + @SuppressWarnings("PMD.MethodReturnsInternalArray") + public byte[] fileChecksum() { + return fileChecksum; + } } diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index d6b00ed6a..48197735f 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1381,6 +1381,34 @@ public void getApproximateMemTableStatsSingleKey() throws RocksDBException { } } + @Test + public void getLiveFilesMetadataWithChecksum() throws RocksDBException { + final Properties props = new Properties(); + final byte[] key1 = "key1".getBytes(UTF_8); + props.put("file_checksum_gen_factory", "FileChecksumGenCrc32cFactory"); + + try (final DBOptions dbOptions = DBOptions.getDBOptionsFromProps(props); + final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(); + final Options options = new Options(dbOptions, cfOptions).setCreateIfMissing(true)) { + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + + // disable WAL so we have a deterministic checksum + try (final RocksDB db = RocksDB.open(options, dbPath); + final WriteOptions writeOptions = new WriteOptions().setDisableWAL(true)) { + db.put(writeOptions, key1, key1); + } + + try (final RocksDB db = RocksDB.open(options, dbPath)) { + final List<LiveFileMetaData> expectedFileMetadata = db.getLiveFilesMetaData(); + assertThat(expectedFileMetadata).hasSize(1); + // ideally we could re-compute here, but CRC32C is a Java 9 feature, so we have no CRC32C + // implementation available here + final LiveFileMetaData sstFile = expectedFileMetadata.get(0); + assertThat(sstFile.fileChecksum()).isNotEmpty(); + } + } + } + @Ignore("TODO(AR) re-enable when ready!") @Test public void compactFiles() throws RocksDBException { From 3e4e540d52a67f7fe745bc65f42df1a1eddfd5ea Mon Sep 17 00:00:00 2001 From: Radek Hubner Date: Fri, 5 Apr 2024 13:55:18 -0700 Subject: [PATCH 362/386] Fix exception on RocksDB.getColumnFamilyMetaData() (#12474) Summary: https://github.com/facebook/rocksdb/issues/12466 reported a bug when `RocksDB.getColumnFamilyMetaData()` is called on an existing database (with files stored on disk). As neilramaswamy mentioned, this was caused by https://github.com/facebook/rocksdb/issues/11770 where the signature of the `SstFileMetaData` constructor was changed, but the JNI code wasn't updated. This PR fixes the JNI code and also properly populates `fileChecksum` on `SstFileMetaData`.
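For context, a minimal sketch (placeholder class name and DB path) of the call pattern from the bug report that should now succeed, assuming the database already contains SST files on disk and was written with a file checksum generator factory such as `FileChecksumGenCrc32cFactory` configured:

```java
import java.util.List;

import org.rocksdb.ColumnFamilyMetaData;
import org.rocksdb.LevelMetaData;
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.SstFileMetaData;

public class ColumnFamilyMetaDataDump {
  public static void main(final String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    // "/tmp/existing-db" is a placeholder for a database that already has SST files on disk.
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/existing-db")) {
      final ColumnFamilyMetaData metadata = db.getColumnFamilyMetaData(); // threw before this fix
      for (final LevelMetaData level : metadata.levels()) {
        final List<SstFileMetaData> files = level.files();
        for (final SstFileMetaData file : files) {
          // With FileChecksumGenCrc32cFactory configured this is a 4-byte CRC32C value;
          // without a checksum factory it is an empty array.
          System.out.println(file.fileName() + " checksum bytes: " + file.fileChecksum().length);
        }
      }
    }
  }
}
```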
Pull Request resolved: https://github.com/facebook/rocksdb/pull/12474 Reviewed By: jowlyzhang Differential Revision: D55811808 Pulled By: ajkr fbshipit-source-id: 2ab156f41eaf4a4f30c49e6df421b61e8451230e (cherry picked from commit a8035ebc0b22f079a447bdc6b0aeeb2f1cf09d47) --- java/rocksjni/portal.h | 18 +++++++++++-- .../test/java/org/rocksdb/RocksDBTest.java | 27 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 3edff81aa..c13b8a666 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -7569,7 +7569,8 @@ class SstFileMetaDataJni : public JavaClass { } jmethodID mid = env->GetMethodID( - jclazz, "", "(Ljava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ)V"); + jclazz, "", + "(Ljava/lang/String;Ljava/lang/String;JJJ[B[BJZJJ[B)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -7609,6 +7610,17 @@ class SstFileMetaDataJni : public JavaClass { return nullptr; } + jbyteArray jfile_checksum = ROCKSDB_NAMESPACE::JniUtil::copyBytes( + env, sst_file_meta_data->file_checksum); + if (env->ExceptionCheck()) { + // exception occurred creating java string + env->DeleteLocalRef(jfile_name); + env->DeleteLocalRef(jpath); + env->DeleteLocalRef(jsmallest_key); + env->DeleteLocalRef(jlargest_key); + return nullptr; + } + jobject jsst_file_meta_data = env->NewObject( jclazz, mid, jfile_name, jpath, static_cast(sst_file_meta_data->size), @@ -7617,13 +7629,14 @@ class SstFileMetaDataJni : public JavaClass { jlargest_key, static_cast(sst_file_meta_data->num_reads_sampled), static_cast(sst_file_meta_data->being_compacted), static_cast(sst_file_meta_data->num_entries), - static_cast(sst_file_meta_data->num_deletions)); + static_cast(sst_file_meta_data->num_deletions), jfile_checksum); if (env->ExceptionCheck()) { env->DeleteLocalRef(jfile_name); env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); + env->DeleteLocalRef(jfile_checksum); return nullptr; } @@ -7632,6 +7645,7 @@ class SstFileMetaDataJni : public JavaClass { env->DeleteLocalRef(jpath); env->DeleteLocalRef(jsmallest_key); env->DeleteLocalRef(jlargest_key); + env->DeleteLocalRef(jfile_checksum); return jsst_file_meta_data; } diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 48197735f..74e523c49 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1409,6 +1409,33 @@ public void getLiveFilesMetadataWithChecksum() throws RocksDBException { } } + @Test + public void getColumnFamilyMetadataWithChecksum() throws RocksDBException { + final Properties props = new Properties(); + props.put("file_checksum_gen_factory", "FileChecksumGenCrc32cFactory"); + final String dbPath = dbFolder.getRoot().getAbsolutePath(); + + try (final DBOptions dbOptions = DBOptions.getDBOptionsFromProps(props); + final ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(); + final Options options = new Options(dbOptions, cfOptions).setCreateIfMissing(true)) { + try (final RocksDB db = RocksDB.open(options, dbPath); + final WriteOptions writeOptions = new WriteOptions().setDisableWAL(true)) { + db.put("key".getBytes(UTF_8), "value".getBytes(UTF_8)); + } + + try (final RocksDB db = RocksDB.open(options, dbFolder.getRoot().getAbsolutePath())) { + ColumnFamilyMetaData metadata = db.getColumnFamilyMetaData(); // Exception here + List levels = metadata.levels(); 
+ assertThat(levels).isNotEmpty(); + List filesMetadata = levels.get(0).files(); + assertThat(filesMetadata).isNotEmpty(); + assertThat(filesMetadata.get(0).fileChecksum()).isNotNull(); + assertThat(filesMetadata.get(0).fileChecksum()).hasSize(4); + assertThat(filesMetadata.get(0).fileChecksum()).isNotEqualTo(new byte[] {0, 0, 0, 0}); + } + } + } + @Ignore("TODO(AR) re-enable when ready!") @Test public void compactFiles() throws RocksDBException { From 7a76723056dd9092b88032526b8bfa16058e0315 Mon Sep 17 00:00:00 2001 From: mayuehappy Date: Tue, 13 Aug 2024 19:44:04 +0800 Subject: [PATCH 363/386] [FLINK-35575] Disable PERF_CONTEXT by default in compilation (#76) --- .circleci/config.yml | 5 +++-- Makefile | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 1c1feced2..23466cf48 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -45,6 +45,7 @@ commands: echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV + echo "export DISABLE_PERF_CONTEXT=0" >> $BASH_ENV windows-build-steps: steps: @@ -361,7 +362,7 @@ jobs: resource_class: xlarge steps: - checkout # check out the code in the project directory - - run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all + - run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 DISABLE_PERF_CONTEXT=0 make V=1 -j16 all - post-steps build-linux-clang10-asan: @@ -474,7 +475,7 @@ jobs: - run: apt-get update -y && apt-get install -y libgflags-dev - run: name: "Unity build" - command: make V=1 -j8 unity_test + command: DISABLE_PERF_CONTEXT=0 make V=1 -j8 unity_test no_output_timeout: 20m - run: make V=1 -j8 -k check-headers # could be moved to a different build - post-steps diff --git a/Makefile b/Makefile index cafd48529..bb39c2350 100644 --- a/Makefile +++ b/Makefile @@ -429,6 +429,12 @@ ifndef DISABLE_JEMALLOC PLATFORM_CCFLAGS += $(JEMALLOC_INCLUDE) endif +DISABLE_PERF_CONTEXT ?= 1 +ifeq ($(DISABLE_PERF_CONTEXT),1) + PLATFORM_CXXFLAGS += -DNPERF_CONTEXT + PLATFORM_CCFLAGS += -DNPERF_CONTEXT +endif + ifndef USE_FOLLY USE_FOLLY=0 endif From 1d531dab3f2d2dd98a509d6b2cb5cab72c1373a6 Mon Sep 17 00:00:00 2001 From: Zakelly Date: Wed, 6 Mar 2024 14:17:12 +0800 Subject: [PATCH 364/386] [build] Setting up templates for issues and PRs (#1) (cherry picked from commit e7b6d68b6eca99f0f8780d30889e45e80df07ab0) --- .github/ISSUE_TEMPLATE/bug_report.md | 29 ++++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 8 +++++ .github/ISSUE_TEMPLATE/work_item.md | 20 +++++++++++++ .github/pull_request_template.md | 44 ++++++++++++++++++++++++++++ issue_template.md | 7 ----- 5 files changed, 101 insertions(+), 7 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/work_item.md create mode 100644 .github/pull_request_template.md delete mode 100644 issue_template.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..044c642ce --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise 
description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Compile '...' +2. Run '....' +3. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. CentOS 7.8] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..e3e7745a4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Have questions + url: https://github.com/ververica/ForSt/discussions/categories/q-a + about: Please ask and answer questions here. + - name: New Ideas + url: https://github.com/ververica/ForSt/discussions/categories/ideas + about: Please suggest your new ideas here. \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/work_item.md b/.github/ISSUE_TEMPLATE/work_item.md new file mode 100644 index 000000000..d3dea472d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/work_item.md @@ -0,0 +1,20 @@ +--- +name: Work Item +about: Suggest/Log a work item (For big ideas and proposals, please go to New Ideas) +title: '' +labels: '' +assignees: '' + +--- + +**What is this for** +A clear and concise description of what the item is. + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..9429374eb --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,44 @@ + + +## What is the purpose of the change + +*(For example: This pull request enables caching all the java classes that will be frequently used.)* + + +## Brief change log + +*(for example:)* + - *A global cache container* + - *Cache entries for each objects* + + +## Verifying this change + +*(Please pick either of the following options)* + +This change is a trivial rework / code cleanup without any test coverage. + +*(or)* + +This change is already covered by existing tests, such as *(please describe tests)*. + +*(or)* + +This change added tests and can be verified as follows: + +*(example:)* + - *first step* + - *second step* + - *third step, and xxx behaves as expected* \ No newline at end of file diff --git a/issue_template.md b/issue_template.md deleted file mode 100644 index ca52f5ead..000000000 --- a/issue_template.md +++ /dev/null @@ -1,7 +0,0 @@ -> Note: Please use Issues only for bug reports. For questions, discussions, feature requests, etc. 
post to dev group: https://groups.google.com/forum/#!forum/rocksdb or https://www.facebook.com/groups/rocksdb.dev - -### Expected behavior - -### Actual behavior - -### Steps to reproduce the behavior From eaa8588741441820b860b0ab00b090668ec20b29 Mon Sep 17 00:00:00 2001 From: Zakelly Date: Thu, 7 Mar 2024 12:05:10 +0800 Subject: [PATCH 365/386] [build] Remove buckify output in sanity check (#3) This fixes #2 (cherry picked from commit 6f910e2772e770bbeab87bd417dd5e88a6b91019) --- .github/workflows/sanity_check.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/sanity_check.yml b/.github/workflows/sanity_check.yml index efc9d99cf..093b2e230 100644 --- a/.github/workflows/sanity_check.yml +++ b/.github/workflows/sanity_check.yml @@ -38,8 +38,5 @@ jobs: - name: Check format run: VERBOSE_CHECK=1 make check-format - - name: Compare buckify output - run: make check-buck-targets - - name: Simple source code checks - run: make check-sources + run: make check-sources \ No newline at end of file From f10be993a3d7aeadd2750a7f278e7f3f2286c580 Mon Sep 17 00:00:00 2001 From: yhx <38719192+masteryhx@users.noreply.github.com> Date: Tue, 12 Mar 2024 11:25:14 +0800 Subject: [PATCH 366/386] [env] Introduce interface of env_flink (#5) (cherry picked from commit 61f9574773fbfdae7b2f71bd8f861605afead3ec) --- CMakeLists.txt | 3 +- env/flink/env_flink.cc | 10 ++++ env/flink/env_flink.h | 101 +++++++++++++++++++++++++++++++++++++++++ src.mk | 1 + 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 env/flink/env_flink.cc create mode 100644 env/flink/env_flink.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5fcd9b7cd..61f96005b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1019,7 +1019,8 @@ else() port/port_posix.cc env/env_posix.cc env/fs_posix.cc - env/io_posix.cc) + env/io_posix.cc + env/flink/env_flink.cc) endif() if(USE_FOLLY_LITE) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc new file mode 100644 index 000000000..87183f131 --- /dev/null +++ b/env/flink/env_flink.cc @@ -0,0 +1,10 @@ +// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// TODO: +// 1. Register flink env to ObjectLibrary +// 2. Implement all methods of env_flink.h + +#include "env_flink.h" \ No newline at end of file diff --git a/env/flink/env_flink.h b/env/flink/env_flink.h new file mode 100644 index 000000000..d1912a3de --- /dev/null +++ b/env/flink/env_flink.h @@ -0,0 +1,101 @@ +// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include "rocksdb/env.h" +#include "rocksdb/file_system.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +// FlinkFileSystem extended from FileSystemWrapper which delegate necessary +// methods to Flink FileSystem based on JNI. For other methods, base FileSystem +// will proxy its methods. 
+class FlinkFileSystem : public FileSystemWrapper { + public: + // Create FlinkFileSystem with base_fs proxying all other methods and + // base_path + static Status Create(const std::shared_ptr& /*base_fs*/, + const std::string& /*base_path*/, + std::unique_ptr* /*fs*/); + + // Define some names + static const char* kClassName() { return "FlinkFileSystem"; } + const char* Name() const override { return kClassName(); } + static const char* kNickName() { return "flink"; } + const char* NickName() const override { return kNickName(); } + + // Constructor and Destructor + explicit FlinkFileSystem(const std::shared_ptr& base, + const std::string& fsname); + ~FlinkFileSystem() override; + + // Several methods current FileSystem must implement + + std::string GetId() const override; + Status ValidateOptions(const DBOptions& /*db_opts*/, + const ColumnFamilyOptions& /*cf_opts*/) const override; + IOStatus NewSequentialFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override; + IOStatus NewRandomAccessFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override; + IOStatus NewWritableFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override; + IOStatus NewDirectory(const std::string& /*name*/, + const IOOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override; + IOStatus FileExists(const std::string& /*fname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus GetChildren(const std::string& /*path*/, + const IOOptions& /*options*/, + std::vector* /*result*/, + IODebugContext* /*dbg*/) override; + IOStatus DeleteFile(const std::string& /*fname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus CreateDir(const std::string& /*name*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus CreateDirIfMissing(const std::string& /*name*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus DeleteDir(const std::string& /*name*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus GetFileSize(const std::string& /*fname*/, + const IOOptions& /*options*/, uint64_t* /*size*/, + IODebugContext* /*dbg*/) override; + IOStatus GetFileModificationTime(const std::string& /*fname*/, + const IOOptions& /*options*/, + uint64_t* /*time*/, + IODebugContext* /*dbg*/) override; + IOStatus RenameFile(const std::string& /*src*/, const std::string& /*target*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus LockFile(const std::string& /*fname*/, const IOOptions& /*options*/, + FileLock** /*lock*/, IODebugContext* /*dbg*/) override; + IOStatus UnlockFile(FileLock* /*lock*/, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override; + IOStatus IsDirectory(const std::string& /*path*/, + const IOOptions& /*options*/, bool* /*is_dir*/, + IODebugContext* /*dbg*/) override; + + private: + std::string base_path_; +}; + +// Returns a `FlinkEnv` with base_path +Status NewFlinkEnv(const std::string& base_path, std::unique_ptr* env); +// Returns a `FlinkFileSystem` with base_path +Status NewFlinkFileSystem(const std::string& base_path, + std::shared_ptr* fs); +} // namespace ROCKSDB_NAMESPACE diff --git a/src.mk b/src.mk index dc3289b00..caad7ee8f 100644 --- a/src.mk +++ b/src.mk @@ -113,6 +113,7 @@ 
LIB_SOURCES = \ env/io_posix.cc \ env/mock_env.cc \ env/unique_id_gen.cc \ + env/flink/env_flink.cc \ file/delete_scheduler.cc \ file/file_prefetch_buffer.cc \ file/file_util.cc \ From b8cb45ea8ef3a201bb85a45207a4cc480dbc63c2 Mon Sep 17 00:00:00 2001 From: "jinse.ljz" Date: Tue, 12 Mar 2024 12:56:06 +0800 Subject: [PATCH 367/386] [env] Introduce JvmUtils to support global JNIEnv (cherry picked from commit 44debe7a9de2c1a50405bd7501830670b9542451) --- CMakeLists.txt | 7 +++- env/flink/jvm_util.cc | 59 ++++++++++++++++++++++++++++++++++ env/flink/jvm_util.h | 74 +++++++++++++++++++++++++++++++++++++++++++ src.mk | 1 + 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 env/flink/jvm_util.cc create mode 100644 env/flink/jvm_util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 61f96005b..a5cc1e39d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1020,7 +1020,8 @@ else() env/env_posix.cc env/fs_posix.cc env/io_posix.cc - env/flink/env_flink.cc) + env/flink/env_flink.cc + env/flink/jvm_util.cc) endif() if(USE_FOLLY_LITE) @@ -1165,6 +1166,10 @@ endif() if(WITH_JNI OR JNI) message(STATUS "JNI library is enabled") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java) + include_directories(${JNI_INCLUDE_DIRS}) + if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + include_directories(${JNI_INCLUDE_DIRS}/linux) + endif () else() message(STATUS "JNI library is disabled") endif() diff --git a/env/flink/jvm_util.cc b/env/flink/jvm_util.cc new file mode 100644 index 000000000..8e2c6f07a --- /dev/null +++ b/env/flink/jvm_util.cc @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "env/flink/jvm_util.h" + +namespace ROCKSDB_NAMESPACE { + +std::atomic jvm_ = std::atomic(nullptr); + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { + JNIEnv* env = nullptr; + if (vm->GetEnv((void**)&env, JNI_VERSION_1_8) != JNI_OK) { + return -1; + } + + jvm_.store(vm); + return JNI_VERSION_1_8; +} + +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* reserved) { + jvm_.store(nullptr); +} + +void setJVM(JavaVM* jvm) { jvm_.store(jvm); } + +JNIEnv* getJNIEnv(bool attach) { + JavaVM* jvm = jvm_.load(); + if (jvm == nullptr) { + return nullptr; + } + + thread_local JavaEnv env; + if (env.getEnv() == nullptr) { + auto status = jvm->GetEnv((void**)&(env.getEnv()), JNI_VERSION_1_8); + if (attach && (status == JNI_EDETACHED || env.getEnv() == nullptr)) { + if (jvm->AttachCurrentThread((void**)&(env.getEnv()), nullptr) == + JNI_OK) { + env.setNeedDetach(); + } + } + } + return env.getEnv(); +} +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/jvm_util.h b/env/flink/jvm_util.h new file mode 100644 index 000000000..5c5b5fc83 --- /dev/null +++ b/env/flink/jvm_util.h @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include "jni.h" +#include "rocksdb/env.h" + +namespace ROCKSDB_NAMESPACE { + +extern std::atomic jvm_; + +#ifdef __cplusplus +extern "C" { +#endif + +JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved); +JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* reserved); + +#ifdef __cplusplus +} +#endif + +void setJVM(JavaVM* jvm); + +JNIEnv* getJNIEnv(bool attach = true); + +static inline std::string parseJavaString(JNIEnv* jni_env, + jstring java_string) { + const char* chars = jni_env->GetStringUTFChars(java_string, nullptr); + auto length = jni_env->GetStringUTFLength(java_string); + std::string native_string = std::string(chars, length); + jni_env->ReleaseStringUTFChars(java_string, chars); + return native_string; +} + +class JavaEnv { + public: + ~JavaEnv() { + if (env_ != nullptr && need_detach_) { + jvm_.load()->DetachCurrentThread(); + need_detach_ = false; + } + } + + JNIEnv*& getEnv() { return env_; } + + void setNeedDetach() { need_detach_ = true; } + + private: + JNIEnv* env_ = nullptr; + bool need_detach_ = false; +}; +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/src.mk b/src.mk index caad7ee8f..30b70195a 100644 --- a/src.mk +++ b/src.mk @@ -114,6 +114,7 @@ LIB_SOURCES = \ env/mock_env.cc \ env/unique_id_gen.cc \ env/flink/env_flink.cc \ + env/flink/jvm_util.cc \ file/delete_scheduler.cc \ file/file_prefetch_buffer.cc \ file/file_util.cc \ From 0a7f5f1eb5d6aba1ae258fa4460be7e63ddd097a Mon Sep 17 00:00:00 2001 From: yhx Date: Tue, 12 Mar 2024 16:11:25 +0800 Subject: [PATCH 368/386] [env] Introduce interface of env_flink (#7) (cherry picked from commit 4a511b33d33ff41d1231fd8d3361b1916e94dbac) --- CMakeLists.txt | 3 +- env/flink/jni_helper.cc | 76 +++++++++++++++++++++++++++++++++++++++++ env/flink/jni_helper.h | 45 ++++++++++++++++++++++++ src.mk | 1 + 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 env/flink/jni_helper.cc create mode 100644 env/flink/jni_helper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index a5cc1e39d..1efcde659 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1021,7 +1021,8 @@ else() env/fs_posix.cc env/io_posix.cc env/flink/env_flink.cc - env/flink/jvm_util.cc) + env/flink/jvm_util.cc + env/flink/jni_helper.cc) endif() if(USE_FOLLY_LITE) diff --git a/env/flink/jni_helper.cc b/env/flink/jni_helper.cc new file mode 100644 index 000000000..8d1ac5acf --- /dev/null +++ b/env/flink/jni_helper.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "jni_helper.h" + +namespace ROCKSDB_NAMESPACE { + +JavaClassCache::JavaClassCache(JNIEnv *env) : jni_env_(env) { + // Set all class names + cached_java_classes_[JavaClassCache::JC_URI].className = "java/net/URI"; + cached_java_classes_[JavaClassCache::JC_BYTE_BUFFER].className = + "java/nio/ByteBuffer"; + cached_java_classes_[JavaClassCache::JC_THROWABLE].className = + "java/lang/Throwable"; + cached_java_classes_[JavaClassCache::JC_FLINK_PATH].className = + "org/apache/flink/core/fs/Path"; + cached_java_classes_[JavaClassCache::JC_FLINK_FILE_SYSTEM].className = + "org/apache/flink/state/forst/fs/ForStFlinkFileSystem"; + cached_java_classes_[JavaClassCache::JC_FLINK_FILE_STATUS].className = + "org/apache/flink/core/fs/FileStatus"; + cached_java_classes_[JavaClassCache::JC_FLINK_FS_INPUT_STREAM].className = + "org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream"; + cached_java_classes_[JavaClassCache::JC_FLINK_FS_OUTPUT_STREAM].className = + "org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream"; + + // Try best to create and set the jclass objects based on the class names set + // above + int numCachedClasses = + sizeof(cached_java_classes_) / sizeof(javaClassAndName); + for (int i = 0; i < numCachedClasses; i++) { + initCachedClass(cached_java_classes_[i].className, + &cached_java_classes_[i].javaClass); + } +} + +JavaClassCache::~JavaClassCache() { + // Release all global ref of cached jclasses + for (const auto &item : cached_java_classes_) { + if (item.javaClass) { + jni_env_->DeleteGlobalRef(item.javaClass); + } + } +} + +Status JavaClassCache::initCachedClass(const char *className, + jclass *cachedJclass) { + jclass tempLocalClassRef = jni_env_->FindClass(className); + if (!tempLocalClassRef) { + return Status::IOError("Exception when FindClass, class name: " + + std::string(className)); + } + *cachedJclass = (jclass)jni_env_->NewGlobalRef(tempLocalClassRef); + if (!*cachedJclass) { + return Status::IOError("Exception when NewGlobalRef, class name " + + std::string(className)); + } + + jni_env_->DeleteLocalRef(tempLocalClassRef); + return Status::OK(); +} + +Status JavaClassCache::GetJClass(CachedJavaClass cachedJavaClass, + jclass *javaClass) { + jclass targetClass = cached_java_classes_[cachedJavaClass].javaClass; + Status status = Status::OK(); + if (!targetClass) { + status = initCachedClass(cached_java_classes_[cachedJavaClass].className, + &targetClass); + } + *javaClass = targetClass; + return status; +} + +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/jni_helper.h b/env/flink/jni_helper.h new file mode 100644 index 000000000..39d9e9f9a --- /dev/null +++ b/env/flink/jni_helper.h @@ -0,0 +1,45 @@ +// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "jni.h" +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +// A cache for java classes to avoid calling FindClass frequently +class JavaClassCache { + public: + // Frequently-used class type representing jclasses which will be cached. 
+ typedef enum { + JC_URI, + JC_BYTE_BUFFER, + JC_THROWABLE, + JC_FLINK_PATH, + JC_FLINK_FILE_SYSTEM, + JC_FLINK_FILE_STATUS, + JC_FLINK_FS_INPUT_STREAM, + JC_FLINK_FS_OUTPUT_STREAM, + NUM_CACHED_CLASSES + } CachedJavaClass; + + // Constructor and Destructor + explicit JavaClassCache(JNIEnv* env); + ~JavaClassCache(); + + // Get jclass by specific CachedJavaClass + Status GetJClass(CachedJavaClass cachedJavaClass, jclass* javaClass); + + private: + typedef struct { + jclass javaClass; + const char* className; + } javaClassAndName; + + JNIEnv* jni_env_; + javaClassAndName cached_java_classes_[JavaClassCache::NUM_CACHED_CLASSES]; + + Status initCachedClass(const char* className, jclass* cachedClass); +}; +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/src.mk b/src.mk index 30b70195a..4beae92a3 100644 --- a/src.mk +++ b/src.mk @@ -115,6 +115,7 @@ LIB_SOURCES = \ env/unique_id_gen.cc \ env/flink/env_flink.cc \ env/flink/jvm_util.cc \ + env/flink/jni_helper.cc \ file/delete_scheduler.cc \ file/file_prefetch_buffer.cc \ file/file_util.cc \ From 5ad02f7cc91e6ef72c90ff911481ad9ba75caac1 Mon Sep 17 00:00:00 2001 From: Zakelly Date: Tue, 12 Mar 2024 17:23:59 +0800 Subject: [PATCH 369/386] [build] license and READMEs (#9) (cherry picked from commit 09ba94fc277a872445f29a4d95e94b656b852fd2) --- CONTRIBUTING.md | 48 +- COPYING | 339 ----- DEFAULT_OPTIONS_HISTORY.md | 24 - DUMP_FORMAT.md | 16 - FROCKSDB-RELEASE.md | 251 ---- HISTORY.md | 2602 ------------------------------------ LICENSE.Apache => LICENSE | 0 README.md | 15 +- 8 files changed, 43 insertions(+), 3252 deletions(-) delete mode 100644 COPYING delete mode 100644 DEFAULT_OPTIONS_HISTORY.md delete mode 100644 DUMP_FORMAT.md delete mode 100644 FROCKSDB-RELEASE.md delete mode 100644 HISTORY.md rename LICENSE.Apache => LICENSE (100%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 190100b42..d7ca7890d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,17 +1,45 @@ -# Contributing to RocksDB +# Contributing to ForSt ## Code of Conduct The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md) -## Contributor License Agreement ("CLA") +## Basic Development Workflow +As most open-source projects in github, ForSt contributors work on their forks, and send pull requests to ForSt’s repo. After a reviewer approves the pull request and all the CI check are passed, a ForSt team member will merge it. -In order to accept your pull request, we need you to submit a CLA. You -only need to do this once, so if you've done this for another Facebook -open source project, you're good to go. If you are submitting a pull -request for the first time, just let us know that you have completed -the CLA and we can cross-check with your GitHub username. +## Code style +ForSt follows the RocksDB's code format. +RocksDB follows Google C++ Style: https://google.github.io/styleguide/cppguide.html +Note: a common pattern in existing RocksDB code is using non-nullable Type* for output parameters, in the old Google C++ Style, but this guideline has changed. The new guideline prefers (non-const) references for output parameters. +For formatting, we limit each line to 80 characters. Most formatting can be done automatically by running +``` +build_tools/format-diff.sh +``` +or simply ```make format``` if you use GNU make. If you lack of dependencies to run it, the script will print out instructions for you to install them. -Complete your CLA here: -If you prefer to sign a paper copy, we can send you a PDF. 
Send us an -e-mail or create a new github issue to request the CLA in PDF format. +## License Claim +ForSt is licensed under Apache 2.0 License. But since the RocksDB has its own license, we keep the license claim on top of each existing files, and use/add Apache 2.0 License on top of each new created files. +``` +/* Copyright 2024-present, the ForSt authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +``` + +## Submit patches +Before you submit a patch, we strongly recommend that you share your ideas with others +in the community via [Issues](https://github.com/ververica/ForSt/issues) or +[Discussions](https://github.com/ververica/ForSt/discussions). Of course, you do not +need to do this if you are submitting a patch that can already be associated with an +issue, or a minor patch like a typo fix. You can then submit your patch via +[Pull Requests](https://github.com/ververica/ForSt/pulls), which requires a GitHub account. diff --git a/COPYING b/COPYING deleted file mode 100644 index d159169d1..000000000 --- a/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. 
- - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. diff --git a/DEFAULT_OPTIONS_HISTORY.md b/DEFAULT_OPTIONS_HISTORY.md deleted file mode 100644 index 82c64d523..000000000 --- a/DEFAULT_OPTIONS_HISTORY.md +++ /dev/null @@ -1,24 +0,0 @@ -# RocksDB default options change log (NO LONGER MAINTAINED) -## Unreleased -* delayed_write_rate takes the rate given by rate_limiter if not specified. - -## 5.2 -* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files. - -## 5.0 (11/17/2016) -* Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default -* Options.level0_stop_writes_trigger default value changes from 24 to 32. - -## 4.8.0 (5/2/2016) -* options.max_open_files changes from 5000 to -1. It improves performance, but users need to set file descriptor limit to be large enough and watch memory usage for index and bloom filters. -* options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks. -* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. Avoid some false positive when file system or hardware reorder the writes for file data and metadata. 
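For readers tracking the default changes listed above, here is a minimal, hypothetical C++ sketch (not part of the original change log) showing how an application that depended on the pre-4.8.0 behavior could pin those values explicitly; the numbers are taken from the entries above and the helper name is illustrative.

```cpp
#include <rocksdb/options.h>

// Hypothetical helper: explicitly pin two of the pre-4.8.0 defaults listed
// above for an application that depended on them. Values come straight from
// the change log entries; everything else keeps the current defaults.
rocksdb::Options PinPre48Defaults(rocksdb::Options options) {
  options.max_open_files = 5000;  // 4.8.0 changed the default to -1 (unlimited)
  options.wal_recovery_mode =
      rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords;  // 4.8.0 default became kPointInTimeRecovery
  return options;
}
```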
- -## 4.7.0 (4/8/2016) -* options.write_buffer_size changes from 4MB to 64MB. -* options.target_file_size_base changes from 2MB to 64MB. -* options.max_bytes_for_level_base changes from 10MB to 256MB. -* options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB. -* options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB. -* table_cache_numshardbits changes from 4 to 6. -* max_file_opening_threads changes from 1 to 16. diff --git a/DUMP_FORMAT.md b/DUMP_FORMAT.md deleted file mode 100644 index 009dabad5..000000000 --- a/DUMP_FORMAT.md +++ /dev/null @@ -1,16 +0,0 @@ -## RocksDB dump format - -The version 1 RocksDB dump format is fairly simple: - -1) The dump starts with the magic 8 byte identifier "ROCKDUMP". - -2) The magic is followed by an 8 byte big-endian version which is 0x00000001. - -3) Next are arbitrarily sized chunks of bytes, each preceded by a 4 byte little endian number indicating how large the chunk is. - -4) The first chunk is special and is a json string indicating some things about the creation of this dump. It contains the following keys: -* database-path: The path of the database this dump was created from. -* hostname: The hostname of the machine where the dump was created. -* creation-time: Unix seconds since epoch when this dump was created. - -5) Following the info chunk, the remaining chunks are paired into key/value pairs. diff --git a/FROCKSDB-RELEASE.md b/FROCKSDB-RELEASE.md deleted file mode 100644 index 2cd092d88..000000000 --- a/FROCKSDB-RELEASE.md +++ /dev/null @@ -1,251 +0,0 @@ -# FRocksDB Release Process - -## Summary - -FrocksDB-6.x releases are fat jar files that contain the following binaries: -* .so files for linux32 (glibc and musl-libc) -* .so files for linux64 (glibc and musl-libc) -* .so files for linux [aarch64](https://en.wikipedia.org/wiki/AArch64) (glibc and musl-libc) -* .so files for linux [ppc64le](https://en.wikipedia.org/wiki/Ppc64le) (glibc and musl-libc) -* .jnilib file for Mac OSX -* .dll for Windows x64 - -To build the binaries for a FrocksDB release, building on native architectures is advised. Building the binaries for ppc64le and aarch64 *can* be done using QEMU, but you may run into emulation bugs and the build times will be dramatically slower (up to x20). - -We recommend building the binaries on environments with at least 4 cores, 16GB RAM and 40GB of storage. The following environments are recommended for use in the build process: -* Windows x64 -* Linux aarch64 -* Linux ppc64le -* Mac OSX - -## Build for Windows - -For the Windows binary build, we recommend using a base [AWS Windows EC2 instance](https://aws.amazon.com/windows/products/ec2/) with 4 cores, 16GB RAM, 40GB storage for the build. - -Firstly, install [chocolatey](https://chocolatey.org/install). Once installed, the following required components can be installed using Powershell: - - choco install git.install jdk8 maven visualstudio2017community visualstudio2017-workload-nativedesktop - -Open the "Developer Command Prompt for VS 2017" and run the following commands: - - git clone git@github.com:ververica/frocksdb.git - cd frocksdb - git checkout FRocksDB-6.20.3 # release branch - java\crossbuild\build-win.bat - -The resulting native binary will be built and available at `build\java\Release\rocksdbjni-shared.dll`. You can also find it under the project folder with the name `librocksdbjni-win64.dll`. -The resulting Windows jar is `build\java\rocksdbjni_classes.jar`. - -There is also a how-to in CMakeLists.txt.
- -**Once finished, extract the `librocksdbjni-win64.dll` from the build environment. You will need this .dll in the final crossbuild.** - -## Build for aarch64 - -For the Linux aarch64 binary build, we recommend using a base [AWS Ubuntu Server 20.04 LTS EC2](https://aws.amazon.com/windows/products/ec2/) with a 4 core Arm processor, 16GB RAM, 40GB storage for the build. You can also attempt to build with QEMU on a non-aarch64 processor, but you may run into emulation bugs and very long build times. - -### Building in aarch64 environment - -First, install the required packages such as Java 8 and make: - - sudo apt-get update - sudo apt-get install build-essential openjdk-8-jdk - -then, install and setup [Docker](https://docs.docker.com/engine/install/ubuntu/): - - sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg - echo "deb [arch=arm64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - - sudo apt-get update - sudo apt-get install docker-ce docker-ce-cli containerd.io - - sudo groupadd docker - sudo usermod -aG docker $USER - newgrp docker - -Then, clone the FrocksDB repo: - - git clone https://github.com/ververica/frocksdb.git - cd frocksdb - git checkout FRocksDB-6.20.3 # release branch - - -First, build the glibc binary: - - make jclean clean rocksdbjavastaticdockerarm64v8 - -**Once finished, extract the `java/target/librocksdbjni-linux-aarch64.so` from the build environment. You will need this .so in the final crossbuild.** - -Next, build the musl-libc binary: - - make jclean clean rocksdbjavastaticdockerarm64v8musl - -**Once finished, extract the `java/target/librocksdbjni-linux-aarch64-musl.so` from the build environment. You will need this .so in the final crossbuild.** - -### Building via QEMU - -You can use QEMU on, for example, an `x86_64` system to build the aarch64 binaries. To set this up on an Ubuntu envirnment: - - sudo apt-get install qemu binfmt-support qemu-user-static - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - -To verify that you can now run aarch64 docker images: - - docker run --rm -t arm64v8/ubuntu uname -m - > aarch64 - -You can now attempt to build the aarch64 binaries as in the previous section. - -## Build in PPC64LE - -For the ppc64le binaries, we recommend building on a PowerPC machine if possible, as it can be tricky to spin up a ppc64le cloud environment. However, if a PowerPC machine is not available, [Travis-CI](https://www.travis-ci.com/) offers ppc64le build environments that work perfectly for building these binaries. If neither a machine or Travis are an option, you can use QEMU but the build may take a very long time and be prone to emulation errors. - -### Building in ppc64le environment - -As with the aarch64 environment, the ppc64le environment will require Java 8, Docker and build-essentials installed. Once installed, you can build the 2 binaries: - - make jclean clean rocksdbjavastaticdockerppc64le - -**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le.so` from the build environment. You will need this .so in the final crossbuild.** - - make jclean clean rocksdbjavastaticdockerppc64lemusl - -**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le-musl.so` from the build environment. 
You will need this .so in the final crossbuild.** - -### Building via Travis - -Travis-CI supports ppc64le build environments, and this can be a convenient way of building in the absence of a PowerPC machine. Assuming that you have an S3 bucket called **my-frocksdb-release-artifacts**, the following Travis configuration will build the release artifacts and push them to the S3 bucket: - -``` -dist: xenial -language: cpp -os: - - linux -arch: - - ppc64le - -services: - - docker -addons: - artifacts: - paths: - - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le-musl.so - - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le.so - -env: - global: - - ARTIFACTS_BUCKET=my-frocksdb-release-artifacts - jobs: - - CMD=rocksdbjavastaticdockerppc64le - - CMD=rocksdbjavastaticdockerppc64lemusl - -install: - - sudo apt-get install -y openjdk-8-jdk || exit $? - - export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH - - export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture) - - echo "JAVA_HOME=${JAVA_HOME}" - - which java && java -version - - which javac && javac -version - -script: - - make jclean clean $CMD -``` - -**Make sure to set the `ARTIFACTS_KEY` and `ARTIFACTS_SECRET` environment variables in the Travis Job with valid AWS credentials to access the S3 bucket you defined.** - -**Make sure to avoid signatureV4-only S3 regions to store the uploaded artifacts (due to unresolved https://github.com/travis-ci/artifacts/issues/57). You can just choose the S3 bucket of `us-east-1` region for 100% compatibility.** - -**Once finished, the `librocksdbjni-linux-ppc64le.so` and `librocksdbjni-linux-ppc64le-musl.so` binaries will be in the S3 bucket. You will need these .so binaries in the final crossbuild.** - - -### Building via QEMU - -You can use QEMU on, for example, an `x86_64` system to build the ppc64le binaries. To set this up on an Ubuntu environment: - - sudo apt-get install qemu binfmt-support qemu-user-static - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - -To verify that you can now run ppc64le docker images: - - docker run --rm -t ppc64le/ubuntu uname -m - > ppc64le - -You can now attempt to build the ppc64le binaries as in the previous section. - -## Final crossbuild in Mac OSX - -Documentation for the final crossbuild for Mac OSX and Linux is described in [java/RELEASE.md](java/RELEASE.md) and has information on dependencies that should be installed. As above, this tends to be Java 8, build-essentials and Docker. - -Before you run this step, you should have 5 binaries from the previous build steps: - - 1. `librocksdbjni-win64.dll` from the Windows build step. - 2. `librocksdbjni-linux-aarch64.so` from the aarch64 build step. - 3. `librocksdbjni-linux-aarch64-musl.so` from the aarch64 build step. - 4. `librocksdbjni-linux-ppc64le.so` from the ppc64le build step. - 5. `librocksdbjni-linux-ppc64le-musl.so` from the ppc64le build step.
- -To start the crossbuild within a Mac OSX environment: - - make jclean clean - mkdir -p java/target - cp /librocksdbjni-win64.dll java/target/librocksdbjni-win64.dll - cp /librocksdbjni-linux-ppc64le.so java/target/librocksdbjni-linux-ppc64le.so - cp /librocksdbjni-linux-ppc64le-musl.so java/target/librocksdbjni-linux-ppc64le-musl.so - cp /librocksdbjni-linux-aarch64.so java/target/librocksdbjni-linux-aarch64.so - cp /librocksdbjni-linux-aarch64-musl.so java/target/librocksdbjni-linux-aarch64-musl.so - FROCKSDB_VERSION=1.0 PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true DEBUG_LEVEL=0 make frocksdbjavastaticreleasedocker - -*Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787*. - -Once finished, there should be a directory at `java/target/frocksdb-release` with the FRocksDB jar, javadoc jar, sources jar and pom in it. You can inspect the jar file and ensure that contains the binaries, history file, etc: - -``` -$ jar tf frocksdbjni-6.20.3-ververica-1.0.jar -META-INF/ -META-INF/MANIFEST.MF -HISTORY-JAVA.md -HISTORY.md -librocksdbjni-linux-aarch64-musl.so -librocksdbjni-linux-aarch64.so -librocksdbjni-linux-ppc64le-musl.so -librocksdbjni-linux-ppc64le.so -librocksdbjni-linux32-musl.so -librocksdbjni-linux32.so -librocksdbjni-linux64-musl.so -librocksdbjni-linux64.so -librocksdbjni-osx.jnilib -librocksdbjni-win64.dl -... -``` - -*Note that it contains linux32/64.so binaries as well as librocksdbjni-osx.jnilib*. - -## Push to Maven Central - -For this step, you will need the following: - -- The OSX Crossbuild artifacts built in `java/target/frocksdb-release` as above. -- A Sonatype account with access to the staging repository. If you do not have permission, open a ticket with Sonatype, [such as this one](https://issues.sonatype.org/browse/OSSRH-72185). -- A GPG key to sign the release, with your public key available for verification (for example, by uploading it to https://keys.openpgp.org/) - -To upload the release to the Sonatype staging repository: -```bash -VERSION= \ -USER= \ -PASSWORD= \ -KEYNAME= \ -PASSPHRASE= \ -java/publish-frocksdbjni.sh -``` - -Go to the staging repositories on Sonatype: - -https://oss.sonatype.org/#stagingRepositories - -Select the open staging repository and click on "Close". - -The staging repository will look something like `https://oss.sonatype.org/content/repositories/xxxx-1020`. You can use this staged release to test the artifacts and ensure they are correct. - -Once you have verified the artifacts are correct, press the "Release" button. **WARNING: this can not be undone**. Within 24-48 hours, the artifact will be available on Maven Central for use. diff --git a/HISTORY.md b/HISTORY.md deleted file mode 100644 index f4d0ea3f4..000000000 --- a/HISTORY.md +++ /dev/null @@ -1,2602 +0,0 @@ -# FRocksdb Change Log -## 6.20.2-ververica-1.0 (08/09/2021) -### Improvement -* [Flink TTL] compaction filter for background cleanup of state with time-to-live -* [FLINK-19710] Revert implementation of PerfContext back to __thread to avoid performance regression - -# Rocksdb Change Log -> NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt` - -## 8.10.0 (12/15/2023) -### New Features -* Provide support for async_io to trim readahead_size by doing block cache lookup -* Added initial wide-column support in `WriteBatchWithIndex`. 
This includes the `PutEntity` API and support for wide columns in the existing read APIs (`GetFromBatch`, `GetFromBatchAndDB`, `MultiGetFromBatchAndDB`, and `BaseDeltaIterator`). - -### Public API Changes -* Custom implementations of `TablePropertiesCollectorFactory` may now return a `nullptr` collector to decline processing a file, reducing callback overheads in such cases. - -### Behavior Changes -* Make ReadOptions.auto_readahead_size default true which does prefetching optimizations for forward scans if iterate_upper_bound and block_cache are also specified. -* Compactions can be scheduled in parallel in an additional scenario: high compaction debt relative to the data size. -* HyperClockCache now has built-in protection against excessive CPU consumption under the extreme stress condition of no (or very few) evictable cache entries, which can slightly increase memory usage under such conditions. New option `HyperClockCacheOptions::eviction_effort_cap` controls the space-time trade-off of the response. The default should be generally well-balanced, with no measurable effect on normal operation. - -### Bug Fixes -* Fix a corner case with auto_readahead_size where the Prev operation returns a NOT SUPPORTED error when the scan direction is changed from forward to backward. -* Avoid destroying the periodic task scheduler's default timer in order to prevent static destruction order issues. -* Fix double counting of BYTES_WRITTEN ticker when doing writes with transactions. -* Fix a WRITE_STALL counter that was reporting a wrong value in a few cases. -* A lookup by MultiGet in a TieredCache that goes to the local flash cache and finishes with very low latency, i.e., before the subsequent call to WaitAll, is ignored, resulting in a false negative and a memory leak. - -### Performance Improvements -* Java API extensions to improve consistency and completeness of APIs -1 Extended `RocksDB.get([ColumnFamilyHandle columnFamilyHandle,] ReadOptions opt, ByteBuffer key, ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters -2 Extended `RocksDB.put( [ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOpts, final ByteBuffer key, final ByteBuffer value)` which now accepts indirect buffer parameters as well as direct buffer parameters -3 Added `RocksDB.merge([ColumnFamilyHandle columnFamilyHandle,] WriteOptions writeOptions, ByteBuffer key, ByteBuffer value)` methods with the same parameter options as `put(...)` - direct and indirect buffers are supported -4 Added `RocksIterator.key( byte[] key [, int offset, int len])` methods which retrieve the iterator key into the supplied buffer -5 Added `RocksIterator.value( byte[] value [, int offset, int len])` methods which retrieve the iterator value into the supplied buffer -6 Deprecated `get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions readOptions, byte[])` in favour of `get(final ReadOptions readOptions, final ColumnFamilyHandle columnFamilyHandle, byte[])` which has consistent parameter ordering with other methods in the same class -7 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key, byte[] value)` methods which retrieve the requested value into the supplied buffer -8 Added `Transaction.get( ReadOptions opt, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which retrieve the requested value into the supplied buffer -9 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] byte[] key,
byte[] value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer -10 Added `Transaction.getForUpdate( ReadOptions readOptions, [ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value, boolean exclusive [, boolean doValidate])` methods which retrieve the requested value into the supplied buffer -11 Added `Transaction.getIterator()` method as a convenience which defaults the `ReadOptions` value supplied to existing `Transaction.iterator()` methods. This mirrors the existing `RocksDB.iterator()` method. -12 Added `Transaction.put([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written in a `ByteBuffer` parameter -13 Added `Transaction.merge([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value [, boolean assumeTracked])` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter -14 Added `Transaction.mergeUntracked([ColumnFamilyHandle columnFamilyHandle, ] ByteBuffer key, ByteBuffer value)` methods which supply the key, and the value to be written/merged in a `ByteBuffer` parameter - - -## 8.9.0 (11/17/2023) -### New Features -* Add GetEntity() and PutEntity() API implementation for Attribute Group support. Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. - -### Public API Changes -* Added rocksdb_ratelimiter_create_auto_tuned API to create an auto-tuned GenericRateLimiter. -* Added clipColumnFamily() to the Java API to clip the entries in the CF according to the range [begin_key, end_key). -* Make the `EnableFileDeletion` API not default to force enabling. For users that rely on this default behavior and still -want to continue to use force enabling, they need to explicitly pass a `true` to `EnableFileDeletion`. -* Add new Cache APIs GetSecondaryCacheCapacity() and GetSecondaryCachePinnedUsage() to return the configured capacity, and cache reservation charged to the secondary cache. - -### Behavior Changes -* During off-peak hours defined by `daily_offpeak_time_utc`, the compaction picker will select a larger number of files for periodic compaction. This selection will include files that are projected to expire by the next off-peak start time, ensuring that these files are not chosen for periodic compaction outside of off-peak hours. -* If an error occurs when writing to a trace file after `DB::StartTrace()`, the subsequent trace writes are skipped to avoid writing to a file that has previously seen an error. In this case, `DB::EndTrace()` will also return a non-ok status with info about the error that occurred previously in its status message. -* Deleting stale files upon recovery is delegated to SstFileManager if available so they can be rate limited. -* Make RocksDB only call `TablePropertiesCollector::Finish()` once. -* When `WAL_ttl_seconds > 0`, we now process archived WALs for deletion at least every `WAL_ttl_seconds / 2` seconds. Previously it could be less frequent in case of small `WAL_ttl_seconds` values when size-based expiration (`WAL_size_limit_MB > 0 `) was simultaneously enabled. - -### Bug Fixes -* Fixed a crash or assertion failure bug in experimental new HyperClockCache variant, especially when running with a SecondaryCache. -* Fix a race between flush error recovery and db destruction that can lead to db crashing.
-* Fixed some bugs in the index builder/reader path for user-defined timestamps in Memtable only feature. - -## 8.8.0 (10/23/2023) -### New Features -* Introduce AttributeGroup by adding the first AttributeGroup support API, MultiGetEntity(). Through the use of Column Families, AttributeGroup enables users to logically group wide-column entities. More APIs to support AttributeGroup will come soon, including GetEntity, PutEntity, and others. -* Added new tickers `rocksdb.fifo.{max.size|ttl}.compactions` to count FIFO compactions that drop files for different reasons -* Add an experimental offpeak duration awareness by setting `DBOptions::daily_offpeak_time_utc` in "HH:mm-HH:mm" format. This information will be used for resource optimization in the future -* Users can now change the max bytes granted in a single refill period (i.e, burst) during runtime by `SetSingleBurstBytes()` for RocksDB rate limiter - -### Public API Changes -* The default value of `DBOptions::fail_if_options_file_error` changed from `false` to `true`. Operations that set in-memory options (e.g., `DB::Open*()`, `DB::SetOptions()`, `DB::CreateColumnFamily*()`, and `DB::DropColumnFamily()`) but fail to persist the change will now return a non-OK `Status` by default. - -### Behavior Changes -* For non direct IO, eliminate the file system prefetching attempt for compaction read when `Options::compaction_readahead_size` is 0 -* During a write stop, writes now block on in-progress recovery attempts - -### Bug Fixes -* Fix a bug in auto_readahead_size where first_internal_key of index blocks wasn't copied properly resulting in corruption error when first_internal_key was used for comparison. -* Fixed a bug where compaction read under non direct IO still falls back to RocksDB internal prefetching after file system's prefetching returns non-OK status other than `Status::NotSupported()` -* Add bounds check in WBWIIteratorImpl and make BaseDeltaIterator, WriteUnpreparedTxn and WritePreparedTxn respect the upper bound and lower bound in ReadOption. See 11680. -* Fixed the handling of wide-column base values in the `max_successive_merges` logic. -* Fixed a rare race bug involving a concurrent combination of Create/DropColumnFamily and/or Set(DB)Options that could lead to inconsistency between (a) the DB's reported options state, (b) the DB options in effect, and (c) the latest persisted OPTIONS file. -* Fixed a possible underflow when computing the compressed secondary cache share of memory reservations while updating the compressed secondary to total block cache ratio. - -### Performance Improvements -* Improved the I/O efficiency of DB::Open a new DB with `create_missing_column_families=true` and many column families. - -## 8.7.0 (09/22/2023) -### New Features -* Added an experimental new "automatic" variant of HyperClockCache that does not require a prior estimate of the average size of cache entries. This variant is activated when HyperClockCacheOptions::estimated\_entry\_charge = 0 and has essentially the same concurrency benefits as the existing HyperClockCache. -* Add a new statistic `COMPACTION_CPU_TOTAL_TIME` that records cumulative compaction cpu time. This ticker is updated regularly while a compaction is running. -* Add `GetEntity()` API for ReadOnly DB and Secondary DB. -* Add a new iterator API `Iterator::Refresh(const Snapshot *)` that allows iterator to be refreshed while using the input snapshot to read. -* Added a new read option `merge_operand_count_threshold`. 
When the number of merge operands applied during a successful point lookup exceeds this threshold, the query will return a special OK status with a new subcode `kMergeOperandThresholdExceeded`. Applications might use this signal to take action to reduce the number of merge operands for the affected key(s), for example by running a compaction. -* For `NewRibbonFilterPolicy()`, made the `bloom_before_level` option mutable through the Configurable interface and the SetOptions API, allowing dynamic switching between all-Bloom and all-Ribbon configurations, and configurations in between. See comments on `NewRibbonFilterPolicy()` -* RocksDB now allows the block cache to be stacked on top of a compressed secondary cache and a non-volatile secondary cache, thus creating a three-tier cache. To set it up, use the `NewTieredCache()` API in rocksdb/cache.h.. -* Added a new wide-column aware full merge API called `FullMergeV3` to `MergeOperator`. `FullMergeV3` supports wide columns both as base value and merge result, which enables the application to perform more general transformations during merges. For backward compatibility, the default implementation implements the earlier logic of applying the merge operation to the default column of any wide-column entities. Specifically, if there is no base value or the base value is a plain key-value, the default implementation falls back to `FullMergeV2`. If the base value is a wide-column entity, the default implementation invokes `FullMergeV2` to perform the merge on the default column, and leaves any other columns unchanged. -* Add wide column support to ldb commands (scan, dump, idump, dump_wal) and sst_dump tool's scan command - -### Public API Changes -* Expose more information about input files used in table creation (if any) in `CompactionFilter::Context`. See `CompactionFilter::Context::input_start_level`,`CompactionFilter::Context::input_table_properties` for more. -* `Options::compaction_readahead_size` 's default value is changed from 0 to 2MB. -* When using LZ4 compression, the `acceleration` parameter is configurable by setting the negated value in `CompressionOptions::level`. For example, `CompressionOptions::level=-10` will set `acceleration=10` -* The `NewTieredCache` API has been changed to take the total cache capacity (inclusive of both the primary and the compressed secondary cache) and the ratio of total capacity to allocate to the compressed cache. These are specified in `TieredCacheOptions`. Any capacity specified in `LRUCacheOptions`, `HyperClockCacheOptions` and `CompressedSecondaryCacheOptions` is ignored. A new API, `UpdateTieredCache` is provided to dynamically update the total capacity, ratio of compressed cache, and admission policy. -* The `NewTieredVolatileCache()` API in rocksdb/cache.h has been renamed to `NewTieredCache()`. - -### Behavior Changes -* Compaction read performance will regress when `Options::compaction_readahead_size` is explicitly set to 0 -* Universal size amp compaction will conditionally exclude some of the newest L0 files when selecting input with a small negative impact to size amp. This is to prevent a large number of L0 files from being locked by a size amp compaction, potentially leading to write stop with a few more flushes. -* Change ldb scan command delimiter from ':' to '==>'. 
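To make the "automatic" HyperClockCache entry in the New Features list above concrete, here is a minimal, hypothetical configuration sketch. It assumes only what the entry states, namely that `HyperClockCacheOptions::estimated_entry_charge = 0` selects the automatic variant; the capacity value and function name are illustrative, not from the change log.

```cpp
#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

// Sketch: plug an "automatic" HyperClockCache (estimated_entry_charge = 0)
// in as the block cache. The 1 GiB capacity is an arbitrary example value.
rocksdb::Options MakeOptionsWithAutoHyperClockCache() {
  rocksdb::HyperClockCacheOptions cache_opts(
      /*_capacity=*/1024 * 1024 * 1024ULL,
      /*_estimated_entry_charge=*/0);  // 0 => automatic variant per the entry above
  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache_opts.MakeSharedCache();
  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return options;
}
```

The comments on `HyperClockCacheOptions` in rocksdb/cache.h remain the authoritative reference for these fields.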
- -### Bug Fixes -* Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. -* Fix a bug where the iterator may return incorrect results for DeleteRange() users if there was an error reading from a file. -* Fix a bug with atomic_flush=true that can cause the DB to get stuck after a flush fails (#11872). -* Fix a bug where RocksDB (with atomic_flush=false) can delete output SST files of pending flushes when a previous concurrent flush fails (#11865). This can result in DB entering read-only state with error message like `IO error: No such file or directory: While open a file for random read: /tmp/rocksdbtest-501/db_flush_test_87732_4230653031040984171/000013.sst`. -* Fix an assertion fault during seek with async_io when readahead trimming is enabled. -* When the compressed secondary cache capacity is reduced to 0, it should be completely disabled. Before this fix, inserts and lookups would still go to the backing `LRUCache` before returning, thus incurring locking overhead. With this fix, inserts and lookups are no-ops and do not add any overhead. -* Updating the tiered cache (cache allocated using NewTieredCache()) by calling SetCapacity() on it was not working properly. The initial creation would set the primary cache capacity to the combined primary and compressed secondary cache capacity. But SetCapacity() would just set the primary cache capacity. With this fix, the user always specifies the total budget and compressed secondary cache ratio on creation. Subsequently, SetCapacity() will distribute the new capacity across the two caches by the same ratio. -* Fixed a bug in `MultiGet` for cleaning up SuperVersion acquired with locking db mutex. -* Fix a bug where row cache can falsely return kNotFound even though row cache entry is hit. -* Fixed a race condition in `GenericRateLimiter` that could cause it to stop granting requests -* Fix a bug (Issue #10257) where DB can hang after write stall since no compaction is scheduled (#11764). -* Add a fix for async_io where during seek, when reading a block for seeking a target key in a file without any readahead, the iterator aligned the read on a page boundary and read more than necessary, which increased the storage read bandwidth usage. -* Fix an issue in sst dump tool to handle bounds specified for data with user-defined timestamps. -* When auto_readahead_size is enabled, update readahead upper bound during readahead trimming when reseek changes iterate_upper_bound dynamically. -* Fixed a bug where `rocksdb.file.read.verify.file.checksums.micros` is not populated - -### Performance Improvements -* Added additional improvements in tuning readahead_size during Scans when auto_readahead_size is enabled. However, it's not supported with the Iterator::Prev operation and will return a NotSupported error. -* During async_io, the Seek happens in 2 phases. Phase 1 starts an asynchronous read on a block cache miss, and phase 2 waits for it to complete and finishes the seek. In both phases, it tries to look up the block cache for the data block first before looking in the prefetch buffer. It's optimized by doing the block cache lookup only in the first phase, which saves some CPU. - -## 8.6.0 (08/18/2023) -### New Features -* Added enhanced data integrity checking on SST files with new format_version=6. Performance impact is very small or negligible.
Previously if SST data was misplaced or re-arranged by the storage layer, it could pass block checksum with higher than 1 in 4 billion probability. With format_version=6, block checksums depend on what file they are in and location within the file. This way, misplaced SST data is no more likely to pass checksum verification than randomly corrupted data. Also in format_version=6, SST footers are checksum-protected. -* Add a new feature to trim readahead_size during scans upto upper_bound when iterate_upper_bound is specified. It's enabled through ReadOptions.auto_readahead_size. Users must also specify ReadOptions.iterate_upper_bound. -* RocksDB will compare the number of input keys to the number of keys processed after each compaction. Compaction will fail and report Corruption status if the verification fails. Option `compaction_verify_record_count` is introduced for this purpose and is enabled by default. -* Add a CF option `bottommost_file_compaction_delay` to allow specifying the delay of bottommost level single-file compactions. -* Add support to allow enabling / disabling user-defined timestamps feature for an existing column family in combination with the in-Memtable only feature. -* Implement a new admission policy for the compressed secondary cache that admits blocks evicted from the primary cache with the hit bit set. This policy can be specified in TieredVolatileCacheOptions by setting the newly added adm_policy option. -* Add a column family option `memtable_max_range_deletions` that limits the number of range deletions in a memtable. RocksDB will try to do an automatic flush after the limit is reached. (#11358) -* Add PutEntity API in sst_file_writer -* Add `timeout` in microsecond option to `WaitForCompactOptions` to allow timely termination of prolonged waiting in scenarios like recurring recoverable errors, such as out-of-space situations and continuous write streams that sustain ongoing flush and compactions -* New statistics `rocksdb.file.read.{get|multiget|db.iterator|verify.checksum|verify.file.checksums}.micros` measure read time of block-based SST tables or blob files during db open, `Get()`, `MultiGet()`, using db iterator, `VerifyFileChecksums()` and `VerifyChecksum()`. They require stats level greater than `StatsLevel::kExceptDetailedTimers`. -* Add close_db option to `WaitForCompactOptions` to call Close() after waiting is done. -* Add a new compression option `CompressionOptions::checksum` for enabling ZSTD's checksum feature to detect corruption during decompression. - -### Public API Changes -* Mark `Options::access_hint_on_compaction_start` related APIs as deprecated. See #11631 for alternative behavior. - -### Behavior Changes -* Statistics `rocksdb.sst.read.micros` now includes time spent on multi read and async read into the file -* For Universal Compaction users, periodic compaction (option `periodic_compaction_seconds`) will be set to 30 days by default if block based table is used. - -### Bug Fixes -* Fix a bug in FileTTLBooster that can cause users with a large number of levels (more than 65) to see errors like "runtime error: shift exponent .. is too large.." (#11673). - -## 8.5.0 (07/21/2023) -### Public API Changes -* Removed recently added APIs `GeneralCache` and `MakeSharedGeneralCache()` as our plan changed to stop exposing a general-purpose cache interface. The old forms of these APIs, `Cache` and `NewLRUCache()`, are still available, although general-purpose caching support will be dropped eventually. 
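As a concrete illustration of the preceding bullet (the general-purpose `Cache`/`NewLRUCache()` form that remains available after the `GeneralCache` removal), here is a minimal sketch; the 64 MB capacity and the helper function name are illustrative assumptions, not part of the change log.

```cpp
#include <memory>

#include <rocksdb/cache.h>
#include <rocksdb/options.h>
#include <rocksdb/table.h>

// Sketch: build a plain LRU block cache with the still-supported NewLRUCache()
// and plug it into BlockBasedTableOptions. Capacity is an example value.
void UseLruBlockCache(rocksdb::Options& options) {
  std::shared_ptr<rocksdb::Cache> block_cache = rocksdb::NewLRUCache(64 << 20);
  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.block_cache = block_cache;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
}
```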
- -### Behavior Changes -* Option `periodic_compaction_seconds` no longer supports FIFO compaction: setting it has no effect on FIFO compactions. FIFO compaction users should only set option `ttl` instead. -* Move prefetching responsibility to page cache for compaction read for non directIO use case - -### Performance Improvements -* In case of direct_io, if buffer passed by callee is already aligned, RandomAccessFileRead::Read will avoid realloacting a new buffer, reducing memcpy and use already passed aligned buffer. -* Small efficiency improvement to HyperClockCache by reducing chance of compiler-generated heap allocations - -### Bug Fixes -* Fix use_after_free bug in async_io MultiReads when underlying FS enabled kFSBuffer. kFSBuffer is when underlying FS pass their own buffer instead of using RocksDB scratch in FSReadRequest. Right now it's an experimental feature. - -## 8.4.0 (06/26/2023) -### New Features -* Add FSReadRequest::fs_scratch which is a data buffer allocated and provided by underlying FileSystem to RocksDB during reads, when FS wants to provide its own buffer with data instead of using RocksDB provided FSReadRequest::scratch. This can help in cpu optimization by avoiding copy from file system's buffer to RocksDB buffer. More details on how to use/enable it in file_system.h. Right now its supported only for MultiReads(async + sync) with non direct io. -* Start logging non-zero user-defined timestamp sizes in WAL to signal user key format in subsequent records and use it during recovery. This change will break recovery from WAL files written by early versions that contain user-defined timestamps. The workaround is to ensure there are no WAL files to recover (i.e. by flushing before close) before upgrade. -* Added new property "rocksdb.obsolete-sst-files-size-property" that reports the size of SST files that have become obsolete but have not yet been deleted or scheduled for deletion -* Start to record the value of the flag `AdvancedColumnFamilyOptions.persist_user_defined_timestamps` in the Manifest and table properties for a SST file when it is created. And use the recorded flag when creating a table reader for the SST file. This flag is only explicitly record if it's false. -* Add a new option OptimisticTransactionDBOptions::shared_lock_buckets that enables sharing mutexes for validating transactions between DB instances, for better balancing memory efficiency and validation contention across DB instances. Different column families and DBs also now use different hash seeds in this validation, so that the same set of key names will not contend across DBs or column families. -* Add a new ticker `rocksdb.files.marked.trash.deleted` to track the number of trash files deleted by background thread from the trash queue. -* Add an API NewTieredVolatileCache() in include/rocksdb/cache.h to allocate an instance of a block cache with a primary block cache tier and a compressed secondary cache tier. A cache of this type distributes memory reservations against the block cache, such as WriteBufferManager, table reader memory etc., proportionally across both the primary and compressed secondary cache. -* Add `WaitForCompact()` to wait for all flush and compactions jobs to finish. Jobs to wait include the unscheduled (queued, but not scheduled yet). -* Add `WriteBatch::Release()` that releases the batch's serialized data to the caller. - -### Public API Changes -* Add C API `rocksdb_options_add_compact_on_deletion_collector_factory_del_ratio`. 
-* change the FileSystem::use_async_io() API to SupportedOps API in order to extend it to various operations supported by underlying FileSystem. Right now it contains FSSupportedOps::kAsyncIO and FSSupportedOps::kFSBuffer. More details about FSSupportedOps in filesystem.h -* Add new tickers: `rocksdb.error.handler.bg.error.count`, `rocksdb.error.handler.bg.io.error.count`, `rocksdb.error.handler.bg.retryable.io.error.count` to replace the misspelled ones: `rocksdb.error.handler.bg.errro.count`, `rocksdb.error.handler.bg.io.errro.count`, `rocksdb.error.handler.bg.retryable.io.errro.count` ('error' instead of 'errro'). Users should switch to use the new tickers before 9.0 release as the misspelled old tickers will be completely removed then. -* Overload the API CreateColumnFamilyWithImport() to support creating ColumnFamily by importing multiple ColumnFamilies It requires that CFs should not overlap in user key range. - -### Behavior Changes -* Change the default value for option `level_compaction_dynamic_level_bytes` to true. This affects users who use leveled compaction and do not set this option explicitly. These users may see additional background compactions following DB open. These compactions help to shape the LSM according to `level_compaction_dynamic_level_bytes` such that the size of each level Ln is approximately size of Ln-1 * `max_bytes_for_level_multiplier`. Turning on this option has other benefits too: see more detail in wiki: https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#option-level_compaction_dynamic_level_bytes-and-levels-target-size and in option comment in advanced_options.h (#11525). -* For Leveled Compaction users, `CompactRange()` will now always try to compact to the last non-empty level. (#11468) -For Leveled Compaction users, `CompactRange()` with `bottommost_level_compaction = BottommostLevelCompaction::kIfHaveCompactionFilter` will behave similar to `kForceOptimized` in that it will skip files created during this manual compaction when compacting files in the bottommost level. (#11468) -* RocksDB will try to drop range tombstones during non-bottommost compaction when it is safe to do so. (#11459) -* When a DB is openend with `allow_ingest_behind=true` (currently only Universal compaction is supported), files in the last level, i.e. the ingested files, will not be included in any compaction. (#11489) -* Statistics `rocksdb.sst.read.micros` scope is expanded to all SST reads except for file ingestion and column family import (some compaction reads were previously excluded). - -### Bug Fixes -* Reduced cases of illegally using Env::Default() during static destruction by never destroying the internal PosixEnv itself (except for builds checking for memory leaks). (#11538) -* Fix extra prefetching during seek in async_io when BlockBasedTableOptions.num_file_reads_for_auto_readahead is 1 leading to extra reads than required. -* Fix a bug where compactions that are qualified to be run as 2 subcompactions were only run as one subcompaction. -* Fix a use-after-move bug in block.cc. - -## 8.3.0 (05/19/2023) -### New Features -* Introduced a new option `block_protection_bytes_per_key`, which can be used to enable per key-value integrity protection for in-memory blocks in block cache (#11287). -* Added `JemallocAllocatorOptions::num_arenas`. Setting `num_arenas > 1` may mitigate mutex contention in the allocator, particularly in scenarios where block allocations commonly bypass jemalloc tcache. 
-* Improve the operational safety of publishing a DB or SST files to many hosts by using different block cache hash seeds on different hosts. The exact behavior is controlled by new option `ShardedCacheOptions::hash_seed`, which also documents the solved problem in more detail. -* Introduced a new option `CompactionOptionsFIFO::file_temperature_age_thresholds` that allows FIFO compaction to compact files to different temperatures based on key age (#11428). -* Added a new ticker stat to count how many times RocksDB detected a corruption while verifying a block checksum: `BLOCK_CHECKSUM_MISMATCH_COUNT`. -* New statistics `rocksdb.file.read.db.open.micros` that measures read time of block-based SST tables or blob files during db open. -* New statistics tickers for various iterator seek behaviors and relevant filtering, as \*`_LEVEL_SEEK_`\*. (#11460) - -### Public API Changes -* EXPERIMENTAL: Add new API `DB::ClipColumnFamily` to clip the key in CF to a certain range. It will physically deletes all keys outside the range including tombstones. -* Add `MakeSharedCache()` construction functions to various cache Options objects, and deprecated the `NewWhateverCache()` functions with long parameter lists. -* Changed the meaning of various Bloom filter stats (prefix vs. whole key), with iterator-related filtering only being tracked in the new \*`_LEVEL_SEEK_`\*. stats. (#11460) - -### Behavior changes -* For x86, CPU features are no longer detected at runtime nor in build scripts, but in source code using common preprocessor defines. This will likely unlock some small performance improvements on some newer hardware, but could hurt performance of the kCRC32c checksum, which is no longer the default, on some "portable" builds. See PR #11419 for details. - -### Bug Fixes -* Delete an empty WAL file on DB open if the log number is less than the min log number to keep -* Delete temp OPTIONS file on DB open if there is a failure to write it out or rename it - -### Performance Improvements -* Improved the I/O efficiency of prefetching SST metadata by recording more information in the DB manifest. Opening files written with previous versions will still rely on heuristics for how much to prefetch (#11406). - -## 8.2.0 (04/24/2023) -### Public API Changes -* `SstFileWriter::DeleteRange()` now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined. -* Add `multi_get_for_update` to C API. -* Remove unnecessary constructor for CompressionOptions. - -### Behavior changes -* Changed default block cache size from an 8MB to 32MB LRUCache, which increases the default number of cache shards from 16 to 64. This change is intended to minimize cache mutex contention under stress conditions. See https://github.com/facebook/rocksdb/wiki/Block-Cache for more information. -* For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now trivially moves levels down to fill LSM starting from bottommost level during DB open. See more in comments for option `level_compaction_dynamic_level_bytes` (#11321). -* User-provided `ReadOptions` take effect for more reads of non-`CacheEntryRole::kDataBlock` blocks. -* For level compaction with `level_compaction_dynamic_level_bytes=true`, RocksDB now drains unnecessary levels through background compaction automatically (#11340). 
This together with #11321 makes it automatic to migrate other compaction settings to level compaction with `level_compaction_dynamic_level_bytes=true`. In addition, a live DB that becomes smaller will now have unnecessary levels drained which can help to reduce read and space amp. -* If `CompactRange()` is called with `CompactRangeOptions::bottommost_level_compaction=kForce*` to compact from L0 to L1, RocksDB now will try to do trivial move from L0 to L1 and then do an intra L1 compaction, instead of an L0 to L1 compaction with trivial move disabled (#11375). - -### Bug Fixes -* In the DB::VerifyFileChecksums API, ensure that file system reads of SST files are equal to the readahead_size in ReadOptions, if specified. Previously, each read was 2x the readahead_size. -* In block cache tracing, fixed some cases of bad hit/miss information (and more) with MultiGet. - -### New Features -* Add experimental `PerfContext` counters `iter_{next|prev|seek}_count` for db iterator, each counting the times of corresponding API being called. -* Allow runtime changes to whether `WriteBufferManager` allows stall or not by calling `SetAllowStall()` -* Added statistics tickers BYTES_COMPRESSED_FROM, BYTES_COMPRESSED_TO, BYTES_COMPRESSION_BYPASSED, BYTES_COMPRESSION_REJECTED, NUMBER_BLOCK_COMPRESSION_BYPASSED, and NUMBER_BLOCK_COMPRESSION_REJECTED. Disabled/deprecated histograms BYTES_COMPRESSED and BYTES_DECOMPRESSED, and ticker NUMBER_BLOCK_NOT_COMPRESSED. The new tickers offer more insight into compression ratios, rejected vs. disabled compression, etc. (#11388) -* New statistics `rocksdb.file.read.{flush|compaction}.micros` that measure read time of block-based SST tables or blob files during flush or compaction. - -## 8.1.0 (03/18/2023) -### Behavior changes -* Compaction output file cutting logic now considers range tombstone start keys. For example, SST partitioner now may receive PartitionerRequest for range tombstone start keys. -* If the async_io ReadOption is specified for MultiGet or NewIterator on a platform that doesn't support IO uring, the option is ignored and synchronous IO is used. - -### Bug Fixes -* Fixed an issue for backward iteration when user defined timestamp is enabled in combination with BlobDB. -* Fixed a couple of cases where a Merge operand encountered during iteration wasn't reflected in the `internal_merge_count` PerfContext counter. -* Fixed a bug in CreateColumnFamilyWithImport()/ExportColumnFamily() which did not support range tombstones (#11252). -* Fixed a bug where an excluded column family from an atomic flush contains unflushed data that should've been included in this atomic flush (i.e., data of seqno less than the max seqno of this atomic flush), leading to potential data loss in this excluded column family when `WriteOptions::disableWAL == true` (#11148). - -### New Features -* Add statistics rocksdb.secondary.cache.filter.hits, rocksdb.secondary.cache.index.hits, and rocksdb.secondary.cache.filter.hits -* Added a new PerfContext counter `internal_merge_point_lookup_count` which tracks the number of Merge operands applied while serving point lookup queries. -* Add new statistics rocksdb.table.open.prefetch.tail.read.bytes, rocksdb.table.open.prefetch.tail.{miss|hit} -* Add support for SecondaryCache with HyperClockCache (`HyperClockCacheOptions` inherits `secondary_cache` option from `ShardedCacheOptions`) -* Add new db properties `rocksdb.cf-write-stall-stats`, `rocksdb.db-write-stall-stats` and APIs to examine them in a structured way.
In particular, users of `GetMapProperty()` with property `kCFWriteStallStats`/`kDBWriteStallStats` can now use the functions in `WriteStallStatsMapKeys` to find stats in the map. - -### Public API Changes -* Changed various functions and features in `Cache` that are mostly relevant to custom implementations or wrappers. Especially, asychronous lookup functionality is moved from `Lookup()` to a new `StartAsyncLookup()` function. - -## 8.0.0 (02/19/2023) -### Behavior changes -* `ReadOptions::verify_checksums=false` disables checksum verification for more reads of non-`CacheEntryRole::kDataBlock` blocks. -* In case of scan with async_io enabled, if posix doesn't support IOUring, Status::NotSupported error will be returned to the users. Initially that error was swallowed and reads were switched to synchronous reads. - -### Bug Fixes -* Fixed a data race on `ColumnFamilyData::flush_reason` caused by concurrent flushes. -* Fixed an issue in `Get` and `MultiGet` when user-defined timestamps is enabled in combination with BlobDB. -* Fixed some atypical behaviors for `LockWAL()` such as allowing concurrent/recursive use and not expecting `UnlockWAL()` after non-OK result. See API comments. -* Fixed a feature interaction bug where for blobs `GetEntity` would expose the blob reference instead of the blob value. -* Fixed `DisableManualCompaction()` and `CompactRangeOptions::canceled` to cancel compactions even when they are waiting on conflicting compactions to finish -* Fixed a bug in which a successful `GetMergeOperands()` could transiently return `Status::MergeInProgress()` -* Return the correct error (Status::NotSupported()) to MultiGet caller when ReadOptions::async_io flag is true and IO uring is not enabled. Previously, Status::Corruption() was being returned when the actual failure was lack of async IO support. -* Fixed a bug in DB open/recovery from a compressed WAL that was caused due to incorrect handling of certain record fragments with the same offset within a WAL block. - -### Feature Removal -* Remove RocksDB Lite. -* The feature block_cache_compressed is removed. Statistics related to it are removed too. -* Remove deprecated Env::LoadEnv(). Use Env::CreateFromString() instead. -* Remove deprecated FileSystem::Load(). Use FileSystem::CreateFromString() instead. -* Removed the deprecated version of these utility functions and the corresponding Java bindings: `LoadOptionsFromFile`, `LoadLatestOptions`, `CheckOptionsCompatibility`. -* Remove the FactoryFunc from the LoadObject method from the Customizable helper methods. - -### Public API Changes -* Moved rarely-needed Cache class definition to new advanced_cache.h, and added a CacheWrapper class to advanced_cache.h. Minor changes to SimCache API definitions. -* Completely removed the following deprecated/obsolete statistics: the tickers `BLOCK_CACHE_INDEX_BYTES_EVICT`, `BLOCK_CACHE_FILTER_BYTES_EVICT`, `BLOOM_FILTER_MICROS`, `NO_FILE_CLOSES`, `STALL_L0_SLOWDOWN_MICROS`, `STALL_MEMTABLE_COMPACTION_MICROS`, `STALL_L0_NUM_FILES_MICROS`, `RATE_LIMIT_DELAY_MILLIS`, `NO_ITERATORS`, `NUMBER_FILTERED_DELETES`, `WRITE_TIMEDOUT`, `BLOB_DB_GC_NUM_KEYS_OVERWRITTEN`, `BLOB_DB_GC_NUM_KEYS_EXPIRED`, `BLOB_DB_GC_BYTES_OVERWRITTEN`, `BLOB_DB_GC_BYTES_EXPIRED`, `BLOCK_CACHE_COMPRESSION_DICT_BYTES_EVICT` as well as the histograms `STALL_L0_SLOWDOWN_COUNT`, `STALL_MEMTABLE_COMPACTION_COUNT`, `STALL_L0_NUM_FILES_COUNT`, `HARD_RATE_LIMIT_DELAY_COUNT`, `SOFT_RATE_LIMIT_DELAY_COUNT`, `BLOB_DB_GC_MICROS`, and `NUM_DATA_BLOCKS_READ_PER_LEVEL`. 
Note that as a result, the C++ enum values of the still supported statistics have changed. Developers are advised to not rely on the actual numeric values. -* Deprecated IngestExternalFileOptions::write_global_seqno and change default to false. This option only needs to be set to true to generate a DB compatible with RocksDB versions before 5.16.0. -* Remove deprecated APIs `GetColumnFamilyOptionsFrom{Map|String}(const ColumnFamilyOptions&, ..)`, `GetDBOptionsFrom{Map|String}(const DBOptions&, ..)`, `GetBlockBasedTableOptionsFrom{Map|String}(const BlockBasedTableOptions& table_options, ..)` and ` GetPlainTableOptionsFrom{Map|String}(const PlainTableOptions& table_options,..)`. -* Added a subcode of `Status::Corruption`, `Status::SubCode::kMergeOperatorFailed`, for users to identify corruption failures originating in the merge operator, as opposed to RocksDB's internally identified data corruptions - -### Build Changes -* The `make` build now builds a shared library by default instead of a static library. Use `LIB_MODE=static` to override. - -### New Features -* Compaction filters are now supported for wide-column entities by means of the `FilterV3` API. See the comment of the API for more details. -* Added `do_not_compress_roles` to `CompressedSecondaryCacheOptions` to disable compression on certain kinds of block. Filter blocks are now not compressed by CompressedSecondaryCache by default. -* Added a new `MultiGetEntity` API that enables batched wide-column point lookups. See the API comments for more details. - -## 7.10.0 (01/23/2023) -### Behavior changes -* Make best-efforts recovery verify SST unique ID before Version construction (#10962) -* Introduce `epoch_number` and sort L0 files by `epoch_number` instead of `largest_seqno`. `epoch_number` represents the order of a file being flushed or ingested/imported. Compaction output file will be assigned with the minimum `epoch_number` among input files'. For L0, larger `epoch_number` indicates newer L0 file. - -### Bug Fixes -* Fixed a regression in iterator where range tombstones after `iterate_upper_bound` is processed. -* Fixed a memory leak in MultiGet with async_io read option, caused by IO errors during table file open -* Fixed a bug that multi-level FIFO compaction deletes one file in non-L0 even when `CompactionOptionsFIFO::max_table_files_size` is no exceeded since #10348 or 7.8.0. -* Fixed a bug caused by `DB::SyncWAL()` affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#10892). -* Fixed a BackupEngine bug in which RestoreDBFromLatestBackup would fail if the latest backup was deleted and there is another valid backup available. -* Fix L0 file misorder corruption caused by ingesting files of overlapping seqnos with memtable entries' through introducing `epoch_number`. Before the fix, `force_consistency_checks=true` may catch the corruption before it's exposed to readers, in which case writes returning `Status::Corruption` would be expected. Also replace the previous incomplete fix (#5958) to the same corruption with this new and more complete fix. -* Fixed a bug in LockWAL() leading to re-locking mutex (#11020). -* Fixed a heap use after free bug in async scan prefetching when the scan thread and another thread try to read and load the same seek block into cache. 
-* Fixed a heap use after free in async scan prefetching if dictionary compression is enabled, in which case sync read of the compression dictionary gets mixed with async prefetching. -* Fixed a data race bug where `CompactRange()` under `change_level=true` acts on a range overlapping with an ongoing file ingestion for level compaction. This will either result in overlapping file ranges corruption at a certain level caught by `force_consistency_checks=true` or potentially two same keys both with seqno 0 in two different levels (i.e., new data ends up in a lower/older level). The latter will be caught by an assertion in debug builds but go unnoticed and result in reads returning wrong results in release builds. This fix is general so it also replaced previous fixes to a similar problem for `CompactFiles()` (#4665), general `CompactRange()` and auto compaction (commit 5c64fb6 and 87dfc1d). -* Fixed a bug in compaction output cutting where small output files were produced because TTL file cutting states were not being updated (#11075). - -### New Features -* When an SstPartitionerFactory is configured, CompactRange() now automatically selects for compaction any files overlapping a partition boundary that is in the compaction range, even if no actual entries are in the requested compaction range. With this feature, manual compaction can be used to (re-)establish SST partition points when SstPartitioner changes, without a full compaction (see the sketch below). -* Add BackupEngine feature to exclude files from backup that are known to be backed up elsewhere, using `CreateBackupOptions::exclude_files_callback`. To restore the DB, the excluded files must be provided in alternative backup directories using `RestoreOptions::alternate_dirs`. - -### Public API Changes -* Substantial changes have been made to the Cache class to support internal development goals. Direct use of Cache class members is discouraged and further breaking modifications are expected in the future. SecondaryCache has some related changes and implementations will need to be updated. (Unlike Cache, SecondaryCache is still intended to support user implementations, and disruptive changes will be avoided.) (#10975) -* Add `MergeOperationOutput::op_failure_scope` for merge operator users to control the blast radius of merge operator failures. Existing merge operator users do not need to make any change to preserve the old behavior. - -### Performance Improvements -* Updated xxHash source code, which should improve kXXH3 checksum speed, at least on ARM (#11098). -* Improved CPU efficiency of DB reads, from block cache access improvements (#10975). - -## 7.9.0 (11/21/2022) -### Performance Improvements -* Fixed an iterator performance regression for delete range users when scanning through a consecutive sequence of range tombstones (#10877). - -### Bug Fixes -* Fix memory corruption error in scans if async_io is enabled. Memory corruption happened when an IOError while reading data left an empty buffer, and another buffer with an async read already in progress was read again. -* Fix failed memtable flush retry bug that could cause wrongly ordered updates, which would surface to writers as `Status::Corruption` in case of `force_consistency_checks=true` (default). It affects use cases that enable both parallel flush (`max_background_flushes > 1` or `max_background_jobs >= 8`) and non-default memtable count (`max_write_buffer_number > 2`).
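A minimal sketch of the manual compaction path referenced in the 7.10.0 `SstPartitionerFactory` item above; the helper name is hypothetical and the partitioner configuration itself is omitted:

```c++
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Hypothetical helper: after swapping in a new SstPartitionerFactory, run a
// manual compaction over the whole key range so the partitioner can re-cut
// SST boundaries. `db` is assumed to be an already-open DB*.
void RecutSstPartitions(rocksdb::DB* db) {
  rocksdb::CompactRangeOptions cro;
  // nullptr begin/end selects the entire key range.
  rocksdb::Status s = db->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr);
  assert(s.ok());
}
```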
-* Fixed an issue where the `READ_NUM_MERGE_OPERANDS` ticker was not updated when the base key-value or tombstone was read from an SST file. -* Fixed a memory safety bug when using a SecondaryCache with `block_cache_compressed`. `block_cache_compressed` no longer attempts to use SecondaryCache features. -* Fixed a regression in scan for async_io. During seek, valid buffers were getting cleared causing a regression. -* Tiered Storage: fixed excessive keys written to penultimate level in non-debug builds. - -### New Features -* Add basic support for user-defined timestamp to Merge (#10819). -* Add stats for ReadAsync time spent and async read errors. -* Basic support for the wide-column data model is now available. Wide-column entities can be stored using the `PutEntity` API, and retrieved using `GetEntity` and the new `columns` API of iterator. For compatibility, the classic APIs `Get` and `MultiGet`, as well as iterator's `value` API return the value of the anonymous default column of wide-column entities; also, `GetEntity` and iterator's `columns` return any plain key-values in the form of an entity which only has the anonymous default column. `Merge` (and `GetMergeOperands`) currently also apply to the default column; any other columns of entities are unaffected by `Merge` operations. Note that some features like compaction filters, transactions, user-defined timestamps, and the SST file writer do not yet support wide-column entities; also, there is currently no `MultiGet`-like API to retrieve multiple entities at once. We plan to gradually close the above gaps and also implement new features like column-level operations (e.g. updating or querying only certain columns of an entity). -* Marked HyperClockCache as a production-ready alternative to LRUCache for the block cache. HyperClockCache greatly improves hot-path CPU efficiency under high parallel load or high contention, with some documented caveats and limitations. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load. -* Add periodic diagnostics to info_log (LOG file) for HyperClockCache block cache if performance is degraded by bad `estimated_entry_charge` option. - -### Public API Changes -* Marked `block_cache_compressed` as a deprecated feature. Use SecondaryCache instead. -* Added a `SecondaryCache::InsertSaved()` API, with default implementation depending on `Insert()`. Some implementations might need to add a custom implementation of `InsertSaved()`. (Details in API comments.) - -## 7.8.0 (10/22/2022) -### New Features -* `DeleteRange()` now supports user-defined timestamp. -* Provide support for async_io with tailing iterators when ReadOptions.tailing is enabled during scans. -* Tiered Storage: allow data moving up from the last level to the penultimate level if the input level is penultimate level or above. -* Added `DB::Properties::kFastBlockCacheEntryStats`, which is similar to `DB::Properties::kBlockCacheEntryStats`, except that it returns cached (stale) values in more cases to reduce overhead (see the sketch below). -* FIFO compaction now supports migrating from a multi-level DB via DB::Open(). During the migration phase, FIFO compaction picker will: -* pick the sst file with the smallest starting key in the bottom-most non-empty level. -* Note that during the migration phase, the file purge order will only be an approximation of "FIFO" as files in lower levels might sometimes contain newer keys than files in upper levels.
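A minimal sketch of reading the `DB::Properties::kFastBlockCacheEntryStats` map property added above; the helper name is illustrative only:

```c++
#include <iostream>
#include <map>
#include <string>

#include "rocksdb/db.h"

// Dump the cached block cache entry stats; `db` is assumed to be an
// already-open DB*. GetMapProperty() returns false if the property is
// unavailable.
void DumpBlockCacheEntryStats(rocksdb::DB* db) {
  std::map<std::string, std::string> stats;
  if (db->GetMapProperty(rocksdb::DB::Properties::kFastBlockCacheEntryStats,
                         &stats)) {
    for (const auto& kv : stats) {
      std::cout << kv.first << " = " << kv.second << "\n";
    }
  }
}
```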
-* Added an option `ignore_max_compaction_bytes_for_input` to ignore the max_compaction_bytes limit when adding files to be compacted from the input level. This should help reduce write amplification. The option is enabled by default. -* Tiered Storage: allow data moving up from the last level even if it's a last level only compaction, as long as the penultimate level is empty. -* Add a new option IOOptions.do_not_recurse that can be used by underlying file systems to skip recursing through sub directories and list only files in the GetChildren API. -* Add option `preserve_internal_time_seconds` to preserve the time information for the latest data, which can be used to determine the age of data when `preclude_last_level_data_seconds` is enabled (see the sketch below). The time information is attached to SSTs in the table property `rocksdb.seqno.time.map`, which can be parsed by the ldb or sst_dump tools. - -### Bug Fixes -* Fix a bug in io_uring_prep_cancel in the AbortIO API for posix, which expects sqe->addr to match the submitted read request; the wrong parameter was being passed. -* Fixed a regression in iterator performance when the entire DB is a single memtable introduced in #10449. The fix is in #10705 and #10716. -* Fixed an optimistic transaction validation bug caused by DBImpl::GetLatestSequenceForKey() returning non-latest seq for merge (#10724). -* Fixed a bug in iterator refresh which could segfault for DeleteRange users (#10739). -* Fixed a bug causing manual flush with `flush_opts.wait=false` to stall when the database has stopped all writes (#10001). -* Fixed a bug in iterator refresh that was not freeing up SuperVersion, which could cause excessive resource pinning (#10770). -* Fixed a bug where RocksDB could compact endlessly when allow_ingest_behind is true and the bottommost level is not filled (#10767). -* Fixed a memory safety bug in experimental HyperClockCache (#10768) -* Fixed some cases where `ldb update_manifest` and `ldb unsafe_remove_sst_file` are not usable because they were requiring the DB files to match the existing manifest state (before updating the manifest to match a desired state). - -### Performance Improvements -* Try to align the compaction output file boundaries to the next level ones, which can reduce compaction load by more than 10% for the default level compaction. The feature is enabled by default; to disable it, set `AdvancedColumnFamilyOptions.level_compaction_dynamic_file_size` to false. As a side effect, it can create SSTs larger than the target_file_size (capped at 2x target_file_size) or smaller files. -* Improve RoundRobin TTL compaction so that it moves the compaction cursor the same way as normal RoundRobin compaction. -* Fix a small CPU regression caused by making UserComparatorWrapper Customizable, because Customizable itself has small CPU overhead for initialization. - -### Behavior Changes -* Sanitize min_write_buffer_number_to_merge to 1 if atomic flush is enabled to prevent unexpected data loss when WAL is disabled in a multi-column-family setting (#10773). -* When the periodic stat dumper wakes up every options.stats_dump_period_sec seconds, it won't dump stats for a CF that has had no changes in the period, unless 7 periods have been skipped. -* Only the periodic stats dumper triggered by options.stats_dump_period_sec will update the stats interval. Dumps triggered by DB::GetProperty() will not update the stats interval and will report based on the interval since the last stats dump.
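A minimal sketch of the `preserve_internal_time_seconds` option noted in the 7.8.0 New Features above, assuming it is settable via `Options`; the value and DB path are illustrative only:

```c++
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Open a DB that keeps sequence-number-to-time information for roughly the
// last day of data. The path and retention value are placeholders.
rocksdb::DB* OpenWithSeqnoTimeTracking() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.preserve_internal_time_seconds = 24 * 60 * 60;  // ~1 day

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/seqno_time_demo", &db);
  assert(s.ok());
  return db;
}
```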
- -### Public API changes -* Make kXXH3 checksum the new default, because it is faster on common hardware, especially with kCRC32c affected by a performance bug in some versions of clang (https://github.com/facebook/rocksdb/issues/9891). DBs written with this new setting can be read by RocksDB 6.27 and newer. -* Refactor the classes, APIs and data structures for block cache tracing to allow a user provided trace writer to be used. Introduced an abstract BlockCacheTraceWriter class that takes a structured BlockCacheTraceRecord. The BlockCacheTraceWriter implementation can then format and log the record in whatever way it sees fit. The default BlockCacheTraceWriterImpl does file tracing using a user provided TraceWriter. More details in rocksdb/include/block_cache_trace_writer.h. - -## 7.7.0 (09/18/2022) -### Bug Fixes -* Fixed a hang when an operation such as `GetLiveFiles` or `CreateNewBackup` is asked to trigger and wait for memtable flush on a read-only DB. Such indirect requests for memtable flush are now ignored on a read-only DB. -* Fixed bug where `FlushWAL(true /* sync */)` (used by `GetLiveFilesStorageInfo()`, which is used by checkpoint and backup) could cause parallel writes at the tail of a WAL file to never be synced. -* Fix periodic_task being unable to re-register the same task type, which may cause `SetOptions()` to fail to update periodic task timing such as `stats_dump_period_sec`, `stats_persist_period_sec`. -* Fixed a bug in the rocksdb.prefetched.bytes.discarded stat. It was counting the prefetch buffer size, rather than the actual number of bytes discarded from the buffer. -* Fix a bug where the directory containing CURRENT can be left unsynced after CURRENT is updated to point to the latest MANIFEST, which risks losing the CURRENT update on crash. -* Update rocksdb.multiget.io.batch.size stat in non-async MultiGet as well. -* Fix a bug in key range overlap checking with concurrent compactions when user-defined timestamp is enabled. User-defined timestamps should be EXCLUDED when checking if two ranges overlap. -* Fixed a bug where the blob cache prepopulating logic did not consider the secondary cache (see #10603). -* Fixed the rocksdb.num.sst.read.per.level, rocksdb.num.index.and.filter.blocks.read.per.level and rocksdb.num.level.read.per.multiget stats in the MultiGet coroutines. - -### Public API changes -* Add `rocksdb_column_family_handle_get_id`, `rocksdb_column_family_handle_get_name` to get the name and id of a column family in the C API. -* Add a new stat rocksdb.async.prefetch.abort.micros to measure time spent waiting for async prefetch reads to abort. - -### Java API Changes -* Add CompactionPriority.RoundRobin. -* Revert to using the default metadata charge policy when creating an LRU cache via the Java API. - -### Behavior Change -* DBOptions::verify_sst_unique_id_in_manifest is now an on-by-default feature that verifies SST file identity whenever SST files are opened by a DB, rather than only at DB::Open time. -* Right now, when the option migration tool (OptionChangeMigration()) migrates to FIFO compaction, it compacts all the data into one single SST file and moves it to L0. This might create a problem for some users: the giant file may soon be deleted to satisfy max_table_files_size, and might cause the DB to be almost empty. We change the behavior so that the files are cut to be smaller, but these files might not follow the data insertion order. With the change, after the migration, migrated data might not be dropped by insertion order by FIFO compaction.
-* When a block is first found in `CompressedSecondaryCache`, we just insert a dummy block into the primary cache and don’t erase the block from `CompressedSecondaryCache`. A standalone handle is returned to the caller. Only if the block is found again in `CompressedSecondaryCache` before the dummy block is evicted, we erase the block from `CompressedSecondaryCache` and insert it into the primary cache. -* When a block is first evicted from the primary cache to `CompressedSecondaryCache`, we just insert a dummy block in `CompressedSecondaryCache`. Only if it is evicted again before the dummy block is evicted from the cache, it is treated as a hot block and is inserted into `CompressedSecondaryCache`. -* Improved the estimation of memory used by cached blobs by taking into account the size of the object owning the blob value and also the allocator overhead if `malloc_usable_size` is available (see #10583). -* Blob values now have their own category in the cache occupancy statistics, as opposed to being lumped into the "Misc" bucket (see #10601). -* Change the optimize_multiget_for_io experimental ReadOptions flag to default on. - -### New Features -* RocksDB does internal auto prefetching if it notices 2 sequential reads and readahead_size is not specified. New option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions which indicates after how many sequential reads internal auto prefetching should start (default is 2). -* Added new perf context counters `block_cache_standalone_handle_count`, `block_cache_real_handle_count`, `compressed_sec_cache_insert_real_count`, `compressed_sec_cache_insert_dummy_count`, `compressed_sec_cache_uncompressed_bytes`, and `compressed_sec_cache_compressed_bytes`. -* Memory for blobs which are to be inserted into the blob cache is now allocated using the cache's allocator (see #10628 and #10647). -* HyperClockCache is an experimental, lock-free Cache alternative for block cache that offers much improved CPU efficiency under high parallel load or high contention, with some caveats. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load. -* `CompressedSecondaryCacheOptions::enable_custom_split_merge` is added for enabling the custom split and merge feature, which splits the compressed value into chunks so that they may better fit jemalloc bins. - -### Performance Improvements -* Iterator performance is improved for `DeleteRange()` users. Internally, the iterator will skip to the end of a range tombstone when possible, instead of looping through each key and checking individually whether a key is range deleted. -* Eliminated some allocations and copies in the blob read path. Also, `PinnableSlice` now only points to the blob value and pins the backing resource (cache entry or buffer) in all cases, instead of containing a copy of the blob value. See #10625 and #10647. -* In case of scans with async_io enabled, a few optimizations have been added to issue more asynchronous requests in parallel in order to avoid synchronous prefetching. -* `DeleteRange()` users should see improvement in get/iterator performance from the mutable memtable (see #10547). - -## 7.6.0 (08/19/2022) -### New Features -* Added `prepopulate_blob_cache` to ColumnFamilyOptions. If enabled, prepopulate warm/hot blobs which are already in memory into the blob cache at the time of flush. On a flush, the blob that is in memory (in memtables) gets flushed to the device.
If using Direct IO, additional IO is incurred to read this blob back into memory again, which is avoided by enabling this option. This further helps if the workload exhibits high temporal locality, where most of the reads go to recently written data. This also helps in case of the remote file system since it involves network traffic and higher latencies. -* Support using a secondary cache with the blob cache. When creating a blob cache, the user can set a secondary blob cache by configuring `secondary_cache` in LRUCacheOptions. -* Charge memory usage of the blob cache when the backing cache of the blob cache and the block cache are different. If an operation reserving memory for the blob cache exceeds the available space left in the block cache at some point (i.e., causing a cache full under `LRUCacheOptions::strict_capacity_limit` = true), creation will fail with `Status::MemoryLimit()`. To opt in to this feature, enable charging `CacheEntryRole::kBlobCache` in `BlockBasedTableOptions::cache_usage_options`. -* Improve subcompaction range partitioning so that it is likely to be more even. A more even distribution of subcompactions will improve compaction throughput for some workloads. All input files' index blocks are used to sample anchor key points, from which we pick positions to partition the input range. This would introduce some CPU overhead in the compaction preparation phase, if subcompaction is enabled, but it should be a small fraction of the CPU usage of the whole compaction process. This also brings a behavior change: the subcompaction number is much more likely to be maxed out than before. -* Add CompactionPri::kRoundRobin, a compaction picking mode that cycles through all the files with a compact cursor in a round-robin manner. This feature is available since 7.5. -* Provide support for subcompactions for user_defined_timestamp. -* Added an option `memtable_protection_bytes_per_key` that turns on memtable per key-value checksum protection. Each memtable entry will be suffixed by a checksum that is computed during writes, and verified in reads/compaction. Detected corruption will be logged and a corruption status returned to the user. -* Added a blob-specific cache priority level - bottom level. Blobs are typically lower-value targets for caching than data blocks, since 1) with BlobDB, data blocks containing blob references conceptually form an index structure which has to be consulted before we can read the blob value, and 2) cached blobs represent only a single key-value, while cached data blocks generally contain multiple KVs. The user can specify the new option `low_pri_pool_ratio` in `LRUCacheOptions` to configure the ratio of capacity reserved for low priority cache entries (and therefore the remaining ratio is the space reserved for the bottom level), or configure the new argument `low_pri_pool_ratio` in `NewLRUCache()` to achieve the same effect. - -### Public API changes -* Removed Customizable support for RateLimiter and removed its CreateFromString() and Type() functions. -* `CompactRangeOptions::exclusive_manual_compaction` is now false by default. This ensures RocksDB does not introduce artificial parallelism limitations by default. -* Tiered Storage: change `bottommost_temperature` to `last_level_temperature`. The old option name is kept only for migration; please use the new option. The behavior is changed to apply temperature to the `last_level` SST files only.
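A minimal sketch of the renamed tiered-storage option above, assuming the `last_level_temperature` spelling in `AdvancedColumnFamilyOptions`; the temperature choice is illustrative, and the configured FileSystem has to act on temperature hints for this to have any effect:

```c++
#include "rocksdb/options.h"

// Build options that hint SST files compacted into the last level as cold
// data. This is only a hint; storage-side behavior depends on the FileSystem.
rocksdb::Options MakeTieredOptions() {
  rocksdb::Options options;
  options.last_level_temperature = rocksdb::Temperature::kCold;
  return options;
}
```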
-* Added a new experimental ReadOption flag called optimize_multiget_for_io, which when set attempts to reduce MultiGet latency by spawning coroutines for keys in multiple levels. - -### Bug Fixes -* Fix a bug starting in 7.4.0 in which some fsync operations might be skipped in a DB after any DropColumnFamily on that DB, until it is re-opened. This can lead to data loss on power loss. (For custom FileSystem implementations, this could lead to `FSDirectory::Fsync` or `FSDirectory::Close` after the first `FSDirectory::Close`; Also, valgrind could report call to `close()` with `fd=-1`.) -* Fix a bug where `GenericRateLimiter` could revert the bandwidth set dynamically using `SetBytesPerSecond()` when a user configures a structure enclosing it, e.g., using `GetOptionsFromString()` to configure an `Options` that references an existing `RateLimiter` object. -* Fix race conditions in `GenericRateLimiter`. -* Fix a bug in `FIFOCompactionPicker::PickTTLCompaction` where total_size calculation might cause underflow. -* Fix data race bug in hash linked list memtable. With this bug, a read request might temporarily miss an old record in the memtable during a race condition on the hash bucket. -* Fix a bug that `best_efforts_recovery` may fail to open the db with mmap read. -* Fixed a bug where blobs read during compaction would pollute the cache. -* Fixed a data race in LRUCache when used with a secondary_cache. -* Fixed a bug where blobs read by iterators would be inserted into the cache even with the `fill_cache` read option set to false. -* Fixed the segfault caused by `AllocateData()` in `CompressedSecondaryCache::SplitValueIntoChunks()` and `MergeChunksIntoValueTest`. -* Fixed a bug in BlobDB where a mix of inlined and blob values could result in an incorrect value being passed to the compaction filter (see #10391). -* Fixed a memory leak bug in stress tests caused by `FaultInjectionSecondaryCache`. - -### Behavior Change -* Added checksum handshake during the copying of decompressed WAL fragment. This together with #9875, #10037, #10212, #10114 and #10319 provides end-to-end integrity protection for write batch during recovery. -* To minimize the internal fragmentation caused by the variable size of the compressed blocks in `CompressedSecondaryCache`, the original block is split according to the jemalloc bin size in `Insert()` and then merged back in `Lookup()`. -* PosixLogger is removed and by default EnvLogger will be used for info logging. The behavior of the two loggers should be very similar when using the default Posix Env. -* Remove [min|max]_timestamp from VersionEdit for now since they are not tracked in MANIFEST anyway but consume two empty std::string (up to 64 bytes) for each file. Should they be added back in the future, we should store them more compactly. -* Improve universal tiered storage compaction picker to avoid extra major compaction triggered by size amplification. If `preclude_last_level_data_seconds` is enabled, the size amplification is calculated within non-last-level data only, which skips the last level and uses the penultimate level as the size base. -* If an error is hit when writing to a file (append, sync, etc), RocksDB is more strict with not issuing more operations to it, except closing the file, with the exception of some WAL file operations in the error recovery path. -* A `WriteBufferManager` constructed with `allow_stall == false` will no longer trigger write stall implicitly by thrashing until memtable count limit is reached.
Instead, a column family can continue accumulating writes while that CF is flushing, which means memory may increase. Users who prefer stalling writes must now explicitly set `allow_stall == true`. -* Add `CompressedSecondaryCache` into the stress tests. -* Block cache keys have changed, which will cause any persistent caches to miss between versions. - -### Performance Improvements -* Instead of constructing `FragmentedRangeTombstoneList` during every read operation, it is now constructed once and stored in immutable memtables. This improves speed of querying range tombstones from immutable memtables. -* When using iterators with the integrated BlobDB implementation, blob cache handles are now released immediately when the iterator's position changes. -* MultiGet can now do more IO in parallel by reading data blocks from SST files in multiple levels, if the optimize_multiget_for_io ReadOption flag is set. - -## 7.5.0 (07/15/2022) -### New Features -* Mempurge option flag `experimental_mempurge_threshold` is now a ColumnFamilyOptions option and can now be dynamically configured using `SetOptions()`. -* Support backward iteration when `ReadOptions::iter_start_ts` is set. -* Provide support for ReadOptions.async_io with direct_io to improve Seek latency by using async IO to parallelize child iterator seek and doing asynchronous prefetching on sequential scans. -* Added support for blob caching in order to cache frequently used blobs for BlobDB. - * User can configure the new ColumnFamilyOptions `blob_cache` to enable/disable blob caching. - * Either sharing the backend cache with the block cache or using a completely separate cache is supported. - * A new abstraction interface called `BlobSource` for blob read logic gives all users access to blobs, whether they are in the blob cache, secondary cache, or (remote) storage. Blobs can be potentially read both while handling user reads (`Get`, `MultiGet`, or iterator) and during compaction (while dealing with compaction filters, Merges, or garbage collection) but eventually all blob reads go through `Version::GetBlob` or, for MultiGet, `Version::MultiGetBlob` (and then get dispatched to the interface -- `BlobSource`). -* Add experimental tiered compaction feature `AdvancedColumnFamilyOptions::preclude_last_level_data_seconds`, which makes sure the new data inserted within preclude_last_level_data_seconds won't be placed on cold tier (the feature is not complete). - -### Public API changes -* Add metadata related structs and functions in C API, including - * `rocksdb_get_column_family_metadata()` and `rocksdb_get_column_family_metadata_cf()` to obtain `rocksdb_column_family_metadata_t`. - * `rocksdb_column_family_metadata_t` and its get functions & destroy function. - * `rocksdb_level_metadata_t` and its get functions & destroy function. - * `rocksdb_file_metadata_t` and its get functions & destroy functions. -* Add suggest_compact_range() and suggest_compact_range_cf() to C API. -* When using block cache strict capacity limit (`LRUCache` with `strict_capacity_limit=true`), DB operations now fail with Status code `kAborted` subcode `kMemoryLimit` (`IsMemoryLimit()`) instead of `kIncomplete` (`IsIncomplete()`) when the capacity limit is reached, because Incomplete can mean other specific things for some operations. In more detail, `Cache::Insert()` now returns the updated Status code and this usually propagates through RocksDB to the user on failure.
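A minimal sketch of the strict-capacity-limit behavior described in the item above; the cache size, key, and helper names are illustrative only:

```c++
#include <memory>
#include <string>

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

// Build Options with a block cache that has a hard capacity limit.
rocksdb::Options MakeStrictCacheOptions() {
  rocksdb::LRUCacheOptions cache_opts;
  cache_opts.capacity = 32 << 20;           // 32 MiB block cache (illustrative)
  cache_opts.strict_capacity_limit = true;  // fail instead of exceeding capacity
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(cache_opts);

  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache;

  rocksdb::Options options;
  options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return options;
}

// A read path that distinguishes the memory-limit failure from other errors.
void ReadWithLimitCheck(rocksdb::DB* db) {
  std::string value;
  rocksdb::Status s = db->Get(rocksdb::ReadOptions(), "some_key", &value);
  if (s.IsMemoryLimit()) {
    // The block cache hit its strict capacity limit; back off or retry.
  }
}
```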
-* NewClockCache calls temporarily return an LRUCache (with similar characteristics as the desired ClockCache). This is because ClockCache is being replaced by a new version (the old one had unknown bugs) but this is still under development. -* Add two functions `int ReserveThreads(int threads_to_be_reserved)` and `int ReleaseThreads(threads_to_be_released)` into the `Env` class. In the default implementation, both return 0. Newly added `xxxEnv` classes that inherit `Env` should implement these two functions for thread reservation/releasing features. -* Add `rocksdb_options_get_prepopulate_blob_cache` and `rocksdb_options_set_prepopulate_blob_cache` to C API. -* Add `prepopulateBlobCache` and `setPrepopulateBlobCache` to Java API. - -### Bug Fixes -* Fix a bug in which backup/checkpoint can include a WAL deleted by RocksDB. -* Fix a bug where concurrent compactions might cause unnecessary further write stalling. In some cases, this might cause write rate to drop to minimum. -* Fix a bug in Logger where if dbname and db_log_dir are on different filesystems, dbname creation would fail with respect to the db_log_dir path, returning an error and failing to open the DB. -* Fix a CPU and memory efficiency issue introduced by https://github.com/facebook/rocksdb/pull/8336 which made InternalKeyComparator configurable as an unintended side effect. - -### Behavior Change -* In leveled compaction with dynamic levelling, the level multiplier is no longer adjusted due to an oversized L0. Instead, the compaction score is adjusted by increasing the level size target by the incoming bytes from upper levels. This would deprioritize compactions from upper levels if more data from L0 is coming. This is to fix some unnecessary full stalling due to drastic change of level targets, while not wasting write bandwidth for compaction while writes are overloaded. -* For track_and_verify_wals_in_manifest, revert to the original behavior before #10087: syncing of live WAL file is not tracked, and we track only the synced sizes of **closed** WALs. (PR #10330). -* WAL compression now computes/verifies checksum during compression/decompression. - -### Performance Improvements -* Rather than doing a total sort against all files in a level, SortFileByOverlappingRatio() now only finds the top 50 files based on score. This can improve write throughput for the use cases where data is loaded in increasing key order and there are a lot of files in one LSM-tree, where applying compaction results is the bottleneck. -* In leveled compaction, L0->L1 trivial move will allow more than one file to be moved in one compaction. This would allow L0 files to be moved down faster when data is loaded in sequential order, making slowdown or stop condition harder to hit. Also seek L0->L1 trivial move when only some files qualify. -* In leveled compaction, try to trivially move more than one file if possible, up to 4 files or max_compaction_bytes. This is to allow higher write throughput for some use cases where data is loaded in sequential order, where applying compaction results is the bottleneck. - -## 7.4.0 (06/19/2022) -### Bug Fixes -* Fixed a bug in calculating key-value integrity protection for users of in-place memtable updates. In particular, the affected users would be those who configure `protection_bytes_per_key > 0` on `WriteBatch` or `WriteOptions`, and configure `inplace_callback != nullptr`. -* Fixed a bug where a snapshot taken during SST file ingestion would be unstable.
-* Fixed a bug for non-TransactionDB with avoid_flush_during_recovery = true and TransactionDB where, in case of a crash, min_log_number_to_keep may not change on recovery, and persisting a new MANIFEST with advanced log_numbers for some column families results in a "column family inconsistency" error on the second recovery. As a solution, RocksDB will persist the new MANIFEST after successfully syncing the new WAL. If a future recovery starts from the new MANIFEST, then it means the new WAL is successfully synced. Due to the sentinel empty write batch at the beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point. If future recovery starts from the old MANIFEST, it means writing the new MANIFEST failed. We won't have the "SST ahead of WAL" error. -* Fixed a bug where RocksDB DB::Open() may create and write to two new MANIFEST files even before recovery succeeds. Now writes to MANIFEST are persisted only after recovery is successful. -* Fix a race condition in WAL size tracking which is caused by an unsafe iterator access after the container is changed. -* Fix unprotected concurrent accesses to `WritableFileWriter::filesize_` by `DB::SyncWAL()` and `DB::Put()` in two-write-queue mode. -* Fix a bug in WAL tracking. Before this PR (#10087), calling `SyncWAL()` on the only WAL file of the db would not log the event in MANIFEST, thus allowing a subsequent `DB::Open` even if the WAL file is missing or corrupted. -* Fix a bug that could return wrong results with `index_type=kHashSearch` and using `SetOptions` to change the `prefix_extractor`. -* Fixed a bug in WAL tracking with wal_compression. WAL compression writes a kSetCompressionType record which is not associated with any sequence number. As a result, WalManager::GetSortedWalsOfType() will skip these WALs and not return them to callers, e.g. Checkpoint, Backup, causing the operations to fail. -* Avoid a crash if the IDENTITY file is accidentally truncated to empty. A new DB ID will be generated and written on Open. -* Fixed a possible corruption for users of `manual_wal_flush` and/or `FlushWAL(true /* sync */)`, together with `track_and_verify_wals_in_manifest == true`. For those users, losing unsynced data (e.g., due to power loss) could make future DB opens fail with a `Status::Corruption` complaining about missing WAL data. -* Fixed a bug in `WriteBatchInternal::Append()` where the WAL termination point in the write batch was not considered and the function appended an incorrect number of checksums. -* Fixed a crash bug introduced in 7.3.0 affecting users of MultiGet with `kDataBlockBinaryAndHash`. - -### Public API changes -* Add new API GetUnixTime in Snapshot class which returns the unix time at which the Snapshot was taken. -* Add transaction `get_pinned` and `multi_get` to C API. -* Add two-phase commit support to C API. -* Add `rocksdb_transaction_get_writebatch_wi` and `rocksdb_transaction_rebuild_from_writebatch` to C API. -* Add `rocksdb_options_get_blob_file_starting_level` and `rocksdb_options_set_blob_file_starting_level` to C API. -* Add `blobFileStartingLevel` and `setBlobFileStartingLevel` to Java API. -* Add SingleDelete for DB in C API. -* Add User Defined Timestamp in C API.
- * `rocksdb_comparator_with_ts_create` to create a timestamp-aware comparator. - * The Put, Get, Delete, SingleDelete, and MultiGet APIs have corresponding timestamp-aware APIs with suffix `with_ts`. - * Also added C APIs for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental)) -* The contract for implementations of Comparator::IsSameLengthImmediateSuccessor has been updated to work around a design bug in `auto_prefix_mode`. -* The API documentation for `auto_prefix_mode` now notes some corner cases in which it returns different results than `total_order_seek`, due to design bugs that are not easily fixed. Users using built-in comparators and keys at least the size of a fixed prefix length are not affected. -* Obsoleted the NUM_DATA_BLOCKS_READ_PER_LEVEL stat and introduced the NUM_LEVEL_READ_PER_MULTIGET and MULTIGET_COROUTINE_COUNT stats. -* Introduced `WriteOptions::protection_bytes_per_key`, which can be used to enable key-value integrity protection for live updates. - -### New Features -* Add FileSystem::ReadAsync API in io_tracing. -* Add blob garbage collection parameters `blob_garbage_collection_policy` and `blob_garbage_collection_age_cutoff` to both force-enable and force-disable GC, as well as selectively override age cutoff when using CompactRange. -* Add an extra sanity check in `GetSortedWalFiles()` (also used by `GetLiveFilesStorageInfo()`, `BackupEngine`, and `Checkpoint`) to reduce the risk of a successfully created backup or checkpoint failing to open because of a missing WAL file. -* Add a new column family option `blob_file_starting_level` to enable writing blob files during flushes and compactions starting from the specified LSM tree level. -* Add support for timestamped snapshots (#9879) -* Provide support for AbortIO in posix to cancel submitted asynchronous requests using io_uring. -* Add support for rate-limiting batched `MultiGet()` APIs. -* Added several new tickers, perf context statistics, and DB properties to BlobDB - * Added new DB properties "rocksdb.blob-cache-capacity", "rocksdb.blob-cache-usage", "rocksdb.blob-cache-pinned-usage" to show blob cache usage. - * Added new perf context statistics `blob_cache_hit_count`, `blob_read_count`, `blob_read_byte`, `blob_read_time`, `blob_checksum_time` and `blob_decompress_time`. - * Added new tickers `BLOB_DB_CACHE_MISS`, `BLOB_DB_CACHE_HIT`, `BLOB_DB_CACHE_ADD`, `BLOB_DB_CACHE_ADD_FAILURES`, `BLOB_DB_CACHE_BYTES_READ` and `BLOB_DB_CACHE_BYTES_WRITE`. - -### Behavior changes -* DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984) -* DB::Write does not hold global `mutex_` if this db instance does not need to switch wal and mem-table (#7516). -* Removed support for reading Bloom filters using obsolete block-based filter format. (Support for writing such filters was dropped in 7.0.) For good read performance on old DBs using these filters, a full compaction is required. -* Per KV checksum in write batch is verified before a write batch is written to WAL to detect any corruption to the write batch (#10114). - -### Performance Improvements -* When compiled with folly (Meta-internal integration; experimental in open source build), improve the locking performance (CPU efficiency) of LRUCache by using folly DistributedMutex in place of standard mutex. - -## 7.3.0 (05/20/2022) -### Bug Fixes -* Fixed a bug where manual flush would block forever even though flush options had wait=false.
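A minimal sketch of the non-blocking manual flush path involved in the fix above; the helper name is illustrative only:

```c++
#include <cassert>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Request a manual flush without blocking until it completes; `db` is
// assumed to be an already-open DB*.
void ScheduleFlushWithoutWaiting(rocksdb::DB* db) {
  rocksdb::FlushOptions flush_opts;
  flush_opts.wait = false;  // schedule the flush but do not wait for it
  rocksdb::Status s = db->Flush(flush_opts);
  assert(s.ok());
}
```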
-* Fixed a bug where RocksDB could corrupt DBs with `avoid_flush_during_recovery == true` by removing valid WALs, leading to `Status::Corruption` with message like "SST file is ahead of WALs" when attempting to reopen. -* Fixed a bug in the async_io path where an incorrect length of data is read by FilePrefetchBuffer if data is consumed from two populated buffers and a request for more data is sent. -* Fixed a CompactionFilter bug. Compaction filter used to use `Delete` to remove keys, even if the keys should be removed with `SingleDelete`. Mixing `Delete` and `SingleDelete` may cause undefined behavior. -* Fixed a bug in `WritableFileWriter::WriteDirect` and `WritableFileWriter::WriteDirectWithChecksum`. The rate_limiter_priority specified in ReadOptions was not passed to the RateLimiter when requesting a token. -* Fixed a bug which might cause a process crash when an I/O error happens while reading an index block in MultiGet(). - -### New Features -* DB::GetLiveFilesStorageInfo is ready for production use. -* Add new stats PREFETCHED_BYTES_DISCARDED which records the number of prefetched bytes discarded by RocksDB FilePrefetchBuffer on destruction and POLL_WAIT_MICROS which records wait time for FS::Poll API completion. -* RemoteCompaction supports table_properties_collector_factories override on the compaction worker. -* Start tracking SST unique id in MANIFEST, which will be used to verify with SST properties during DB open to make sure the SST file is not overwritten or misplaced. A db option `verify_sst_unique_id_in_manifest` is introduced to enable/disable the verification; if enabled, all SST files will be opened during DB-open to verify the unique id (default is false), so it's recommended to use it with `max_open_files = -1` to pre-open the files. -* Added the ability to concurrently read data blocks from multiple files in a level in batched MultiGet. This can be enabled by setting the async_io option in ReadOptions. Using this feature requires a FileSystem that supports ReadAsync (PosixFileSystem is not supported yet for this), and for RocksDB to be compiled with folly and c++20. -* Charge memory usage of file metadata. RocksDB holds one file metadata structure in-memory per on-disk table file. If an operation reserving memory for file metadata exceeds the available space left in the block cache at some point (i.e., causing a cache full under `LRUCacheOptions::strict_capacity_limit` = true), creation will fail with `Status::MemoryLimit()`. To opt in to this feature, enable charging `CacheEntryRole::kFileMetadata` in `BlockBasedTableOptions::cache_usage_options`. - -### Public API changes -* Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during the prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key that can lead to undefined behaviors for write-prepared transactions. -* EXPERIMENTAL: Add new API AbortIO in file_system to abort the read requests submitted asynchronously. -* CompactionFilter::Decision has a new value: kRemoveWithSingleDelete. If CompactionFilter returns this decision, then CompactionIterator will use `SingleDelete` to mark a key as removed. -* Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how compaction iterator handles keys. -* Added ability to specify functions for Prepare and Validate to OptionsTypeInfo.
Added methods to OptionTypeInfo to set the functions via an API. These methods are intended for RocksDB plugin developers for configuration management. -* Added a new immutable db option, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporary option is to help existing use cases migrate. -* Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and `BlockBasedTableOptions::reserve_table_reader_memory`. -* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use. -* In IOOptions, mark `prio` as deprecated for future removal. -* In `file_system.h`, mark `IOPriority` as deprecated for future removal. -* Add an option, `CompressionOptions::use_zstd_dict_trainer`, to indicate whether the zstd dictionary trainer should be used for generating zstd compression dictionaries. The default value of this option is true for backward compatibility. When this option is set to false, the zstd API `ZDICT_finalizeDictionary` is used to generate compression dictionaries. -* The Seek API now positions every LevelIterator on the correct data block in the correct SST file, which can be parallelized if the ReadOptions.async_io option is enabled. -* Add new stat number_async_seek in PerfContext that indicates the number of async calls made by seek to prefetch data. -* Add support for user-defined timestamps to read-only DB. - -### Bug Fixes -* RocksDB calls the FileSystem::Poll API during FilePrefetchBuffer destruction, which impacts performance as it waits for read request completion, which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue. -* Fixed unnecessary block cache contention when queries within a MultiGet batch and across parallel batches access the same data block, which previously could cause severely degraded performance in this unusual case. (In more typical MultiGet cases, this fix is expected to yield a small or negligible performance improvement.) - -### Behavior changes -* Enforce the existing contract of SingleDelete so that SingleDelete cannot be mixed with Delete because it leads to undefined behavior. Fix a number of unit tests that violate the contract but happen to pass. -* ldb `--try_load_options` defaults to true if `--db` is specified and not creating a new DB; the user can still explicitly disable that by `--try_load_options=false` (or explicitly enable that by `--try_load_options`). -* During Flush write or Compaction write/read, the WriteController is used to determine whether DB writes are stalled or slowed down. The priority (Env::IOPriority) can then be determined accordingly and be passed in IOOptions to the file system. - -### Performance Improvements -* Avoid calling malloc_usable_size() in LRU Cache's mutex. -* Reduce DB mutex holding time when finding obsolete files to delete. When a file is trivially moved to another level, the internal files will be referenced twice internally and sometimes opened twice too. If a deletion candidate file is not the last reference, we need to destroy the reference and close the file but not delete the file. Right now we determine it by building a set of all live files.
With the improvement, we check the file against all live LSM-tree versions instead. - -## 7.2.0 (04/15/2022) -### Bug Fixes -* Fixed a bug which caused RocksDB failure when the DB was accessed using a UNC path. -* Fixed a race condition when 2PC is disabled and WAL tracking in the MANIFEST is enabled. The race condition is between two background flush threads trying to install flush results, causing a WAL deletion not tracked in the MANIFEST. A future DB open may fail. -* Fixed a heap use-after-free race with DropColumnFamily. -* Fixed a bug that `rocksdb.read.block.compaction.micros` cannot track compaction stats (#9722). -* Fixed `file_type`, `relative_filename` and `directory` fields returned by `GetLiveFilesMetaData()`, which were added when inheriting from `FileStorageInfo`. -* Fixed a bug affecting `track_and_verify_wals_in_manifest`. Without the fix, applications may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#9766). -* Fix segfault in FilePrefetchBuffer with async_io as it doesn't wait for pending jobs to complete on destruction. -* Fix ERROR_HANDLER_AUTORESUME_RETRY_COUNT stat whose value was set incorrectly in portal.h. -* Fixed a bug for non-TransactionDB with avoid_flush_during_recovery = true and TransactionDB where, in case of a crash, min_log_number_to_keep may not change on recovery, and persisting a new MANIFEST with advanced log_numbers for some column families results in a "column family inconsistency" error on the second recovery. As a solution, the corrupted WALs whose numbers are larger than the corrupted WAL and smaller than the new WAL will be moved to the archive folder. -* Fixed a bug in RocksDB DB::Open() which may create and write to two new MANIFEST files even before recovery succeeds. Now writes to MANIFEST are persisted only after recovery is successful. - -### New Features -* For db_bench when --seed=0 or --seed is not set then it uses the current time as the seed value. Previously it used the value 1000. -* For db_bench when --benchmark lists multiple tests and each test uses a seed for a RNG then the seeds across tests will no longer be repeated. -* Added an option to dynamically charge an updating estimated memory usage of the block-based table reader to block cache if block cache is available. To enable this feature, set `BlockBasedTableOptions::reserve_table_reader_memory = true`. -* Add new stat ASYNC_READ_BYTES that calculates the number of bytes read during async read calls and users can check if the async code path is being called by RocksDB internal automatic prefetching for sequential reads. -* Enable async prefetching if ReadOptions.readahead_size is set along with ReadOptions.async_io in FilePrefetchBuffer. -* Add event listener support on remote compaction compactor side. -* Added a dedicated integer DB property `rocksdb.live-blob-file-garbage-size` that exposes the total amount of garbage in the blob files in the current version (see the sketch below). -* RocksDB does internal auto prefetching if it notices sequential reads. It starts with readahead size `initial_auto_readahead_size` which now can be configured through BlockBasedTableOptions. -* Add a merge operator that allows users to register specific aggregation functions so that they can do aggregation using different aggregation types for different keys. See comments in include/rocksdb/utilities/agg_merge.h for actual usage. The feature is experimental and the format is subject to change and we won't provide a migration tool.
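A minimal sketch of reading the `rocksdb.live-blob-file-garbage-size` property mentioned above; the helper name is illustrative only:

```c++
#include <cstdint>

#include "rocksdb/db.h"

// Return the total garbage (in bytes) in live blob files, or 0 if the
// property is unavailable; `db` is assumed to be an already-open DB*.
uint64_t LiveBlobFileGarbageBytes(rocksdb::DB* db) {
  uint64_t garbage_bytes = 0;
  if (!db->GetIntProperty("rocksdb.live-blob-file-garbage-size",
                          &garbage_bytes)) {
    return 0;
  }
  return garbage_bytes;
}
```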
-* Meta-internal / Experimental: Improve CPU performance by replacing many uses of std::unordered_map with folly::F14FastMap when RocksDB is compiled together with Folly. -* Experimental: Add CompressedSecondaryCache, a concrete implementation of rocksdb::SecondaryCache, that integrates with compression libraries (e.g. LZ4) to hold compressed blocks. - -### Behavior changes -* Disallow usage of commit-time-write-batch for write-prepared/write-unprepared transactions if TransactionOptions::use_only_the_last_commit_time_batch_for_recovery is false to prevent two (or more) uncommitted versions of the same key in the database. Otherwise, bottommost compaction may violate the internal key uniqueness invariant of SSTs if the sequence numbers of both internal keys are zeroed out (#9794). -* Make DB::GetUpdatesSince() return NotSupported early for write-prepared/write-unprepared transactions, as the API contract indicates. - -### Public API changes -* Exposed APIs to examine results of block cache stats collections in a structured way. In particular, users of `GetMapProperty()` with property `kBlockCacheEntryStats` can now use the functions in `BlockCacheEntryStatsMapKeys` to find stats in the map. -* Add `fail_if_not_bottommost_level` to IngestExternalFileOptions so that ingestion will fail if the file(s) cannot be ingested to the bottommost level. -* Add output parameter `is_in_sec_cache` to `SecondaryCache::Lookup()`. It indicates whether the handle is possibly erased from the secondary cache after the Lookup. - -## 7.1.0 (03/23/2022) -### New Features -* Allow WriteBatchWithIndex to index a WriteBatch that includes keys with user-defined timestamps. The index itself does not have a timestamp. -* Add support for user-defined timestamps to write-committed transactions without API change. The `TransactionDB` layer APIs do not allow timestamps because we require that all user-defined-timestamps-aware operations go through the `Transaction` APIs. -* Added BlobDB options to `ldb`. -* `BlockBasedTableOptions::detect_filter_construct_corruption` can now be dynamically configured using `DB::SetOptions`. -* Automatically recover from retryable read IO errors during background flush/compaction. -* Experimental support for preserving file Temperatures through backup and restore, and for updating DB metadata for outside changes to file Temperature (`UpdateManifestForFilesState` or `ldb update_manifest --update_temperatures`). -* Experimental support for async_io in ReadOptions which is used by FilePrefetchBuffer to prefetch some of the data asynchronously, if reads are sequential and auto readahead is enabled by rocksdb internally. - -### Bug Fixes -* Fixed a major performance bug in which Bloom filters generated by pre-7.0 releases are not read by early 7.0.x releases (and vice-versa) due to changes to FilterPolicy::Name() in #9590. This can severely impact read performance and read I/O on upgrade or downgrade with existing DB, but not data correctness. -* Fixed a data race on `versions_` between `DBImpl::ResumeImpl()` and threads waiting for recovery to complete (#9496) -* Fixed a bug caused by a race among flush, incoming writes, and taking snapshots. Queries to snapshots created under this race condition can return incorrect results, e.g. resurfacing deleted data. -* Fixed a bug that DB flush uses `options.compression` even when `options.compression_per_level` is set. -* Fixed a bug that DisableManualCompaction may assert when disabling an unscheduled manual compaction.
-* Fix a race condition when canceling manual compaction with `DisableManualCompaction`. Also DB close can cancel the manual compaction thread. -* Fixed a potential timer crash when opening and closing the DB concurrently. -* Fixed a race condition for `alive_log_files_` in non-two-write-queues mode. The race is between the write_thread_ in WriteToWAL() and another thread executing `FindObsoleteFiles()`. The race condition will be caught if `__glibcxx_requires_nonempty` is enabled. -* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() is performed. -* Fixed a race condition when disabling and re-enabling manual compaction. -* Fixed automatic error recovery failure in atomic flush. -* Fixed a race condition when mmapping a WritableFile on POSIX. - -### Public API changes -* Added pure virtual FilterPolicy::CompatibilityName(), which is needed for fixing a major performance bug involving FilterPolicy naming in SST metadata without affecting the Customizable aspect of FilterPolicy. This change only affects those with their own custom or wrapper FilterPolicy classes. -* `options.compression_per_level` is dynamically changeable with `SetOptions()`. -* Added `WriteOptions::rate_limiter_priority`. When set to something other than `Env::IO_TOTAL`, the internal rate limiter (`DBOptions::rate_limiter`) will be charged at the specified priority for writes associated with the API to which the `WriteOptions` was provided. Currently the support covers automatic WAL flushes, which happen during live updates (`Put()`, `Write()`, `Delete()`, etc.) when `WriteOptions::disableWAL == false` and `DBOptions::manual_wal_flush == false`. -* Add DB::OpenAndTrimHistory API. This API will open the DB and trim data to the timestamp specified by trim_ts (data with a timestamp larger than the specified trim bound will be removed). This API should only be used for recovery of timestamp-enabled column families. If the column family doesn't have timestamps enabled, this API won't trim any data on that column family. This API is not compatible with the avoid_flush_during_recovery option. -* Remove BlockBasedTableOptions.hash_index_allow_collision which no longer has any effect. - -## 7.0.0 (02/20/2022) -### Bug Fixes -* Fixed a major bug in which batched MultiGet could return old values for keys deleted by DeleteRange when memtable Bloom filter is enabled (memtable_prefix_bloom_size_ratio > 0). (The fix includes a substantial MultiGet performance improvement in the unusual case of both memtable_whole_key_filtering and prefix_extractor.) -* Fixed more cases of EventListener::OnTableFileCreated called with OK status, file_size==0, and no SST file kept. Now the status is Aborted. -* Fixed a read-after-free bug in `DB::GetMergeOperands()`. -* Fix a data loss bug for 2PC write-committed transactions caused by concurrent transaction commit and memtable switch (#9571). -* Fixed NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL, NUM_DATA_BLOCKS_READ_PER_LEVEL, and NUM_SST_READ_PER_LEVEL stats to be reported once per MultiGet batch per level. - -### Performance Improvements -* Mitigated the overhead of building the file location hash table used by the online LSM tree consistency checks, which can improve performance for certain workloads (see #9351). -* Switched to using a sorted `std::vector` instead of `std::map` for storing the metadata objects for blob files, which can improve performance for certain workloads, especially when the number of blob files is high.
-* DisableManualCompaction() doesn't have to wait for a scheduled manual compaction to be executed in the thread pool in order to cancel the job. - -### Public API changes -* Require a C++17 compatible compiler (GCC >= 7, Clang >= 5, Visual Studio >= 2017) for compiling RocksDB and any code using RocksDB headers. See #9388. -* Added `ReadOptions::rate_limiter_priority`. When set to something other than `Env::IO_TOTAL`, the internal rate limiter (`DBOptions::rate_limiter`) will be charged at the specified priority for file reads associated with the API to which the `ReadOptions` was provided. -* Remove HDFS support from main repo. -* Remove librados support from main repo. -* Remove obsolete backupable_db.h and type alias `BackupableDBOptions`. Use backup_engine.h and `BackupEngineOptions`. Similar renamings are in the C and Java APIs. -* Removed obsolete utility_db.h and `UtilityDB::OpenTtlDB`. Use db_ttl.h and `DBWithTTL::Open`. -* Remove deprecated API DB::AddFile from main repo. -* Remove deprecated API ObjectLibrary::Register() and the (now obsolete) Regex public API. Use ObjectLibrary::AddFactory() with PatternEntry instead. -* Remove deprecated option DBOption::table_cache_remove_scan_count_limit. -* Remove deprecated API AdvancedColumnFamilyOptions::soft_rate_limit. -* Remove deprecated API AdvancedColumnFamilyOptions::hard_rate_limit. -* Remove deprecated API DBOption::base_background_compactions. -* Remove deprecated API DBOptions::purge_redundant_kvs_while_flush. -* Remove deprecated overloads of API DB::CompactRange. -* Remove deprecated option DBOptions::skip_log_error_on_recovery. -* Remove ReadOptions::iter_start_seqnum which has been deprecated. -* Remove DBOptions::preserved_deletes and DB::SetPreserveDeletesSequenceNumber(). -* Remove deprecated API AdvancedColumnFamilyOptions::rate_limit_delay_max_milliseconds. -* Removed timestamp from WriteOptions. Accordingly, added to DB APIs Put, Delete, SingleDelete, etc. accepting an additional argument 'timestamp'. Added Put, Delete, SingleDelete, etc to WriteBatch accepting an additional argument 'timestamp'. Removed WriteBatch::AssignTimestamps(vector) API. Renamed WriteBatch::AssignTimestamp() to WriteBatch::UpdateTimestamps() with clarified comments. -* Changed type of cache buffer passed to `Cache::CreateCallback` from `void*` to `const void*`. -* Significant updates to FilterPolicy-related APIs and configuration: - * Remove public API support for deprecated, inefficient block-based filter (use_block_based_builder=true). - * Old code and configuration strings that would enable it now quietly enable full filters instead, though any built-in FilterPolicy can still read block-based filters. This includes changing the longstanding default behavior of the Java API. - * Remove deprecated FilterPolicy::CreateFilter() and FilterPolicy::KeyMayMatch(). - * Remove `rocksdb_filterpolicy_create()` from C API, as the only C API support for custom filter policies is now obsolete. - * If temporary memory usage in full filter creation is a problem, consider using partitioned filters, smaller SST files, or setting reserve_table_builder_memory=true. - * Remove support for "filter_policy=experimental_ribbon" configuration string. Use something like "filter_policy=ribbonfilter:10" instead. - * Allow configuration string like "filter_policy=bloomfilter:10" without bool, to minimize acknowledgement of the obsolete block-based filter. - * Made FilterPolicy Customizable.
Configuration of filter_policy is now accurately saved in OPTIONS file and can be loaded with LoadOptionsFromFile. (Loading an OPTIONS file generated by a previous version only enables reading and using existing filters, not generating new filters. Previously, no filter_policy would be configured from a saved OPTIONS file.) - * Change meaning of nullptr return from GetBuilderWithContext() from "use - block-based filter" to "generate no filter in this case." - * Also, when user specifies bits_per_key < 0.5, we now round this down - to "no filter" because we expect a filter with >= 80% FP rate is - unlikely to be worth the CPU cost of accessing it (esp with - cache_index_and_filter_blocks=1 or partition_filters=1). - * bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP - rate) - * Remove class definitions for FilterBitsBuilder and FilterBitsReader from - public API, so these can evolve more easily as implementation details. - Custom FilterPolicy can still decide what kind of built-in filter to use - under what conditions. - * Also removed deprecated functions - * FilterPolicy::GetFilterBitsBuilder() - * NewExperimentalRibbonFilterPolicy() - * Remove default implementations of - * FilterPolicy::GetBuilderWithContext() -* Remove default implementation of Name() from FileSystemWrapper. -* Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`. -* Remove deprecated option DBOptions::max_mem_compaction_level. -* Return Status::InvalidArgument from ObjectRegistry::NewObject if a factory exists but the object ould not be created (returns NotFound if the factory is missing). -* Remove deprecated overloads of API DB::GetApproximateSizes. -* Remove deprecated option DBOptions::new_table_reader_for_compaction_inputs. -* Add Transaction::SetReadTimestampForValidation() and Transaction::SetCommitTimestamp(). Default impl returns NotSupported(). -* Add support for decimal patterns to ObjectLibrary::PatternEntry -* Remove deprecated remote compaction APIs `CompactionService::Start()` and `CompactionService::WaitForComplete()`. Please use `CompactionService::StartV2()`, `CompactionService::WaitForCompleteV2()` instead, which provides the same information plus extra data like priority, db_id, etc. -* `ColumnFamilyOptions::OldDefaults` and `DBOptions::OldDefaults` are marked deprecated, as they are no longer maintained. -* Add subcompaction callback APIs: `OnSubcompactionBegin()` and `OnSubcompactionCompleted()`. -* Add file Temperature information to `FileOperationInfo` in event listener API. -* Change the type of SizeApproximationFlags from enum to enum class. Also update the signature of DB::GetApproximateSizes API from uint8_t to SizeApproximationFlags. -* Add Temperature hints information from RocksDB in API `NewSequentialFile()`. backup and checkpoint operations need to open the source files with `NewSequentialFile()`, which will have the temperature hints. Other operations are not covered. - -### Behavior Changes -* Disallow the combination of DBOptions.use_direct_io_for_flush_and_compaction == true and DBOptions.writable_file_max_buffer_size == 0. This combination can cause WritableFileWriter::Append() to loop forever, and it does not make much sense in direct IO. -* `ReadOptions::total_order_seek` no longer affects `DB::Get()`. 
The original motivation for this interaction has been obsolete since RocksDB has been able to detect whether the current prefix extractor is compatible with that used to generate table files, probably RocksDB 5.14.0. - -## New Features -* Introduced an option `BlockBasedTableOptions::detect_filter_construct_corruption` for detecting corruption during Bloom Filter (format_version >= 5) and Ribbon Filter construction. -* Improved the SstDumpTool to read the comparator from table properties and use it to read the SST File. -* Extended the column family statistics in the info log so the total amount of garbage in the blob files and the blob file space amplification factor are also logged. Also exposed the blob file space amp via the `rocksdb.blob-stats` DB property. -* Introduced the API rocksdb_create_dir_if_missing in c.h that calls underlying file system's CreateDirIfMissing API to create the directory. -* Added last level and non-last level read statistics: `LAST_LEVEL_READ_*`, `NON_LAST_LEVEL_READ_*`. -* Experimental: Add support for new APIs ReadAsync in FSRandomAccessFile that reads the data asynchronously and Poll API in FileSystem that checks if requested read request has completed or not. ReadAsync takes a callback function. Poll API checks for completion of read IO requests and should call callback functions to indicate completion of read requests. - -## 6.29.0 (01/21/2022) -Note: The next release will be major release 7.0. See https://github.com/facebook/rocksdb/issues/9390 for more info. -### Public API change -* Added values to `TraceFilterType`: `kTraceFilterIteratorSeek`, `kTraceFilterIteratorSeekForPrev`, and `kTraceFilterMultiGet`. They can be set in `TraceOptions` to filter out the operation types after which they are named. -* Added `TraceOptions::preserve_write_order`. When enabled it guarantees write records are traced in the same order they are logged to WAL and applied to the DB. By default it is disabled (false) to match the legacy behavior and prevent regression. -* Made the Env class extend the Customizable class. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. -* `Options::OldDefaults` is marked deprecated, as it is no longer maintained. -* Add ObjectLibrary::AddFactory and ObjectLibrary::PatternEntry classes. This method and associated class are the preferred mechanism for registering factories with the ObjectLibrary going forward. The ObjectLibrary::Register method, which uses regular expressions and may be problematic, is deprecated and will be in a future release. -* Changed `BlockBasedTableOptions::block_size` from `size_t` to `uint64_t`. -* Added API warning against using `Iterator::Refresh()` together with `DB::DeleteRange()`, which are incompatible and have always risked causing the refreshed iterator to return incorrect results. -* Made `AdvancedColumnFamilyOptions.bottommost_temperature` dynamically changeable with `SetOptions()`. - -### Behavior Changes -* `DB::DestroyColumnFamilyHandle()` will return Status::InvalidArgument() if called with `DB::DefaultColumnFamily()`. -* On 32-bit platforms, mmap reads are no longer quietly disabled, just discouraged. - -### New Features -* Added `Options::DisableExtraChecks()` that can be used to improve peak write performance by disabling checks that should not be necessary in the absence of software logic errors or CPU+memory hardware errors. 
(Default options are slowly moving toward some performance overheads for extra correctness checking.) - -### Performance Improvements -* Improved read performance when a prefix extractor is used (Seek, Get, MultiGet), even compared to version 6.25 baseline (see bug fix below), by optimizing the common case of prefix extractor compatible with table file and unchanging. - -### Bug Fixes -* Fix a bug that FlushMemTable may return ok even flush not succeed. -* Fixed a bug of Sync() and Fsync() not using `fcntl(F_FULLFSYNC)` on OS X and iOS. -* Fixed a significant performance regression in version 6.26 when a prefix extractor is used on the read path (Seek, Get, MultiGet). (Excessive time was spent in SliceTransform::AsString().) -* Fixed a race condition in SstFileManagerImpl error recovery code that can cause a crash during process shutdown. - -### New Features -* Added RocksJava support for MacOS universal binary (ARM+x86) - -## 6.28.0 (2021-12-17) -### New Features -* Introduced 'CommitWithTimestamp' as a new tag. Currently, there is no API for user to trigger a write with this tag to the WAL. This is part of the efforts to support write-commited transactions with user-defined timestamps. -* Introduce SimulatedHybridFileSystem which can help simulating HDD latency in db_bench. Tiered Storage latency simulation can be enabled using -simulate_hybrid_fs_file (note that it doesn't work if db_bench is interrupted in the middle). -simulate_hdd can also be used to simulate all files on HDD. - -### Bug Fixes -* Fixed a bug in rocksdb automatic implicit prefetching which got broken because of new feature adaptive_readahead and internal prefetching got disabled when iterator moves from one file to next. -* Fixed a bug in TableOptions.prepopulate_block_cache which causes segmentation fault when used with TableOptions.partition_filters = true and TableOptions.cache_index_and_filter_blocks = true. -* Fixed a bug affecting custom memtable factories which are not registered with the `ObjectRegistry`. The bug could result in failure to save the OPTIONS file. -* Fixed a bug causing two duplicate entries to be appended to a file opened in non-direct mode and tracked by `FaultInjectionTestFS`. -* Fixed a bug in TableOptions.prepopulate_block_cache to support block-based filters also. -* Block cache keys no longer use `FSRandomAccessFile::GetUniqueId()` (previously used when available), so a filesystem recycling unique ids can no longer lead to incorrect result or crash (#7405). For files generated by RocksDB >= 6.24, the cache keys are stable across DB::Open and DB directory move / copy / import / export / migration, etc. Although collisions are still theoretically possible, they are (a) impossible in many common cases, (b) not dependent on environmental factors, and (c) much less likely than a CPU miscalculation while executing RocksDB. -* Fixed a bug in C bindings causing iterator to return incorrect result (#9343). - -### Behavior Changes -* MemTableList::TrimHistory now use allocated bytes when max_write_buffer_size_to_maintain > 0(default in TrasactionDB, introduced in PR#5022) Fix #8371. - -### Public API change -* Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes. -* Introduce a new EventListener callback that will be called upon the end of automatic error recovery. -* Add IncreaseFullHistoryTsLow API so users can advance each column family's full_history_ts_low seperately. 
-* Add GetFullHistoryTsLow API so users can query current full_history_low value of specified column family. - -### Performance Improvements -* Replaced map property `TableProperties::properties_offsets` with uint64_t property `external_sst_file_global_seqno_offset` to save table properties's memory. -* Block cache accesses are faster by RocksDB using cache keys of fixed size (16 bytes). - -### Java API Changes -* Removed Java API `TableProperties.getPropertiesOffsets()` as it exposed internal details to external users. - -## 6.27.0 (2021-11-19) -### New Features -* Added new ChecksumType kXXH3 which is faster than kCRC32c on almost all x86\_64 hardware. -* Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file. -* Provided support for tracking per-sst user-defined timestamp information in MANIFEST. -* Added new option "adaptive_readahead" in ReadOptions. For iterators, RocksDB does auto-readahead on noticing sequential reads and by enabling this option, readahead_size of current file (if reads are sequential) will be carried forward to next file instead of starting from the scratch at each level (except L0 level files). If reads are not sequential it will fall back to 8KB. This option is applicable only for RocksDB internal prefetch buffer and isn't supported with underlying file system prefetching. -* Added the read count and read bytes related stats to Statistics for tiered storage hot, warm, and cold file reads. -* Added an option to dynamically charge an updating estimated memory usage of block-based table building to block cache if block cache available. It currently only includes charging memory usage of constructing (new) Bloom Filter and Ribbon Filter to block cache. To enable this feature, set `BlockBasedTableOptions::reserve_table_builder_memory = true`. -* Add a new API OnIOError in listener.h that notifies listeners when an IO error occurs during FileSystem operation along with filename, status etc. -* Added compaction readahead support for blob files to the integrated BlobDB implementation, which can improve compaction performance when the database resides on higher-latency storage like HDDs or remote filesystems. Readahead can be configured using the column family option `blob_compaction_readahead_size`. - -### Bug Fixes -* Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption. -* Fixed a bug in CompactionIterator when write-prepared transaction is used. A released earliest write conflict snapshot may cause assertion failure in dbg mode and unexpected key in opt mode. -* Fix ticker WRITE_WITH_WAL("rocksdb.write.wal"), this bug is caused by a bad extra `RecordTick(stats_, WRITE_WITH_WAL)` (at 2 place), this fix remove the extra `RecordTick`s and fix the corresponding test case. -* EventListener::OnTableFileCreated was previously called with OK status and file_size==0 in cases of no SST file contents written (because there was no content to add) and the empty file deleted before calling the listener. Now the status is Aborted. 
-* Fixed a bug in CompactionIterator when write-preared transaction is used. Releasing earliest_snapshot during compaction may cause a SingleDelete to be output after a PUT of the same user key whose seq has been zeroed. -* Added input sanitization on negative bytes passed into `GenericRateLimiter::Request`. -* Fixed an assertion failure in CompactionIterator when write-prepared transaction is used. We prove that certain operations can lead to a Delete being followed by a SingleDelete (same user key). We can drop the SingleDelete. -* Fixed a bug of timestamp-based GC which can cause all versions of a key under full_history_ts_low to be dropped. This bug will be triggered when some of the ikeys' timestamps are lower than full_history_ts_low, while others are newer. -* In some cases outside of the DB read and compaction paths, SST block checksums are now checked where they were not before. -* Explicitly check for and disallow the `BlockBasedTableOptions` if insertion into one of {`block_cache`, `block_cache_compressed`, `persistent_cache`} can show up in another of these. (RocksDB expects to be able to use the same key for different physical data among tiers.) -* Users who configured a dedicated thread pool for bottommost compactions by explicitly adding threads to the `Env::Priority::BOTTOM` pool will no longer see RocksDB schedule automatic compactions exceeding the DB's compaction concurrency limit. For details on per-DB compaction concurrency limit, see API docs of `max_background_compactions` and `max_background_jobs`. -* Fixed a bug of background flush thread picking more memtables to flush and prematurely advancing column family's log_number. -* Fixed an assertion failure in ManifestTailer. -* Fixed a bug that could, with WAL enabled, cause backups, checkpoints, and `GetSortedWalFiles()` to fail randomly with an error like `IO error: 001234.log: No such file or directory` - -### Behavior Changes -* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files. -* `TransactionUtil::CheckKeyForConflicts` can also perform conflict-checking based on user-defined timestamps in addition to sequence numbers. -* Removed `GenericRateLimiter`'s minimum refill bytes per period previously enforced. - -### Public API change -* When options.ttl is used with leveled compaction with compactinon priority kMinOverlappingRatio, files exceeding half of TTL value will be prioritized more, so that by the time TTL is reached, fewer extra compactions will be scheduled to clear them up. At the same time, when compacting files with data older than half of TTL, output files may be cut off based on those files' boundaries, in order for the early TTL compaction to work properly. -* Made FileSystem and RateLimiter extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. -* Clarified in API comments that RocksDB is not exception safe for callbacks and custom extensions. An exception propagating into RocksDB can lead to undefined behavior, including data loss, unreported corruption, deadlocks, and more. -* Marked `WriteBufferManager` as `final` because it is not intended for extension. -* Removed unimportant implementation details from table_properties.h -* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. 
File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%. -* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true. -* In builds where glibc provides `gettid()`, info log ("LOG" file) lines now print a system-wide thread ID from `gettid()` instead of the process-local `pthread_self()`. For all users, the thread ID format is changed from hexadecimal to decimal integer. -* In builds where glibc provides `pthread_setname_np()`, the background thread names no longer contain an ID suffix. For example, "rocksdb:bottom7" (and all other threads in the `Env::Priority::BOTTOM` pool) are now named "rocksdb:bottom". Previously large thread pools could breach the name size limit (e.g., naming "rocksdb:bottom10" would fail). -* Deprecating `ReadOptions::iter_start_seqnum` and `DBOptions::preserve_deletes`, please try using user defined timestamp feature instead. The options will be removed in a future release, currently it logs a warning message when using. - -### Performance Improvements -* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070) - -### Behavior Changes -* `NUM_FILES_IN_SINGLE_COMPACTION` was only counting the first input level files, now it's including all input files. - -## 6.26.0 (2021-10-20) -### Bug Fixes -* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets. -* Fix the incorrect disabling of SST rate limited deletion when the WAL and DB are in different directories. Only WAL rate limited deletion should be disabled if its in a different directory. -* Fix `DisableManualCompaction()` to cancel compactions even when they are waiting on automatic compactions to drain due to `CompactRangeOptions::exclusive_manual_compactions == true`. -* Fix contract of `Env::ReopenWritableFile()` and `FileSystem::ReopenWritableFile()` to specify any existing file must not be deleted or truncated. -* Fixed bug in calls to `IngestExternalFiles()` with files for multiple column families. The bug could have introduced a delay in ingested file keys becoming visible after `IngestExternalFiles()` returned. Furthermore, mutations to ingested file keys while they were invisible could have been dropped (not necessarily immediately). -* Fixed a possible race condition impacting users of `WriteBufferManager` who constructed it with `allow_stall == true`. The race condition led to undefined behavior (in our experience, typically a process crash). -* Fixed a bug where stalled writes would remain stalled forever after the user calls `WriteBufferManager::SetBufferSize()` with `new_size == 0` to dynamically disable memory limiting. -* Make `DB::close()` thread-safe. -* Fix a bug in atomic flush where one bg flush thread will wait forever for a preceding bg flush thread to commit its result to MANIFEST but encounters an error which is mapped to a soft error (DB not stopped). -* Fix a bug in `BackupEngine` where some internal callers of `GenericRateLimiter::Request()` do not honor `bytes <= GetSingleBurstBytes()`. - -### New Features -* Print information about blob files when using "ldb list_live_files_metadata" -* Provided support for SingleDelete with user defined timestamp. 
-* Experimental new function DB::GetLiveFilesStorageInfo offers essentially a unified version of other functions like GetLiveFiles, GetLiveFilesChecksumInfo, and GetSortedWalFiles. Checkpoints and backups could show small behavioral changes and/or improved performance as they now use this new API. -* Add remote compaction read/write bytes statistics: `REMOTE_COMPACT_READ_BYTES`, `REMOTE_COMPACT_WRITE_BYTES`. -* Introduce an experimental feature to dump out the blocks from block cache and insert them to the secondary cache to reduce the cache warmup time (e.g., used while migrating DB instance). More information are in `class CacheDumper` and `CacheDumpedLoader` at `rocksdb/utilities/cache_dump_load.h` Note that, this feature is subject to the potential change in the future, it is still experimental. -* Introduced a new BlobDB configuration option `blob_garbage_collection_force_threshold`, which can be used to trigger compactions targeting the SST files which reference the oldest blob files when the ratio of garbage in those blob files meets or exceeds the specified threshold. This can reduce space amplification with skewed workloads where the affected SST files might not otherwise get picked up for compaction. -* Added EXPERIMENTAL support for table file (SST) unique identifiers that are stable and universally unique, available with new function `GetUniqueIdFromTableProperties`. Only SST files from RocksDB >= 6.24 support unique IDs. -* Added `GetMapProperty()` support for "rocksdb.dbstats" (`DB::Properties::kDBStats`). As a map property, it includes DB-level internal stats accumulated over the DB's lifetime, such as user write related stats and uptime. - -### Public API change -* Made SystemClock extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. -* Made SliceTransform extend the Customizable class and added a CreateFromString method. Implementations need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. The Capped and Prefixed transform classes return a short name (no length); use GetId for the fully qualified name. -* Made FileChecksumGenFactory, SstPartitionerFactory, TablePropertiesCollectorFactory, and WalFilter extend the Customizable class and added a CreateFromString method. -* Some fields of SstFileMetaData are deprecated for compatibility with new base class FileStorageInfo. -* Add `file_temperature` to `IngestExternalFileArg` such that when ingesting SST files, we are able to indicate the temperature of the this batch of files. -* If `DB::Close()` failed with a non aborted status, calling `DB::Close()` again will return the original status instead of Status::OK. -* Add CacheTier to advanced_options.h to describe the cache tier we used. Add a `lowest_used_cache_tier` option to `DBOptions` (immutable) and pass it to BlockBasedTableReader. By default it is `CacheTier::kNonVolatileBlockTier`, which means, we always use both block cache (kVolatileTier) and secondary cache (kNonVolatileBlockTier). By set it to `CacheTier::kVolatileTier`, the DB will not use the secondary cache. -* Even when options.max_compaction_bytes is hit, compaction output files are only cut when it aligns with grandparent files' boundaries. 
options.max_compaction_bytes could be slightly violated with the change, but the violation is no more than one target SST file size, which is usually much smaller. - -### Performance Improvements -* Improved CPU efficiency of building block-based table (SST) files (#9039 and #9040). - -### Java API Changes -* Add Java API bindings for new integrated BlobDB options -* `keyMayExist()` supports ByteBuffer. -* Fix multiget throwing Null Pointer Exception for num of keys > 70k (https://github.com/facebook/rocksdb/issues/8039). - -## 6.25.0 (2021-09-20) -### Bug Fixes -* Allow secondary instance to refresh iterator. Assign read seq after referencing SuperVersion. -* Fixed a bug of secondary instance's last_sequence going backward, and reads on the secondary fail to see recent updates from the primary. -* Fixed a bug that could lead to duplicate DB ID or DB session ID in POSIX environments without /proc/sys/kernel/random/uuid. -* Fix a race in DumpStats() with column family destruction due to not taking a Ref on each entry while iterating the ColumnFamilySet. -* Fix a race in item ref counting in LRUCache when promoting an item from the SecondaryCache. -* Fix a race in BackupEngine if RateLimiter is reconfigured during concurrent Restore operations. -* Fix a bug on POSIX in which failure to create a lock file (e.g. out of space) can prevent future LockFile attempts in the same process on the same file from succeeding. -* Fix a bug that backup_rate_limiter and restore_rate_limiter in BackupEngine could not limit read rates. -* Fix the implementation of `prepopulate_block_cache = kFlushOnly` to only apply to flushes rather than to all generated files. -* Fix WAL log data corruption when using DBOptions.manual_wal_flush(true) and WriteOptions.sync(true) together. The sync WAL should work with locked log_write_mutex_. -* Add checks for validity of the IO uring completion queue entries, and fail the BlockBasedTableReader MultiGet sub-batch if there's an invalid completion -* Add an interface RocksDbIOUringEnable() that, if defined by the user, will allow them to enable/disable the use of IO uring by RocksDB -* Fix the bug that when direct I/O is used and MultiRead() returns a short result, RandomAccessFileReader::MultiRead() still returns full size buffer, with returned short value together with some data in original buffer. This bug is unlikely cause incorrect results, because (1) since FileSystem layer is expected to retry on short result, returning short results is only possible when asking more bytes in the end of the file, which RocksDB doesn't do when using MultiRead(); (2) checksum is unlikely to match. - -### New Features -* RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions. -* Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries. -* Added ticker statistics, "rocksdb.backup.read.bytes" and "rocksdb.backup.write.bytes", reporting how many bytes were read and written during backup. -* Added properties for BlobDB: `rocksdb.num-blob-files`, `rocksdb.blob-stats`, `rocksdb.total-blob-file-size`, and `rocksdb.live-blob-file-size`. The existing property `rocksdb.estimate_live-data-size` was also extended to include live bytes residing in blob files. -* Added two new RateLimiter IOPriorities: `Env::IO_USER`,`Env::IO_MID`. 
`Env::IO_USER` will have superior priority over all other RateLimiter IOPriorities without being subject to fair scheduling constraint. -* `SstFileWriter` now supports `Put`s and `Delete`s with user-defined timestamps. Note that the ingestion logic itself is not timestamp-aware yet. -* Allow a single write batch to include keys from multiple column families whose timestamps' formats can differ. For example, some column families may disable timestamp, while others enable timestamp. -* Add compaction priority information in RemoteCompaction, which can be used to schedule high priority job first. -* Added new callback APIs `OnBlobFileCreationStarted`,`OnBlobFileCreated`and `OnBlobFileDeleted` in `EventListener` class of listener.h. It notifies listeners during creation/deletion of individual blob files in Integrated BlobDB. It also log blob file creation finished event and deletion event in LOG file. -* Batch blob read requests for `DB::MultiGet` using `MultiRead`. -* Add support for fallback to local compaction, the user can return `CompactionServiceJobStatus::kUseLocal` to instruct RocksDB to run the compaction locally instead of waiting for the remote compaction result. -* Add built-in rate limiter's implementation of `RateLimiter::GetTotalPendingRequest(int64_t* total_pending_requests, const Env::IOPriority pri)` for the total number of requests that are pending for bytes in the rate limiter. -* Charge memory usage during data buffering, from which training samples are gathered for dictionary compression, to block cache. Unbuffering data can now be triggered if the block cache becomes full and `strict_capacity_limit=true` for the block cache, in addition to existing conditions that can trigger unbuffering. - -### Public API change -* Remove obsolete implementation details FullKey and ParseFullKey from public API -* Change `SstFileMetaData::size` from `size_t` to `uint64_t`. -* Made Statistics extend the Customizable class and added a CreateFromString method. Implementations of Statistics need to be registered with the ObjectRegistry and to implement a Name() method in order to be created via this method. -* Extended `FlushJobInfo` and `CompactionJobInfo` in listener.h to provide information about the blob files generated by a flush/compaction and garbage collected during compaction in Integrated BlobDB. Added struct members `blob_file_addition_infos` and `blob_file_garbage_infos` that contain this information. -* Extended parameter `output_file_names` of `CompactFiles` API to also include paths of the blob files generated by the compaction in Integrated BlobDB. -* Most `BackupEngine` functions now return `IOStatus` instead of `Status`. Most existing code should be compatible with this change but some calls might need to be updated. -* Add a new field `level_at_creation` in `TablePropertiesCollectorFactory::Context` to capture the level at creating the SST file (i.e, table), of which the properties are being collected. - -### Miscellaneous -* Add a paranoid check where in case FileSystem layer doesn't fill the buffer but returns succeed, checksum is unlikely to match even if buffer contains a previous block. The byte modified is not useful anyway, so it isn't expected to change any behavior when FileSystem is satisfying its contract. - -## 6.24.0 (2021-08-20) -### Bug Fixes -* If the primary's CURRENT file is missing or inaccessible, the secondary instance should not hang repeatedly trying to switch to a new MANIFEST. 
It should instead return the error code encountered while accessing the file. -* Restoring backups with BackupEngine is now a logically atomic operation, so that if a restore operation is interrupted, DB::Open on it will fail. Using BackupEngineOptions::sync (default) ensures atomicity even in case of power loss or OS crash. -* Fixed a race related to the destruction of `ColumnFamilyData` objects. The earlier logic unlocked the DB mutex before destroying the thread-local `SuperVersion` pointers, which could result in a process crash if another thread managed to get a reference to the `ColumnFamilyData` object. -* Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail though did not impact applications since we swallowed the error. Now we also stopped swallowing errors in renaming "LOG" file. -* Fixed an issue where `OnFlushCompleted` was not called for atomic flush. -* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`. -* Fixed a potential incorrect result in opt mode and assertion failures caused by releasing snapshot(s) during compaction. -* Fixed passing of BlobFileCompletionCallback to Compaction job and Atomic flush job which was default paramter (nullptr). BlobFileCompletitionCallback is internal callback that manages addition of blob files to SSTFileManager. -* Fixed MultiGet not updating the block_read_count and block_read_byte PerfContext counters. - -### New Features -* Made the EventListener extend the Customizable class. -* EventListeners that have a non-empty Name() and that are registered with the ObjectRegistry can now be serialized to/from the OPTIONS file. -* Insert warm blocks (data blocks, uncompressed dict blocks, index and filter blocks) in Block cache during flush under option BlockBasedTableOptions.prepopulate_block_cache. Previously it was enabled for only data blocks. -* BlockBasedTableOptions.prepopulate_block_cache can be dynamically configured using DB::SetOptions. -* Add CompactionOptionsFIFO.age_for_warm, which allows RocksDB to move old files to warm tier in FIFO compactions. Note that file temperature is still an experimental feature. -* Add a comment to suggest btrfs user to disable file preallocation by setting `options.allow_fallocate=false`. -* Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by settings the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench. -* Add property `LiveSstFilesSizeAtTemperature` to retrieve sst file size at different temperature. -* Added a stat rocksdb.secondary.cache.hits. -* Added a PerfContext counter secondary_cache_hit_count. -* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`. -* Added hybrid configuration of Ribbon filter and Bloom filter where some LSM levels use Ribbon for memory space efficiency and some use Bloom for speed. See NewRibbonFilterPolicy. This also changes the default behavior of NewRibbonFilterPolicy to use Bloom for flushes under Leveled and Universal compaction and Ribbon otherwise. The C API function `rocksdb_filterpolicy_create_ribbon` is unchanged but adds new `rocksdb_filterpolicy_create_ribbon_hybrid`. 
- -### Public API change -* Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h, trace_record_result.h and utilities/replayer.h files to access the decoded Trace records, replay them, and query the actual operation results. -* Added Configurable::GetOptionsMap to the public API for use in creating new Customizable classes. -* Generalized bits_per_key parameters in C API from int to double for greater configurability. Although this is a compatible change for existing C source code, anything depending on C API signatures, such as foreign function interfaces, will need to be updated. - -### Performance Improvements -* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value. - -### Behavior Changes -* `StringAppendOperator` additionally accepts a string as the delimiter. -* BackupEngineOptions::sync (default true) now applies to restoring backups in addition to creating backups. This could slow down restores, but ensures they are fully persisted before returning OK. (Consider increasing max_background_operations to improve performance.) - -## 6.23.0 (2021-07-16) -### Behavior Changes -* Obsolete keys in the bottommost level that were preserved for a snapshot will now be cleaned upon snapshot release in all cases. This form of compaction (snapshot release triggered compaction) previously had an artificial limitation that multiple tombstones needed to be present. -### Bug Fixes -* Blob file checksums are now printed in hexadecimal format when using the `manifest_dump` `ldb` command. -* `GetLiveFilesMetaData()` now populates the `temperature`, `oldest_ancester_time`, and `file_creation_time` fields of its `LiveFileMetaData` results when the information is available. Previously these fields always contained zero indicating unknown. -* Fix mismatches of OnCompaction{Begin,Completed} in case of DisableManualCompaction(). -* Fix continuous logging of an existing background error on every user write -* Fix a bug that `Get()` return Status::OK() and an empty value for non-existent key when `read_options.read_tier = kBlockCacheTier`. -* Fix a bug that stat in `get_context` didn't accumulate to statistics when query is failed. -* Fixed handling of DBOptions::wal_dir with LoadLatestOptions() or ldb --try_load_options on a copied or moved DB. Previously, when the WAL directory is same as DB directory (default), a copied or moved DB would reference the old path of the DB as the WAL directory, potentially corrupting both copies. Under this change, the wal_dir from DB::GetOptions() or LoadLatestOptions() may now be empty, indicating that the current DB directory is used for WALs. This is also a subtle API change. - -### New Features -* ldb has a new feature, `list_live_files_metadata`, that shows the live SST files, as well as their LSM storage level and the column family they belong to. -* The new BlobDB implementation now tracks the amount of garbage in each blob file in the MANIFEST. -* Integrated BlobDB now supports Merge with base values (Put/Delete etc.). -* RemoteCompaction supports sub-compaction, the job_id in the user interface is changed from `int` to `uint64_t` to support sub-compaction id. -* Expose statistics option in RemoteCompaction worker. - -### Public API change -* Added APIs to the Customizable class to allow developers to create their own Customizable classes. 
Created the utilities/customizable_util.h file to contain helper methods for developing new Customizable classes. -* Change signature of SecondaryCache::Name(). Make SecondaryCache customizable and add SecondaryCache::CreateFromString method. - -## 6.22.0 (2021-06-18) -### Behavior Changes -* Added two additional tickers, MEMTABLE_PAYLOAD_BYTES_AT_FLUSH and MEMTABLE_GARBAGE_BYTES_AT_FLUSH. These stats can be used to estimate the ratio of "garbage" (outdated) bytes in the memtable that are discarded at flush time. -* Added API comments clarifying safe usage of Disable/EnableManualCompaction and EventListener callbacks for compaction. -### Bug Fixes -* fs_posix.cc GetFreeSpace() always report disk space available to root even when running as non-root. Linux defaults often have disk mounts with 5 to 10 percent of total space reserved only for root. Out of space could result for non-root users. -* Subcompactions are now disabled when user-defined timestamps are used, since the subcompaction boundary picking logic is currently not timestamp-aware, which could lead to incorrect results when different subcompactions process keys that only differ by timestamp. -* Fix an issue that `DeleteFilesInRange()` may cause ongoing compaction reports corruption exception, or ASSERT for debug build. There's no actual data loss or corruption that we find. -* Fixed confusingly duplicated output in LOG for periodic stats ("DUMPING STATS"), including "Compaction Stats" and "File Read Latency Histogram By Level". -* Fixed performance bugs in background gathering of block cache entry statistics, that could consume a lot of CPU when there are many column families with a shared block cache. - -### New Features -* Marked the Ribbon filter and optimize_filters_for_memory features as production-ready, each enabling memory savings for Bloom-like filters. Use `NewRibbonFilterPolicy` in place of `NewBloomFilterPolicy` to use Ribbon filters instead of Bloom, or `ribbonfilter` in place of `bloomfilter` in configuration string. -* Allow `DBWithTTL` to use `DeleteRange` api just like other DBs. `DeleteRangeCF()` which executes `WriteBatchInternal::DeleteRange()` has been added to the handler in `DBWithTTLImpl::Write()` to implement it. -* Add BlockBasedTableOptions.prepopulate_block_cache. If enabled, it prepopulate warm/hot data blocks which are already in memory into block cache at the time of flush. On a flush, the data block that is in memory (in memtables) get flushed to the device. If using Direct IO, additional IO is incurred to read this data back into memory again, which is avoided by enabling this option and it also helps with Distributed FileSystem. More details in include/rocksdb/table.h. -* Added a `cancel` field to `CompactRangeOptions`, allowing individual in-process manual range compactions to be cancelled. - -### New Features -* Added BlobMetaData to the ColumnFamilyMetaData to return information about blob files - -### Public API change -* Added GetAllColumnFamilyMetaData API to retrieve the ColumnFamilyMetaData about all column families. - -## 6.21.0 (2021-05-21) -### Bug Fixes -* Fixed a bug in handling file rename error in distributed/network file systems when the server succeeds but client returns error. The bug can cause CURRENT file to point to non-existing MANIFEST file, thus DB cannot be opened. -* Fixed a bug where ingested files were written with incorrect boundary key metadata. 
In rare cases this could have led to a level's files being wrongly ordered and queries for the boundary keys returning wrong results. -* Fixed a data race between insertion into memtables and the retrieval of the DB properties `rocksdb.cur-size-active-mem-table`, `rocksdb.cur-size-all-mem-tables`, and `rocksdb.size-all-mem-tables`. -* Fixed the false-positive alert when recovering from the WAL file. Avoid reporting "SST file is ahead of WAL" on a newly created empty column family, if the previous WAL file is corrupted. -* Fixed a bug where `GetLiveFiles()` output included a non-existent file called "OPTIONS-000000". Backups and checkpoints, which use `GetLiveFiles()`, failed on DBs impacted by this bug. Read-write DBs were impacted when the latest OPTIONS file failed to write and `fail_if_options_file_error == false`. Read-only DBs were impacted when no OPTIONS files existed. -* Handle return code by io_uring_submit_and_wait() and io_uring_wait_cqe(). -* In the IngestExternalFile() API, only try to sync the ingested file if the file is linked and the FileSystem/Env supports reopening a writable file. -* Fixed a bug that `AdvancedColumnFamilyOptions.max_compaction_bytes` is under-calculated for manual compaction (`CompactRange()`). Manual compaction is split to multiple compactions if the compaction size exceed the `max_compaction_bytes`. The bug creates much larger compaction which size exceed the user setting. On the other hand, larger manual compaction size can increase the subcompaction parallelism, you can tune that by setting `max_compaction_bytes`. - -### Behavior Changes -* Due to the fix of false-postive alert of "SST file is ahead of WAL", all the CFs with no SST file (CF empty) will bypass the consistency check. We fixed a false-positive, but introduced a very rare true-negative which will be triggered in the following conditions: A CF with some delete operations in the last a few queries which will result in an empty CF (those are flushed to SST file and a compaction triggered which combines this file and all other SST files and generates an empty CF, or there is another reason to write a manifest entry for this CF after a flush that generates no SST file from an empty CF). The deletion entries are logged in a WAL and this WAL was corrupted, while the CF's log number points to the next WAL (due to the flush). Therefore, the DB can only recover to the point without these trailing deletions and cause the inconsistent DB status. - -### New Features -* Add new option allow_stall passed during instance creation of WriteBufferManager. When allow_stall is set, WriteBufferManager will stall all writers shared across multiple DBs and columns if memory usage goes beyond specified WriteBufferManager::buffer_size (soft limit). Stall will be cleared when memory is freed after flush and memory usage goes down below buffer_size. -* Allow `CompactionFilter`s to apply in more table file creation scenarios such as flush and recovery. For compatibility, `CompactionFilter`s by default apply during compaction. Users can customize this behavior by overriding `CompactionFilterFactory::ShouldFilterTableFileCreation()`. -* Added more fields to FilterBuildingContext with LSM details, for custom filter policies that vary behavior based on where they are in the LSM-tree. -* Added DB::Properties::kBlockCacheEntryStats for querying statistics on what percentage of block cache is used by various kinds of blocks, etc. using DB::GetProperty and DB::GetMapProperty. 
The same information is now dumped to info LOG periodically according to `stats_dump_period_sec`. -* Add an experimental Remote Compaction feature, which allows the user to run Compaction on a different host or process. The feature is still under development, currently only works on some basic use cases. The interface will be changed without backward/forward compatibility support. -* RocksDB would validate total entries read in flush, and compare with counter inserted into it. If flush_verify_memtable_count = true (default), flush will fail. Otherwise, only log to info logs. -* Add `TableProperties::num_filter_entries`, which can be used with `TableProperties::filter_size` to calculate the effective bits per filter entry (unique user key or prefix) for a table file. - -### Performance Improvements -* BlockPrefetcher is used by iterators to prefetch data if they anticipate more data to be used in future. It is enabled implicitly by rocksdb. Added change to take in account read pattern if reads are sequential. This would disable prefetching for random reads in MultiGet and iterators as readahead_size is increased exponential doing large prefetches. - -### Public API change -* Removed a parameter from TableFactory::NewTableBuilder, which should not be called by user code because TableBuilder is not a public API. -* Removed unused structure `CompactionFilterContext`. -* The `skip_filters` parameter to SstFileWriter is now considered deprecated. Use `BlockBasedTableOptions::filter_policy` to control generation of filters. -* ClockCache is known to have bugs that could lead to crash or corruption, so should not be used until fixed. Use NewLRUCache instead. -* Added a new pure virtual function `ApplyToAllEntries` to `Cache`, to replace `ApplyToAllCacheEntries`. Custom `Cache` implementations must add an implementation. Because this function is for gathering statistics, an empty implementation could be acceptable for some applications. -* Added the ObjectRegistry to the ConfigOptions class. This registry instance will be used to find any customizable loadable objects during initialization. -* Expanded the ObjectRegistry functionality to allow nested ObjectRegistry instances. Added methods to register a set of functions with the registry/library as a group. -* Deprecated backupable_db.h and BackupableDBOptions in favor of new versions with appropriate names: backup_engine.h and BackupEngineOptions. Old API compatibility is preserved. - -### Default Option Change -* When options.arena_block_size <= 0 (default value 0), still use writer_buffer_size / 8 but cap to 1MB. Too large alloation size might not be friendly to allocator and might cause performance issues in extreme cases. - -### Build -* By default, try to build with liburing. For make, if ROCKSDB_USE_IO_URING is not set, treat as enable, which means RocksDB will try to build with liburing. Users can disable it with ROCKSDB_USE_IO_URING=0. For cmake, add WITH_LIBURING to control it, with default on. - -## 6.20.0 (2021-04-16) -### Behavior Changes -* `ColumnFamilyOptions::sample_for_compression` now takes effect for creation of all block-based tables. Previously it only took effect for block-based tables created by flush. -* `CompactFiles()` can no longer compact files from lower level to up level, which has the risk to corrupt DB (details: #8063). The validation is also added to all compactions. -* Fixed some cases in which DB::OpenForReadOnly() could write to the filesystem. 
If you want a Logger with a read-only DB, you must now set DBOptions::info_log yourself, such as using CreateLoggerFromOptions(). -* get_iostats_context() will never return nullptr. If thread-local support is not available, and user does not opt-out iostats context, then compilation will fail. The same applies to perf context as well. -* Added support for WriteBatchWithIndex::NewIteratorWithBase when overwrite_key=false. Previously, this combination was not supported and would assert or return nullptr. -* Improve the behavior of WriteBatchWithIndex for Merge operations. Now more operations may be stored in order to return the correct merged result. - -### Bug Fixes -* Use thread-safe `strerror_r()` to get error messages. -* Fixed a potential hang in shutdown for a DB whose `Env` has high-pri thread pool disabled (`Env::GetBackgroundThreads(Env::Priority::HIGH) == 0`) -* Made BackupEngine thread-safe and added documentation comments to clarify what is safe for multiple BackupEngine objects accessing the same backup directory. -* Fixed crash (divide by zero) when compression dictionary is applied to a file containing only range tombstones. -* Fixed a backward iteration bug with partitioned filter enabled: not including the prefix of the last key of the previous filter partition in current filter partition can cause wrong iteration result. -* Fixed a bug that allowed `DBOptions::max_open_files` to be set with a non-negative integer with `ColumnFamilyOptions::compaction_style = kCompactionStyleFIFO`. - -### Performance Improvements -* On ARM platform, use `yield` instead of `wfe` to relax cpu to gain better performance. - -### Public API change -* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. -* Update DB::StartIOTrace and remove Env object from the arguments as its redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace -* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`. -* Extend file_checksum_dump ldb command and DB::GetLiveFilesChecksumInfo API for IntegratedBlobDB and get checksum of blob files along with SST files. - -### New Features -* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true. -* Added BackupEngine support for integrated BlobDB, with blob files shared between backups when table files are shared. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up. -* Added an optional output parameter to BackupEngine::CreateNewBackup(WithMetadata) to return the BackupID of the new backup. -* Added BackupEngine::GetBackupInfo / GetLatestBackupInfo for querying individual backups. 
-* Made the Ribbon filter a long-term supported feature in terms of the SST schema(compatible with version >= 6.15.0) though the API for enabling it is expected to change. - -## 6.19.0 (2021-03-21) -### Bug Fixes -* Fixed the truncation error found in APIs/tools when dumping block-based SST files in a human-readable format. After fix, the block-based table can be fully dumped as a readable file. -* When hitting a write slowdown condition, no write delay (previously 1 millisecond) is imposed until `delayed_write_rate` is actually exceeded, with an initial burst allowance of 1 millisecond worth of bytes. Also, beyond the initial burst allowance, `delayed_write_rate` is now more strictly enforced, especially with multiple column families. - -### Public API change -* Changed default `BackupableDBOptions::share_files_with_checksum` to `true` and deprecated `false` because of potential for data loss. Note that accepting this change in behavior can temporarily increase backup data usage because files are not shared between backups using the two different settings. Also removed obsolete option kFlagMatchInterimNaming. -* Add a new option BlockBasedTableOptions::max_auto_readahead_size. RocksDB does auto-readahead for iterators on noticing more than two reads for a table file if user doesn't provide readahead_size. The readahead starts at 8KB and doubles on every additional read upto max_auto_readahead_size and now max_auto_readahead_size can be configured dynamically as well. Found that 256 KB readahead size provides the best performance, based on experiments, for auto readahead. Experiment data is in PR #3282. If value is set 0 then no automatic prefetching will be done by rocksdb. Also changing the value will only affect files opened after the change. -* Add suppport to extend DB::VerifyFileChecksums API to also verify blob files checksum. -* When using the new BlobDB, the amount of data written by flushes/compactions is now broken down into table files and blob files in the compaction statistics; namely, Write(GB) denotes the amount of data written to table files, while Wblob(GB) means the amount of data written to blob files. -* New default BlockBasedTableOptions::format_version=5 to enable new Bloom filter implementation by default, compatible with RocksDB versions >= 6.6.0. -* Add new SetBufferSize API to WriteBufferManager to allow dynamic management of memory allotted to all write buffers. This allows user code to adjust memory monitoring provided by WriteBufferManager as process memory needs change datasets grow and shrink. -* Clarified the required semantics of Read() functions in FileSystem and Env APIs. Please ensure any custom implementations are compliant. -* For the new integrated BlobDB implementation, compaction statistics now include the amount of data read from blob files during compaction (due to garbage collection or compaction filters). Write amplification metrics have also been extended to account for data read from blob files. -* Add EqualWithoutTimestamp() to Comparator. -* Extend support to track blob files in SSTFileManager whenever a blob file is created/deleted. Blob files will be scheduled to delete via SSTFileManager and SStFileManager will now take blob files in account while calculating size and space limits along with SST files. -* Add new Append and PositionedAppend API with checksum handoff to legacy Env. - -### New Features -* Support compaction filters for the new implementation of BlobDB. Add `FilterBlobByKey()` to `CompactionFilter`. 
Subclasses can override this method so that compaction filters can determine whether the actual blob value has to be read during compaction. Use a new `kUndetermined` in `CompactionFilter::Decision` to indicated that further action is necessary for compaction filter to make a decision. -* Add support to extend retrieval of checksums for blob files from the MANIFEST when checkpointing. During backup, rocksdb can detect corruption in blob files during file copies. -* Add new options for db_bench --benchmarks: flush, waitforcompaction, compact0, compact1. -* Add an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage. -* Enable backward iteration on keys with user-defined timestamps. -* Add statistics and info log for error handler: counters for bg error, bg io error, bg retryable io error, auto resume count, auto resume total retry number, and auto resume sucess; Histogram for auto resume retry count in each recovery call. Note that, each auto resume attempt will have one or multiple retries. - -### Behavior Changes -* During flush, only WAL sync retryable IO error is mapped to hard error, which will stall the writes. When WAL is used but only SST file write has retryable IO error, it will be mapped to soft error and write will not be affected. - -## 6.18.0 (2021-02-19) -### Behavior Changes -* When retryable IO error occurs during compaction, it is mapped to soft error and set the BG error. However, auto resume is not called to clean the soft error since compaction will reschedule by itself. In this change, When retryable IO error occurs during compaction, BG error is not set. User will be informed the error via EventHelper. -* Introduce a new trace file format for query tracing and replay and trace file version is bump up to 0.2. A payload map is added as the first portion of the payload. We will not have backward compatible issues when adding new entries to trace records. Added the iterator_upper_bound and iterator_lower_bound in Seek and SeekForPrev tracing function. Added them as the new payload member for iterator tracing. - -### New Features -* Add support for key-value integrity protection in live updates from the user buffers provided to `WriteBatch` through the write to RocksDB's in-memory update buffer (memtable). This is intended to detect some cases of in-memory data corruption, due to either software or hardware errors. Users can enable protection by constructing their `WriteBatch` with `protection_bytes_per_key == 8`. -* Add support for updating `full_history_ts_low` option in manual compaction, which is for old timestamp data GC. -* Add a mechanism for using Makefile to build external plugin code into the RocksDB libraries/binaries. This intends to simplify compatibility and distribution for plugins (e.g., special-purpose `FileSystem`s) whose source code resides outside the RocksDB repo. See "plugin/README.md" for developer details, and "PLUGINS.md" for a listing of available plugins. -* Added memory pre-fetching for experimental Ribbon filter, which especially optimizes performance with batched MultiGet. -* A new, experimental version of BlobDB (key-value separation) is now available. The new implementation is integrated into the RocksDB core, i.e. 
it is accessible via the usual `rocksdb::DB` API, as opposed to the separate `rocksdb::blob_db::BlobDB` interface used by the earlier version, and can be configured on a per-column family basis using the configuration options `enable_blob_files`, `min_blob_size`, `blob_file_size`, `blob_compression_type`, `enable_blob_garbage_collection`, and `blob_garbage_collection_age_cutoff`. It extends RocksDB's consistency guarantees to blobs, and offers more features and better performance. Note that some features, most notably `Merge`, compaction filters, and backup/restore, are not yet supported, and there is no support for migrating a database created by the old implementation. - -### Bug Fixes -* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details. -* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. -* Fixed a bug where `WRITE_PREPARED` and `WRITE_UNPREPARED` TransactionDB `MultiGet()` may return uncommitted data with a snapshot. -* In DB::OpenForReadOnly, if any error happens while checking the Manifest file path, it was overridden by Status::NotFound. It has been fixed and now the actual error is returned. - -### Public API Change -* Added an "only_mutable_options" flag to the ConfigOptions. When this flag is "true", the Configurable functions and convenience methods (such as GetDBOptionsFromString) will only deal with options that are marked as mutable: only options marked as mutable can be configured (attempting to configure any other option returns a Status::InvalidArgument), and options not marked as mutable will not be returned or compared. The default is "false", meaning to compare all options. -* Add new Append and PositionedAppend APIs to FileSystem to bring the data verification information (data checksum information) from the upper layer (e.g., WritableFileWriter) to the storage layer. In this way, the customized FileSystem is able to verify the correctness of data being written to the storage on time. Add checksum_handoff_file_types to DBOptions. Users can use this option to control which file types (currently supported file types: kWALFile, kTableFile, kDescriptorFile) should use the new Append and PositionedAppend APIs to hand off the verification information. Currently, RocksDB only uses crc32c to calculate the checksum for write handoff. -* Add an option, `CompressionOptions::max_dict_buffer_bytes`, to limit the in-memory buffering for selecting samples for generating/training a dictionary. The limit is currently loosely adhered to. - - -## 6.17.0 (2021-01-15) -### Behavior Changes -* When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created. -* Since RocksDB does not continue writing to the same file if a file write fails for any reason, a file scope write IO error is treated the same as a retryable IO error.
More information about error handling of file scope IO errors is included in `ErrorHandler::SetBGError`. - -### Bug Fixes -* Versions older than 6.15 cannot decode the VersionEdits `WalAddition` and `WalDeletion`; fixed this by changing their encoded format to be ignorable by older versions. -* Fix a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated. - -### Public API Change -* Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks. -* Add a SystemClock class that contains the time-related methods from Env. The original methods in Env may be deprecated in a future release. This class will allow easier testing, development, and expansion of time-related features. -* Add public APIs GetRocksBuildProperties and GetRocksBuildInfoAsString to get properties about the current build. These properties may include settings related to GIT (branch, timestamp). This change also sets the "build date" based on the GIT properties, rather than the actual build time, thereby enabling more reproducible builds. - -## 6.16.0 (2020-12-18) -### Behavior Changes -* Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation. - -### Bug Fixes -* Truncated WALs ending in incomplete records can no longer produce gaps in the recovered data when `WALRecoveryMode::kPointInTimeRecovery` is used. Gaps are still possible when WALs are truncated exactly on record boundaries; for complete protection, users should enable `track_and_verify_wals_in_manifest`. -* Fix a bug where compressed blocks read by MultiGet are not inserted into the compressed block cache when use_direct_reads = true. -* Fixed the issue of full scanning on obsolete files when there are too many outstanding compactions with ConcurrentTaskLimiter enabled. -* Fixed the logic of populating the native data structure for `read_amp_bytes_per_bit` during OPTIONS file parsing on big-endian architectures. Without this fix, the original code introduced in PR7659, when running on a big-endian machine, can mistakenly store read_amp_bytes_per_bit (a uint32) in little endian format. Future access to `read_amp_bytes_per_bit` will give wrong values. Little endian architectures are not affected. -* Fixed prefix extractor issues with timestamps. -* Fixed a bug in atomic flush: in two-phase commit mode, the minimum WAL log number to keep is incorrect. -* Fixed a bug related to checkpoint in PR7789: if there are multiple column families, and the checkpoint is not opened as read only, then in rare cases, data loss may happen in the checkpoint. Since backup engine relies on checkpoint, it may also be affected. -* When ldb --try_load_options is used with the --column_family option, the ColumnFamilyOptions for the specified column family was not loaded from the OPTIONS file. Fixed it so it is loaded from OPTIONS and then overridden with command line overrides. - -### New Features -* User defined timestamp feature supports `CompactRange` and `GetApproximateSizes`.
-* Support getting aggregated table properties (kAggregatedTableProperties and kAggregatedTablePropertiesAtLevel) with DB::GetMapProperty, for easier access to the data in a structured format. -* Experimental option BlockBasedTableOptions::optimize_filters_for_memory now works with the experimental Ribbon filter (as well as the Bloom filter). - -### Public API Change -* Deprecated the public but rarely-used FilterBitsBuilder::CalculateNumEntry, which is replaced with ApproximateNumEntries taking a size_t parameter and returning size_t. -* To improve portability the functions `Env::GetChildren` and `Env::GetChildrenFileAttributes` will no longer return entries for the special directories `.` or `..`. -* Added a new option `track_and_verify_wals_in_manifest`. If `true`, the log numbers and sizes of the synced WALs are tracked in MANIFEST, then during DB recovery, if a synced WAL is missing from disk, or the WAL's size does not match the recorded size in MANIFEST, an error will be reported and the recovery will be aborted. Note that this option does not work with a secondary instance. -* `rocksdb_approximate_sizes` and `rocksdb_approximate_sizes_cf` in the C API now require an error pointer (`char** errptr`) for receiving any error. -* All overloads of DB::GetApproximateSizes now return Status, so that any failure to obtain the sizes is indicated to the caller. - -## 6.15.0 (2020-11-13) -### Bug Fixes -* Fixed a bug in the following combination of features: indexes with user keys (`format_version >= 3`), indexes are partitioned (`index_type == kTwoLevelIndexSearch`), and some index partitions are pinned in memory (`BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache`). The bug could cause keys to be truncated when read from the index, leading to wrong read results or other unexpected behavior. -* Fixed a bug when indexes are partitioned (`index_type == kTwoLevelIndexSearch`), some index partitions are pinned in memory (`BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache`), and partition reads could be mixed between block cache and directly from the file (e.g., with `enable_index_compression == 1` and `mmap_read == 1`, partitions that were stored uncompressed due to poor compression ratio would be read directly from the file via mmap, while partitions that were stored compressed would be read from block cache). The bug could cause index partitions to be mistakenly considered empty during reads, leading to wrong read results. -* Since 6.12, memtable lookup should report unrecognized value_type as corruption (#7121). -* Since 6.14, fix false positive flush/compaction `Status::Corruption` failure when `paranoid_file_checks == true` and range tombstones were written to the compaction output files. -* Since 6.14, fix a bug that could cause a stalled write to crash with a mix of slowdown and no_slowdown writes (`WriteOptions.no_slowdown=true`). -* Fixed a bug which causes a hang when closing the DB if refit level is set, in opt builds. It was because ContinueBackgroundWork() was called inside an assert statement, which is a no-op. It was introduced in 6.14. -* Fixed a bug which causes Get() to return an incorrect result when a key's merge operand is applied twice. This can occur if the thread performing Get() runs concurrently with a background flush thread and another thread writing to the MANIFEST file (PR6069). -* Reverted a behavior change silently introduced in 6.14.2, in which the effects of the `ignore_unknown_options` flag (used in option parsing/loading functions) changed.
-* Reverted a behavior change silently introduced in 6.14, in which options parsing/loading functions began returning `NotFound` instead of `InvalidArgument` for option names not available in the present version. -* Fixed MultiGet bugs where it doesn't return valid data with user-defined timestamps. -* Fixed a potential bug caused by evaluating `TableBuilder::NeedCompact()` before `TableBuilder::Finish()` in the compaction job. For example, the `NeedCompact()` method of the `CompactOnDeletionCollector` returned by the built-in `CompactOnDeletionCollectorFactory` requires `BlockBasedTable::Finish()` to return the correct result. The bug can cause a compaction-generated file not to be marked for future compaction based on deletion ratio. -* Fixed a seek issue with prefix extractor and timestamp. -* Fixed a bug of encoding and parsing BlockBasedTableOptions::read_amp_bytes_per_bit as a 64-bit integer. -* Fixed a bug of a recovery corner case, details in PR7621. - -### Public API Change -* Deprecate `BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache` and `BlockBasedTableOptions::pin_top_level_index_and_filter`. These options still take effect until users migrate to the replacement APIs in `BlockBasedTableOptions::metadata_cache_options`. Migration guidance can be found in the API comments on the deprecated options. -* Add new API `DB::VerifyFileChecksums` to verify SST file checksums against corresponding entries in the MANIFEST if present. The current implementation requires scanning and recomputing file checksums. - -### Behavior Changes -* The dictionary compression settings specified in `ColumnFamilyOptions::compression_opts` now additionally affect files generated by flush and compaction to non-bottommost levels. Previously those settings at most affected files generated by compaction to the bottommost level, depending on whether `ColumnFamilyOptions::bottommost_compression_opts` overrode them. Users who relied on dictionary compression settings in `ColumnFamilyOptions::compression_opts` affecting only the bottommost level can keep the behavior by moving their dictionary settings to `ColumnFamilyOptions::bottommost_compression_opts` and setting its `enabled` flag. -* When the `enabled` flag is set in `ColumnFamilyOptions::bottommost_compression_opts`, those compression options now take effect regardless of the value in `ColumnFamilyOptions::bottommost_compression`. Previously, those compression options only took effect when `ColumnFamilyOptions::bottommost_compression != kDisableCompressionOption`. Now, they additionally take effect when `ColumnFamilyOptions::bottommost_compression == kDisableCompressionOption` (such a setting causes the bottommost compression type to fall back to `ColumnFamilyOptions::compression_per_level` if configured, and otherwise fall back to `ColumnFamilyOptions::compression`). - -### New Features -* An EXPERIMENTAL new Bloom alternative that saves about 30% space compared to Bloom filters, with about 3-4x construction time and similar query times, is available using NewExperimentalRibbonFilterPolicy. - -## 6.14 (2020-10-09) -### Bug fixes -* Fixed a bug where, after a `CompactRange()` with `CompactRangeOptions::change_level` set fails due to a conflict in the level change step, all subsequent calls to `CompactRange()` with `CompactRangeOptions::change_level` set would incorrectly fail with a `Status::NotSupported("another thread is refitting")` error.
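-  A minimal illustrative sketch of the kind of call affected by this fix (the helper name `RefitToLevel` is hypothetical; only the documented `CompactRangeOptions` fields are used):
-  ```cpp
-  #include <rocksdb/db.h>
-  #include <rocksdb/options.h>
-  // Manual compaction that refits its output to a target level.
-  rocksdb::Status RefitToLevel(rocksdb::DB* db, int target_level) {
-    rocksdb::CompactRangeOptions cro;
-    cro.change_level = true;          // move the compaction output
-    cro.target_level = target_level;  // e.g. 1
-    // nullptr/nullptr compacts the entire key range.
-    return db->CompactRange(cro, nullptr, nullptr);
-  }
-  ```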
-* Fixed a bug that the bottommost level compaction could still be a trivial move even if `BottommostLevelCompaction::kForce` or `kForceOptimized` is set. - -### Public API Change -* The methods to create and manage EncryptedEnv have been changed. The EncryptionProvider is now passed to NewEncryptedEnv as a shared pointer, rather than a raw pointer. Comparably, the CTREncryptionProvider now takes a shared pointer, rather than a reference, to a BlockCipher. CreateFromString methods have been added to BlockCipher and EncryptionProvider to provide a single API by which different ciphers and providers can be created, respectively. -* The internal classes (CTREncryptionProvider, ROT13BlockCipher, CTRCipherStream) associated with the EncryptedEnv have been moved out of the public API. To create a CTREncryptionProvider, one can either use EncryptionProvider::NewCTRProvider, or EncryptionProvider::CreateFromString("CTR"). To create a new ROT13BlockCipher, one can either use BlockCipher::NewROT13Cipher or BlockCipher::CreateFromString("ROT13"). -* The EncryptionProvider::AddCipher method has been added to allow keys to be added to an EncryptionProvider. This API will allow future providers to support multiple cipher keys. -* Add a new option "allow_data_in_errors". When this option is set by users, it allows them to opt in to error messages containing corrupted keys/values. Corrupt keys and values will be logged in the messages, logs, status, etc., providing useful information regarding the affected data. By default the option is set to false to prevent user data from being exposed in messages, so data will be redacted from logs, messages, and status by default. -* AdvancedColumnFamilyOptions::force_consistency_checks is now true by default, for more proactive DB corruption detection at virtually no cost (estimated two extra CPU cycles per million on a major production workload). Corruptions reported by these checks now mention "force_consistency_checks" in case a false positive corruption report is suspected and the option needs to be disabled (unlikely). Since existing column families have a saved setting for force_consistency_checks, only new column families will pick up the new default. - -### General Improvements -* The settings of the DBOptions and ColumnFamilyOptions are now managed by Configurable objects (see New Features). The same convenience methods to configure these options still exist but the backend implementation has been unified under a common implementation. - -### New Features - -* Methods to configure, serialize, and compare -- such as TableFactory -- are exposed directly through the Configurable base class (from which these objects inherit). This change will allow for better and more thorough configuration management and retrieval in the future. The options for a Configurable object can be set via the ConfigureFromMap, ConfigureFromString, or ConfigureOption method. The serialized version of the options of an object can be retrieved via the GetOptionString, ToString, or GetOption methods. The list of options supported by an object can be obtained via the GetOptionNames method. The "raw" object (such as the BlockBasedTableOptions) for an option may be retrieved via the GetOptions method. Configurable options can be compared via the AreEquivalent method. The settings within a Configurable object may be validated via the ValidateOptions method.
The object may be initialized (at which point only mutable options may be updated) via the PrepareOptions method. -* Introduce options.check_flush_compaction_key_order with default value true. With this option, during flush and compaction, key order will be checked when writing to each SST file. If the order is violated, the flush or compaction will fail. -* Added is_full_compaction to CompactionJobStats, so that the information is available through the EventListener interface. -* Add more stats for MultiGet in Histogram to get the number of data blocks, index blocks, filter blocks and sst files read from the file system per level. -* SST files have a new table property called db_host_id, which is set to the hostname by default. A new option in DBOptions, db_host_id, allows the property value to be overridden with a user specified string, or disabled completely by making the option string empty. -* Methods to create customizable extensions -- such as TableFactory -- are exposed directly through the Customizable base class (from which these objects inherit). This change will allow these Customizable classes to be loaded and configured in a standard way (via CreateFromString). More information on how to write and use Customizable classes is in the customizable.h header file. - -## 6.13 (2020-09-12) -### Bug fixes -* Fix a performance regression introduced in 6.4 that makes an upper bound check for every Next() even if keys are within a data block that is within the upper bound. -* Fix a possible corruption to the LSM state (overlapping files within a level) when a `CompactRange()` for refitting levels (`CompactRangeOptions::change_level == true`) and another manual compaction are executed in parallel. -* Sanitize `recycle_log_file_num` to zero when the user attempts to enable it in combination with `WALRecoveryMode::kTolerateCorruptedTailRecords`. Previously the two features were allowed together, which compromised the user's configured crash-recovery guarantees. -* Fix a bug where a level refitting in CompactRange() might race with an automatic compaction that puts the data to the target level of the refitting. The bug has been there for years. -* Fixed a bug in version 6.12 in which BackupEngine::CreateNewBackup could fail intermittently with non-OK status when backing up a read-write DB configured with a DBOptions::file_checksum_gen_factory. -* Fix useless no-op compactions scheduled upon snapshot release when options.disable_auto_compactions = true. -* Fix a bug where, when max_write_buffer_size_to_maintain is set, immutable flushed memtable destruction is delayed until the next super version is installed. A memtable is not added to the delete list because of its reference held by the super version, and the super version doesn't switch because of the empty delete list. So memory usage keeps on increasing beyond write_buffer_size + max_write_buffer_size_to_maintain. -* Avoid converting MERGES to PUTS when allow_ingest_behind is true. -* Fix compression dictionary sampling together with `SstFileWriter`. Previously, the dictionary would be trained/finalized immediately with zero samples. Now, the whole `SstFileWriter` file is buffered in memory and then sampled. -* Fix a bug with `avoid_unnecessary_blocking_io=1` and creating backups (BackupEngine::CreateNewBackup) or checkpoints (Checkpoint::Create). With this setting and WAL enabled, these operations could randomly fail with non-OK status.
-* Fix a bug in which bottommost compaction continues to advance the underlying InternalIterator to skip tombstones even after shutdown. - -### New Features -* A new field `std::string requested_checksum_func_name` is added to `FileChecksumGenContext`, which enables the checksum factory to create generators for a suite of different functions. -* Added a new subcommand, `ldb unsafe_remove_sst_file`, which removes a lost or corrupt SST file from a DB's metadata. This command involves data loss and must not be used on a live DB. - -### Performance Improvements -* Reduce thread number for multiple DB instances by re-using one global thread for statistics dumping and persisting. -* Reduce write-amp in heavy write bursts in `kCompactionStyleLevel` compaction style with `level_compaction_dynamic_level_bytes` set. -* BackupEngine incremental backups no longer read DB table files that are already saved to a shared part of the backup directory, unless `share_files_with_checksum` is used with `kLegacyCrc32cAndFileSize` naming (discouraged). - * For `share_files_with_checksum`, we are confident there is no regression (vs. pre-6.12) in detecting DB or backup corruption at backup creation time, mostly because the old design did not leverage this extra checksum computation for detecting inconsistencies at backup creation time. - * For `share_table_files` without "checksum" (not recommended), there is a regression in detecting fundamentally unsafe use of the option, greatly mitigated by file size checking (under "Behavior Changes"). Almost no reason to use `share_files_with_checksum=false` should remain. - * `DB::VerifyChecksum` and `BackupEngine::VerifyBackup` with checksum checking are still able to catch corruptions that `CreateNewBackup` does not. - -### Public API Change -* Expose kTypeDeleteWithTimestamp in EntryType and update GetEntryType() accordingly. -* Added file_checksum and file_checksum_func_name to TableFileCreationInfo, which can pass the table file checksum information through the OnTableFileCreated callback during flush and compaction. -* A warning is added to the `DB::DeleteFile()` API describing its known problems and deprecation plan. -* Add a new stats level, i.e. StatsLevel::kExceptTickers (PR7329), to exclude tickers even if the application passes a non-null Statistics object. -* Added a new status code IOStatus::IOFenced() for the Env/FileSystem to indicate that writes from this instance are fenced off. Like any other background error, this error is returned to the user in Put/Merge/Delete/Flush calls and can be checked using Status::IsIOFenced(). - -### Behavior Changes -* File abstraction `FSRandomAccessFile.Prefetch()` default return status is changed from `OK` to `NotSupported`. If the user's inherited file class doesn't implement prefetch, RocksDB will create an internal prefetch buffer to improve read performance. -* When a retryable IO error happens during Flush (manifest write error is excluded) and WAL is disabled, originally it is mapped to kHardError. Now, it is mapped to soft error, so the DB will not stall the writes unless the memtable is full. At the same time, when auto resume is triggered to recover the retryable IO error during Flush, SwitchMemtable is not called, to avoid generating too many small immutable memtables. If WAL is enabled, no behavior changes. -* When considering whether a table file is already backed up in a shared part of the backup directory, BackupEngine would already query the sizes of source (DB) and pre-existing destination (backup) files.
BackupEngine now uses these file sizes to detect corruption, as at least one of (a) old backup, (b) backup in progress, or (c) current DB is corrupt if there's a size mismatch. - -### Others -* Errors in prefetching partitioned index blocks will not be swallowed. They will fail the query and return the IOError to users. - -## 6.12 (2020-07-28) -### Public API Change -* Encryption file classes are now exposed for inheritance in env_encryption.h -* The file I/O listener is extended to cover more I/O operations. Now class `EventListener` in listener.h contains new callback functions: `OnFileFlushFinish()`, `OnFileSyncFinish()`, `OnFileRangeSyncFinish()`, `OnFileTruncateFinish()`, and `OnFileCloseFinish()`. -* `FileOperationInfo` now reports `duration` measured by `std::chrono::steady_clock` and `start_ts` measured by `std::chrono::system_clock` instead of start and finish timestamps measured by `system_clock`. Note that `system_clock` is called before `steady_clock` in program order at operation starts. -* `DB::GetDbSessionId(std::string& session_id)` is added. `session_id` stores a unique identifier that gets reset every time the DB is opened. This DB session ID should be unique among all open DB instances on all hosts, and should be unique among re-openings of the same or other DBs. This identifier is recorded in the LOG file on the line starting with "DB Session ID:". -* `DB::OpenForReadOnly()` now returns `Status::NotFound` when the specified DB directory does not exist. Previously the error returned depended on the underlying `Env`. This change is available in all 6.11 releases as well. -* A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`, which is false by default. If it is true, `BackupEngine::VerifyBackup` verifies checksums and file sizes of backup files. Pass `false` for `verify_with_checksum` to maintain the previous behavior and performance of `BackupEngine::VerifyBackup`, by only verifying sizes of backup files. - -### Behavior Changes -* Best-efforts recovery ignores the CURRENT file completely. If the CURRENT file is missing during recovery, best-efforts recovery still proceeds with MANIFEST file(s). -* In best-efforts recovery, an error that is not Corruption or IOError::kNotFound or IOError::kPathNotFound will be overwritten silently. Fixed this by checking all non-ok cases and returning early. -* When `file_checksum_gen_factory` is set to `GetFileChecksumGenCrc32cFactory()`, BackupEngine will compare the crc32c checksums of table files computed when creating a backup to the expected checksums stored in the DB manifest, and will fail `CreateNewBackup()` on mismatch (corruption). If the `file_checksum_gen_factory` is not set or set to any other customized factory, there is no checksum verification to detect if SST files in a DB are corrupt when read, copied, and independently checksummed by BackupEngine. -* When a DB sets `stats_dump_period_sec > 0`, either as the initial value for DB open or as a dynamic option change, the first stats dump is staggered in the following X seconds, where X is an integer in `[0, stats_dump_period_sec)`. Subsequent stats dumps are still spaced `stats_dump_period_sec` seconds apart. -* When the paranoid_file_checks option is true, a hash of all keys and values is generated when the SST file is written, and then the values are read back in to validate the file. A corruption is signaled if the two hashes do not match. - -### Bug fixes -* Compressed block cache was automatically disabled with read-only DBs by mistake.
Now it is fixed: compressed block cache will be in effect with read-only DBs too. -* Fix a bug of a wrong iterator result if another thread finishes an update and a DB flush between two statements. -* Disable file deletion after MANIFEST write/sync failure until db re-open or Resume() so that a subsequent re-open will not see the MANIFEST referencing deleted SSTs. -* Fix a bug when index_type == kTwoLevelIndexSearch in PartitionedIndexBuilder to update FlushPolicy to point to the internal key partitioner when it changes from user-key mode to internal-key mode in an index partition. -* Make compaction report InternalKey corruption while iterating over the input. -* Fix a bug which may cause MultiGet to be slow because it may read more data than requested, but this won't affect correctness. The bug was introduced in the 6.10 release. -* Fail recovery and report once hitting a physical log record checksum mismatch while reading MANIFEST. RocksDB should not continue processing the MANIFEST any further. -* Fixed a bug in size-amp-triggered and periodic-triggered universal compaction, where the compression settings for the first input level were used rather than the compression settings for the output (bottom) level. - -### New Features -* DB identity (`db_id`) and DB session identity (`db_session_id`) are added to table properties and stored in SST files. SST files generated from SstFileWriter and Repairer have DB identity “SST Writer” and “DB Repairer”, respectively. Their DB session IDs are generated in the same way as `DB::GetDbSessionId`. The session ID for SstFileWriter (resp., Repairer) resets every time `SstFileWriter::Open` (resp., `Repairer::Run`) is called. -* Added experimental option BlockBasedTableOptions::optimize_filters_for_memory for reducing allocated memory size of Bloom filters (~10% savings with Jemalloc) while preserving the same general accuracy. To have an effect, the option requires format_version=5 and malloc_usable_size. Enabling this option is forward and backward compatible with existing format_version=5. -* `BackupableDBOptions::share_files_with_checksum_naming` is added with new default behavior for naming backup files with `share_files_with_checksum`, to address performance and backup integrity issues. See API comments for details. -* Added an auto resume function to automatically recover the DB from a background retryable IO error. When a retryable IOError happens during flush and WAL write, the error is mapped to Hard Error and the DB will be in read-only mode. When a retryable IO error happens during compaction, the error will be mapped to Soft Error and the DB is still in write/read mode. The auto resume function will create a thread for the DB to call DB->ResumeImpl() to attempt recovery from the retryable IO error during flush and WAL write. Compaction will be rescheduled by itself if a retryable IO error happens. Auto resume may itself hit another retryable IO error during the recovery, in which case that recovery attempt fails. Retrying the auto resume may solve the issue, so max_bgerror_resume_count decides how many resume cycles will be tried in total. If it is <=0, auto resume of retryable IO errors is disabled. The default is INT_MAX, which effectively allows unlimited auto resume attempts. bgerror_resume_retry_interval decides the time interval between two auto resumes. -* Option `max_subcompactions` can be set dynamically using DB::SetDBOptions(). -* Added experimental ColumnFamilyOptions::sst_partitioner_factory to determine the partitioning of sst files.
This helps compaction to split the files on interesting boundaries (key prefixes) to make propagation of sst files less write amplifying (covering the whole key space). - -### Performance Improvements -* Eliminate key copies for internal comparisons while accessing ingested block-based tables. -* Reduce key comparisons during random access in all block-based tables. -* BackupEngine avoids unnecessary repeated checksum computation for backing up a table file to the `shared_checksum` directory when using `share_files_with_checksum_naming = kUseDbSessionId` (new default), except on SST files generated before this version of RocksDB, which fall back on using `kLegacyCrc32cAndFileSize`. - -## 6.11 (2020-06-12) -### Bug Fixes -* Fix consistency checking error swallowing in some cases when options.force_consistency_checks = true. -* Fix possible false NotFound status from batched MultiGet using index type kHashSearch. -* Fix corruption caused by enabling delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode, along with parallel compactions. The bug can result in two parallel compactions picking the same input files, resulting in the DB resurrecting older and deleted versions of some keys. -* Fix a use-after-free bug in best-efforts recovery. column_family_memtables_ needs to point to valid ColumnFamilySet. -* Let best-efforts recovery ignore corrupted files during table loading. -* Fix corrupt key read from ingested file when iterator direction switches from reverse to forward at a key that is a prefix of another key in the same file. It is only possible in files with a non-zero global seqno. -* Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size. -* Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile(). -* Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch). -* Fix sst_dump to return non-zero exit code if the specified file is not a recognized SST file or fails requested checks. -* Fix incorrect results from batched MultiGet for duplicate keys, when the duplicate key matches the largest key of an SST file and the value type for the key in the file is a merge value. - -### Public API Change -* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request. -* BlobDB now explicitly disallows using the default column family's storage directories as blob directory. -* DeleteRange now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined. -* ldb now uses options.force_consistency_checks = true by default and "--disable_consistency_checks" is added to disable it. -* DB::OpenForReadOnly no longer creates files or directories if the named DB does not exist, unless create_if_missing is set to true. -* The consistency checks that validate LSM state changes (table file additions/deletions during flushes and compactions) are now stricter, more efficient, and no longer optional, i.e. 
they are performed even if `force_consistency_checks` is `false`. -* Disable delete triggered compaction (NewCompactOnDeletionCollectorFactory) in universal compaction mode with num_levels = 1 in order to avoid a corruption bug. -* `pin_l0_filter_and_index_blocks_in_cache` no longer applies to L0 files larger than `1.5 * write_buffer_size` to give more predictable memory usage. Such L0 files may exist due to intra-L0 compaction, external file ingestion, or the user dynamically changing `write_buffer_size` (note, however, that files that are already pinned will continue being pinned, even after such a dynamic change). -* In point-in-time WAL recovery mode, fail database recovery in case of an IOError while reading the WAL, to avoid data loss. -* A new method `Env::LowerThreadPoolCPUPriority(Priority, CpuPriority)` is added to `Env` to be able to lower to a specific priority such as `CpuPriority::kIdle`. - -### New Features -* Added a new --readahead_size argument to sst_dump. Users can specify the read size when scanning the data. sst_dump also tries to prefetch the tail part of the SST files, so usually some number of I/Os are saved there too. -* Generate file checksum in SstFileWriter if Options.file_checksum_gen_factory is set. The checksum and checksum function name are stored in ExternalSstFileInfo after the sst file write is finished. -* Add a value_size_soft_limit in read options which limits the cumulative value size of keys read in batches in MultiGet. Once the cumulative value size of found keys exceeds read_options.value_size_soft_limit, all the remaining keys are returned with status Abort without further finding their values. By default the value_size_soft_limit is std::numeric_limits<uint64_t>::max(). -* Enable SST file ingestion with file checksum information when calling IngestExternalFiles(const std::vector<IngestExternalFileArg>& args). Added files_checksums and files_checksum_func_names to IngestExternalFileArg such that the user can ingest the sst files with their file checksum information. Added verify_file_checksum to IngestExternalFileOptions (default is True). To be backward compatible, if the DB does not enable file checksums or the user does not provide checksum information (the vectors of files_checksums and files_checksum_func_names are both empty), verification of the file checksum is always successful. If the DB enables file checksums, the DB will always generate the checksum for each ingested SST file during the Prepare stage of ingestion and store the checksum in the Manifest, unless verify_file_checksum is False and checksum information is provided by the application. In this case, we only verify the checksum function name and directly store the ingested checksum in the Manifest. If verify_file_checksum is set to True, the DB will verify the ingested checksum and function name against the generated ones. Any mismatch will fail the ingestion. Note that if IngestExternalFileOptions::write_global_seqno is True, the seqno will be changed in the ingested file. Therefore, the checksum of the file will be changed. In this case, a new checksum will be generated after the seqno is updated and be stored in the Manifest. - -### Performance Improvements -* Eliminate redundant key comparisons during random access in block-based tables. - -## 6.10 (2020-05-02) -### Bug Fixes -* Fix a wrong result being read from an ingested file. This may happen when a key in the file happens to be a prefix of another key also in the file. The issue can further cause more data corruption. The issue exists with rocksdb >= 5.0.0 since DB::IngestExternalFile() was introduced.
-* Finish implementation of BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It's now ready for use. Significantly reduces read amplification in some setups, especially for iterator seeks. -* Fix a bug by updating CURRENT file so that it points to the correct MANIFEST file after best-efforts recovery. -* Fixed a bug where ColumnFamilyHandle objects were not cleaned up in case an error happened during BlobDB's open after the base DB had been opened. -* Fix a potential undefined behavior caused by trying to dereference nullable pointer (timestamp argument) in DB::MultiGet. -* Fix a bug caused by not including user timestamp in MultiGet LookupKey construction. This can lead to wrong query result since the trailing bytes of a user key, if not shorter than timestamp, will be mistaken for user timestamp. -* Fix a bug caused by using wrong compare function when sorting the input keys of MultiGet with timestamps. -* Upgraded version of bzip library (1.0.6 -> 1.0.8) used with RocksJava to address potential vulnerabilities if an attacker can manipulate compressed data saved and loaded by RocksDB (not normal). See issue #6703. - -### Public API Change -* Add a ConfigOptions argument to the APIs dealing with converting options to and from strings and files. The ConfigOptions is meant to replace some of the options (such as input_strings_escaped and ignore_unknown_options) and allow for more parameters to be passed in the future without changing the function signature. -* Add NewFileChecksumGenCrc32cFactory to the file checksum public API, such that the builtin Crc32c based file checksum generator factory can be used by applications. -* Add IsDirectory to Env and FS to indicate if a path is a directory. - -### New Features -* Added support for pipelined & parallel compression optimization for `BlockBasedTableBuilder`. This optimization makes block building, block compression and block appending a pipeline, and uses multiple threads to accelerate block compression. Users can set `CompressionOptions::parallel_threads` greater than 1 to enable compression parallelism. This feature is experimental for now. -* Provide an allocator for memkind to be used with block cache. This is to work with memory technologies (Intel DCPMM is one such technology currently available) that require different libraries for allocation and management (such as PMDK and memkind). The high capacities available make it possible to provision large caches (up to several TBs in size) beyond what is achievable with DRAM. -* Option `max_background_flushes` can be set dynamically using DB::SetDBOptions(). -* Added functionality in sst_dump tool to check the compressed file size for different compression levels and print the time spent on compressing files with each compression type. Added arguments `--compression_level_from` and `--compression_level_to` to report size of all compression levels and one compression_type must be specified with it so that it will report compressed sizes of one compression type with different levels. -* Added statistics for redundant insertions into block cache: rocksdb.block.cache.*add.redundant. (There is currently no coordination to ensure that only one thread loads a table block when many threads are trying to access that same table block.) 
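-  A minimal illustrative sketch of reading these new stats (the helper name `RedundantBlockCacheAdds` is hypothetical, and the ticker enum `BLOCK_CACHE_ADD_REDUNDANT` is assumed to mirror the stat string above):
-  ```cpp
-  #include <cstdint>
-  #include <rocksdb/options.h>
-  #include <rocksdb/statistics.h>
-  // Read the redundant-insertion count from an Options with statistics enabled.
-  uint64_t RedundantBlockCacheAdds(const rocksdb::Options& options) {
-    // Typically set up before Open(): options.statistics = rocksdb::CreateDBStatistics();
-    return options.statistics
-               ? options.statistics->getTickerCount(rocksdb::BLOCK_CACHE_ADD_REDUNDANT)
-               : 0;
-  }
-  ```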
- -### Bug Fixes -* Fix a bug when making options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts dynamically changeable: the modified values were not written to option files or returned back to users when being queried. -* Fix a bug where index key comparisons were not accounted for in `PerfContext::user_key_comparison_count` for lookups in files written with `format_version >= 3`. -* Fix many bloom.filter statistics not being updated in batch MultiGet. - -### Performance Improvements -* Improve performance of batch MultiGet with partitioned filters, by sharing block cache lookups for applicable filter blocks. -* Reduced memory copies when fetching and uncompressing compressed blocks from sst files. - -## 6.9.0 (2020-03-29) -### Behavior changes -* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly. - -### Public API Change -* Fix spelling so that the API now has the correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. -* Updated default format_version in BlockBasedTableOptions from 2 to 4. SST files generated with the new default can be read by RocksDB versions 5.16 and newer, and use more efficient encoding of keys in index blocks. -* A new parameter `CreateBackupOptions` is added to both `BackupEngine::CreateNewBackup` and `BackupEngine::CreateNewBackupWithMetadata`; you can decrease the CPU priority of `BackupEngine`'s background threads by setting `decrease_background_thread_cpu_priority` and `background_thread_cpu_priority` in `CreateBackupOptions`. -* Updated the public API of SST file checksums. Introduce the FileChecksumGenFactory to create the FileChecksumGenerator for each SST file, such that the FileChecksumGenerator is not shared and it can be more general for checksum implementations. Changed the FileChecksumGenerator interface from Value, Extend, and GetChecksum to Update, Finalize, and GetChecksum. Finalize should be only called once after all data is processed to generate the final checksum. Temporary data should be maintained by the FileChecksumGenerator object itself and finally it can return the checksum string. - -### Bug Fixes -* Fix a bug where range tombstone blocks in ingested files were cached incorrectly during ingestion. If range tombstones were read from those incorrectly cached blocks, the keys they covered would be exposed. -* Fix a data race that might, with a small chance, cause a crash when calling DB::GetCreationTimeOfOldestFile(). The bug was introduced in the 6.6 release. -* Fix a bug where the boolean value optimize_filters_for_hits was used as the max threads value when loading table handles after a flush or compaction. The correct value is 1. The bug should not cause user visible problems. -* Fix a bug which might crash the service when the write buffer manager fails to insert the dummy handle into the block cache. - -### Performance Improvements -* In CompactRange, for levels starting from 0, if the level does not have any file with any key falling in the specified range, the level is skipped. So instead of always compacting from level 0, the compaction starts from the first level with keys in the specified range until the last such level.
-* Reduced memory copies when reading the sst footer and blobdb in direct IO mode. -* When restarting a database with large numbers of sst files, a large amount of CPU time was spent getting the logical block size of the sst files, which slowed down startup; this inefficiency is optimized away with an internal cache for the logical block sizes. - -### New Features -* Basic support for user timestamp in iterator. Seek/SeekToFirst/Next and lower/upper bounds are supported. Reverse iteration is not supported. Merge is not considered. -* When a file lock fails because the lock is held by the current process, return the acquiring time and thread ID in the error message. -* Added a new option, best_efforts_recovery (default: false), to allow a database to open in a db dir with missing table files. During best efforts recovery, missing table files are ignored, and the database recovers to the most recent state without the missing table files. Cross-column-family consistency is not guaranteed even if WAL is enabled. -* options.bottommost_compression, options.compression_opts and options.bottommost_compression_opts are now dynamically changeable. - -## 6.8.0 (2020-02-24) -### Java API Changes -* Major breaking changes to Java comparators, toward standardizing on ByteBuffer for performant, locale-neutral operations on keys (#6252). -* Added overloads of common API methods using direct ByteBuffers for keys and values (#2283). - -### Bug Fixes -* Fix incorrect results while a block-based table uses kHashSearch, together with Prev()/SeekForPrev(). -* Fix a bug that prevents opening a DB after two consecutive crashes with TransactionDB, where the first crash recovers from a corrupted WAL with kPointInTimeRecovery but the second cannot. -* Fixed issue #6316 that can cause a corruption of the MANIFEST file in the middle when writing to it fails due to no disk space. -* Add DBOptions::skip_checking_sst_file_sizes_on_db_open. It disables potentially expensive checking of all sst file sizes in DB::Open(). -* BlobDB now ignores trivially moved files when updating the mapping between blob files and SSTs. This should mitigate issue #6338 where out of order flush/compaction notifications could trigger an assertion with the earlier code. -* Batched MultiGet() ignores IO errors while reading data blocks, causing it to potentially continue looking for a key and returning stale results. -* `WriteBatchWithIndex::DeleteRange` returns `Status::NotSupported`. Previously it returned success even though reads on the batch did not account for range tombstones. The corresponding language bindings now cannot be used. In C, that includes `rocksdb_writebatch_wi_delete_range`, `rocksdb_writebatch_wi_delete_range_cf`, `rocksdb_writebatch_wi_delete_rangev`, and `rocksdb_writebatch_wi_delete_rangev_cf`. In Java, that includes `WriteBatchWithIndex::deleteRange`. -* Assign a new MANIFEST file number when the caller tries to create a new MANIFEST by calling LogAndApply(..., new_descriptor_log=true). This bug can cause the MANIFEST to be overwritten during recovery if options.write_dbid_to_manifest = true and there are WAL file(s). - -### Performance Improvements -* Perform readahead when reading from option files. Inside DB, options.log_readahead_size will be used as the readahead size. In other cases, a default 512KB is used.
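-  A minimal illustrative sketch of the in-DB knob mentioned above (the helper name `MakeOptions` and the chosen value are placeholders):
-  ```cpp
-  #include <rocksdb/options.h>
-  // Larger readahead for the reads the DB issues against its option/log files.
-  rocksdb::Options MakeOptions() {
-    rocksdb::Options options;
-    options.log_readahead_size = 1024 * 1024;  // 1 MB; 0 keeps the default behavior
-    return options;
-  }
-  ```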
- -### Public API Change -* The BlobDB garbage collector now emits the statistics `BLOB_DB_GC_NUM_FILES` (number of blob files obsoleted during GC), `BLOB_DB_GC_NUM_NEW_FILES` (number of new blob files generated during GC), `BLOB_DB_GC_FAILURES` (number of failed GC passes), `BLOB_DB_GC_NUM_KEYS_RELOCATED` (number of blobs relocated during GC), and `BLOB_DB_GC_BYTES_RELOCATED` (total size of blobs relocated during GC). On the other hand, the following statistics, which are not relevant for the new GC implementation, are now deprecated: `BLOB_DB_GC_NUM_KEYS_OVERWRITTEN`, `BLOB_DB_GC_NUM_KEYS_EXPIRED`, `BLOB_DB_GC_BYTES_OVERWRITTEN`, `BLOB_DB_GC_BYTES_EXPIRED`, and `BLOB_DB_GC_MICROS`. -* Disable recycle_log_file_num when inconsistent recovery modes are requested: kPointInTimeRecovery and kAbsoluteConsistency. - -### New Features -* Added a checksum for each SST file generated by Flush or Compaction. Added sst_file_checksum_func to Options such that users can plug in their own SST file checksum function by overriding the FileChecksumFunc class. If the user does not set the sst_file_checksum_func, SST file checksum calculation will not be enabled. The checksum information includes a uint32_t checksum value and a checksum function name (string). The checksum information is stored in FileMetadata in the version store and also logged to MANIFEST. A new tool is added to LDB such that users can dump out a list of file checksum information from MANIFEST (stored in an unordered_map). -* `db_bench` now supports `value_size_distribution_type`, `value_size_min`, `value_size_max` options for generating random variable sized values. Added the `blob_db_compression_type` option for BlobDB to enable blob compression. -* Replace the RocksDB namespace "rocksdb" with the flag "ROCKSDB_NAMESPACE", which, if not defined, is defined as "rocksdb" in the header file rocksdb_namespace.h. - -## 6.7.0 (2020-01-21) -### Public API Change -* Added a rocksdb::FileSystem class in include/rocksdb/file_system.h to encapsulate file creation/read/write operations, and an option DBOptions::file_system to allow a user to pass in an instance of rocksdb::FileSystem. If it's a non-null value, this will take precedence over DBOptions::env for file operations. A new API rocksdb::FileSystem::Default() returns a platform default object. The DBOptions::env option and Env::Default() API will continue to be used for threading and other OS related functions, and where DBOptions::file_system is not specified, for file operations. For storage developers who are accustomed to rocksdb::Env, the interface in rocksdb::FileSystem is new and will probably undergo some changes as more storage systems are ported to it from rocksdb::Env. As of now, no env other than Posix has been ported to the new interface. -* A new rocksdb::NewSstFileManager() API that allows the caller to pass in separate Env and FileSystem objects. -* Changed the Java API for RocksDB.keyMayExist functions to use Holder instead of StringBuilder, so that retrieved values need not decode to Strings. -* A new `OptimisticTransactionDBOptions` option that allows users to configure the OCC validation policy. The default policy changes from kValidateSerial to kValidateParallel to reduce mutex contention. - -### Bug Fixes -* Fix a bug that can cause unnecessary bg threads to be scheduled (#6104). -* Fix a crash caused by concurrent CF iterations and drops (#6147). -* Fix a race condition for cfd->log_number_ between manifest switch and memtable switch (PR 6249) when the number of column families is greater than 1.
-* Fix a bug on fractional cascading index when multiple files at the same level contain the same smallest user key, and those user keys are for merge operands. In this case, a Get() for the exact key may miss some merge operands. -* Declare kHashSearch index type feature-incompatible with index_block_restart_interval larger than 1. -* Fixed an issue where the thread pools were not resized upon setting `max_background_jobs` dynamically through the `SetDBOptions` interface. -* Fix a bug that can cause write threads to hang when a slowdown/stall happens and there is a mix of writers with WriteOptions::no_slowdown set/unset. -* Fixed an issue where an incorrect "number of input records" value was used to compute the "records dropped" statistics for compactions. -* Fix a regression bug that causes a segfault when hash is used, max_open_files != -1, and total order seek is used and switched back. - -### New Features -* It is now possible to enable periodic compactions for the base DB when using BlobDB. -* BlobDB now garbage collects non-TTL blobs when `enable_garbage_collection` is set to `true` in `BlobDBOptions`. Garbage collection is performed during compaction: any valid blobs located in the oldest N files (where N is the number of non-TTL blob files multiplied by the value of `BlobDBOptions::garbage_collection_cutoff`) encountered during compaction get relocated to new blob files, and old blob files are dropped once they are no longer needed. Note: we recommend enabling periodic compactions for the base DB when using this feature to deal with the case when some old blob files are kept alive by SSTs that otherwise do not get picked for compaction. -* `db_bench` now supports the `garbage_collection_cutoff` option for BlobDB. -* Introduce ReadOptions.auto_prefix_mode. When set to true, the iterator will return the same result as total order seek, but may choose to use prefix seek internally based on the seek key and iterator upper bound. -* MultiGet() can use IO Uring to parallelize reads from the same SST file. This feature is disabled by default. It can be enabled with the environment variable ROCKSDB_USE_IO_URING. - -## 6.6.2 (2020-01-13) -### Bug Fixes -* Fixed a bug where non-L0 compaction input files were not considered to compute the `creation_time` of new compaction outputs. - -## 6.6.1 (2020-01-02) -### Bug Fixes -* Fix a bug in WriteBatchWithIndex::MultiGetFromBatchAndDB, which is called by Transaction::MultiGet, that causes a crash due to stale pointer access when the number of keys is > 32. -* Fixed two performance issues related to memtable history trimming. First, a new SuperVersion is now created only if some memtables were actually trimmed. Second, trimming is only scheduled if there is at least one flushed memtable that is kept in memory for the purposes of transaction conflict checking. -* BlobDB no longer updates the SST to blob file mapping upon failed compactions. -* Fix a bug in which a snapshot read through an iterator could be affected by a DeleteRange after the snapshot (#6062). -* Fixed a bug where BlobDB was comparing the `ColumnFamilyHandle` pointers themselves instead of only the column family IDs when checking whether an API call uses the default column family or not. -* Delete superversions in BackgroundCallPurge. -* Fix use-after-free and double-deleting files in BackgroundCallPurge(). - -## 6.6.0 (2019-11-25) -### Bug Fixes -* Fix data corruption caused by output of intra-L0 compaction on ingested file not being placed in correct order in L0.
-* Fix a data race between Version::GetColumnFamilyMetaData() and Compaction::MarkFilesBeingCompacted() for access to being_compacted (#6056). The current fix acquires the db mutex during Version::GetColumnFamilyMetaData(), which may cause a regression. -* Fix a bug in DBIter where the is_blob_ state isn't updated when iterating backward using seek. -* Fix a bug when format_version=3, partitioned filters, and prefix search are used in conjunction. The bug could result in Seek(prefix) returning NotFound for an existing prefix. -* Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound. -* Fix a bug causing a crash during external file ingestion when a background compaction causes a severe error (file not found). -* Fix a bug when partitioned filters and prefix search are used in conjunction, where ::SeekForPrev could return invalid for an existing prefix. ::SeekForPrev might be called by the user, or internally on ::Prev, or within ::Seek if the return value involves Delete or a Merge operand. -* Fix OnFlushCompleted being fired before the flush result is persisted in MANIFEST when there's a concurrent flush job. The bug has existed since OnFlushCompleted was introduced in rocksdb 3.8. -* Fixed an sst_dump crash on some plain table SST files. -* Fixed a memory leak in some error cases of opening plain table SST files. -* Fix a bug when a crash happens while calling WriteLevel0TableForRecovery for multiple column families, leading to a column family's log number being greater than the first corrupted log number when the DB is being opened in PointInTime recovery mode during the next recovery attempt (#5856). - -### New Features -* Universal compaction now supports options.periodic_compaction_seconds. A full compaction will be triggered if any file is over the threshold. -* `GetLiveFilesMetaData` and `GetColumnFamilyMetaData` now expose the file number of SST files as well as the oldest blob file referenced by each SST. -* A batched MultiGet API (DB::MultiGet()) that supports retrieving keys from multiple column families. -* Full and partitioned filters in the block-based table use an improved Bloom filter implementation, enabled with format_version 5 (or above) because previous releases cannot read this filter. This replacement is faster and more accurate, especially for high bits per key or millions of keys in a single (full) filter. For example, the new Bloom filter has the same false positive rate at 9.55 bits per key as the old one at 10 bits per key, and a lower false positive rate at 16 bits per key than the old one at 100 bits per key. -* Added AVX2 instructions to USE_SSE builds to accelerate the new Bloom filter and XXH3-based hash function on compatible x86_64 platforms (Haswell and later, ~2014). -* Support options.ttl or options.periodic_compaction_seconds with options.max_open_files = -1. The file's oldest ancester time and file creation time will be written to the manifest. If available, this information will be used instead of creation_time and file_creation_time in table properties. -* Setting options.ttl for universal compaction now has the same meaning as setting periodic_compaction_seconds. -* SstFileMetaData also returns file creation time and oldest ancester time.
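-  A minimal illustrative sketch of reading these per-file times (the helper name `PrintFileTimes` is hypothetical, and the field names are assumed to follow the table-property spellings, including "ancester"):
-  ```cpp
-  #include <cinttypes>
-  #include <cstdio>
-  #include <vector>
-  #include <rocksdb/db.h>
-  #include <rocksdb/metadata.h>
-  // Print the creation-time metadata for each live SST file.
-  void PrintFileTimes(rocksdb::DB* db) {
-    std::vector<rocksdb::LiveFileMetaData> files;
-    db->GetLiveFilesMetaData(&files);
-    for (const auto& f : files) {
-      std::printf("%s creation=%" PRIu64 " oldest_ancester=%" PRIu64 "\n",
-                  f.name.c_str(), f.file_creation_time, f.oldest_ancester_time);
-    }
-  }
-  ```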
-* The `sst_dump` command line tool `recompress` command now displays how many blocks were compressed and how many were not, in particular how many were not compressed because the compression ratio was not met (12.5% threshold for GoodCompressionRatio), as seen in the `number.block.not_compressed` counter stat since version 6.0.0.
-* Block cache usage now takes into account the overhead of metadata for each entry. This results in more accurate memory management. A side effect of this feature is that fewer items fit into a block cache of the same size, which can result in higher cache miss rates. This can be remedied by increasing the block cache size or passing kDontChargeCacheMetadata to its constructor to restore the old behavior.
-* When using BlobDB, a mapping is maintained and persisted in the MANIFEST between each SST file and the oldest non-TTL blob file it references.
-* `db_bench` now supports and by default issues non-TTL Puts to BlobDB. TTL Puts can be enabled by specifying a non-zero value for the `blob_db_max_ttl_range` command line parameter explicitly.
-* `sst_dump` now supports printing BlobDB blob indexes in a human-readable format. This can be enabled by specifying the `decode_blob_index` flag on the command line.
-* A number of new information elements are now exposed through the EventListener interface. For flushes, the file numbers of the new SST file and the oldest blob file referenced by the SST are propagated. For compactions, the level, file number, and the oldest blob file referenced are passed to the client for each compaction input and output file.
-
-### Public API Change
-* RocksDB release 4.1 or older will not be able to open a DB generated by the new release. 4.2 was released on Feb 23, 2016.
-* TTL Compactions in Level compaction style now initiate successive cascading compactions on a key range so that it reaches the bottom level quickly on TTL expiry. The `creation_time` table property for compaction output files is now set to the minimum of the creation times of all compaction inputs.
-* With FIFO compaction style, options.periodic_compaction_seconds will have the same meaning as options.ttl. Whichever is stricter will be used. With the default options.periodic_compaction_seconds value and options.ttl's default of 0, RocksDB will give a default of 30 days.
-* Added an API GetCreationTimeOfOldestFile(uint64_t* creation_time) to get the file_creation_time of the oldest SST file in the DB.
-* FilterPolicy now exposes additional API to make it possible to choose filter configurations based on context, such as table level and compaction style. See `LevelAndStyleCustomFilterPolicy` in db_bloom_filter_test.cc. While most existing custom implementations of FilterPolicy should continue to work as before, those wrapping the return of NewBloomFilterPolicy will require overriding the new function `GetBuilderWithContext()`, because calling `GetFilterBitsBuilder()` on the FilterPolicy returned by NewBloomFilterPolicy is no longer supported.
-* An unlikely usage of FilterPolicy is no longer supported. Calling GetFilterBitsBuilder() on the FilterPolicy returned by NewBloomFilterPolicy will now cause an assertion violation in debug builds, because RocksDB has internally migrated to a more elaborate interface that is expected to evolve further. Custom implementations of FilterPolicy should work as before, except those wrapping the return of NewBloomFilterPolicy, which will require a new override of a protected function in FilterPolicy.
-* NewBloomFilterPolicy now takes bits_per_key as a double instead of an int. This permits finer control over the memory vs. accuracy trade-off in the new Bloom filter implementation and should not change source code compatibility.
-* The option BackupableDBOptions::max_valid_backups_to_open is now only used when opening BackupEngineReadOnly. When opening a read/write BackupEngine, anything but the default value logs a warning and is treated as the default. This change ensures that backup deletion has proper accounting of shared files to ensure they are deleted when no longer referenced by a backup.
-* Deprecate `snap_refresh_nanos` option.
-* Added DisableManualCompaction/EnableManualCompaction to stop and resume manual compaction.
-* Add TryCatchUpWithPrimary() to StackableDB in non-LITE mode.
-* Add a new Env::LoadEnv() overloaded function to return a shared_ptr to Env.
-* Flush sets the file name to "(nil)" for OnTableFileCreationCompleted() if the flush does not produce any L0 file. This can happen if the file is empty and thus deleted by RocksDB.
-
-### Default Option Changes
-* Changed the default value of periodic_compaction_seconds to `UINT64_MAX - 1` which allows RocksDB to auto-tune periodic compaction scheduling. When using the default value, periodic compactions are now auto-enabled if a compaction filter is used. A value of `0` will turn off the feature completely.
-* Changed the default value of ttl to `UINT64_MAX - 1` which allows RocksDB to auto-tune the ttl value. When using the default value, TTL will be auto-enabled to 30 days, when the feature is supported. To revert to the old behavior, you can explicitly set it to 0.
-
-### Performance Improvements
-* For 64-bit hashing, RocksDB is standardizing on a slightly modified preview version of XXH3. This function is now used for many non-persisted hashes, along with fastrange64() in place of the modulus operator, and some benchmarks show a slight improvement.
-* The level iterator now invalidates the iterator more often in prefix seek when the level is filtered out by the prefix bloom filter.
-
-## 6.5.2 (2019-11-15)
-### Bug Fixes
-* Fix an assertion failure in MultiGet() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache.
-* Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured.
-* If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files.
-
-## 6.5.1 (2019-10-16)
-### Bug Fixes
-* Revert the feature "Merging iterator to avoid child iterator reseek for some cases (#5286)" since it might cause strange results when reseek happens with a different iterator upper bound.
-* Fix a bug in BlockBasedTableIterator that might return incorrect results when reseek happens with a different iterator upper bound.
-* Fix a bug when partitioned filters and prefix search are used in conjunction, ::SeekForPrev could return invalid for an existing prefix. ::SeekForPrev might be called by the user, or internally on ::Prev, or within ::Seek if the return value involves Delete or a Merge operand.
-
-## 6.5.0 (2019-09-13)
-### Bug Fixes
-* Fixed a number of data races in BlobDB.
-* Fix a bug where the compaction snapshot refresh feature is not disabled as advertised when `snap_refresh_nanos` is set to 0.
-* Fix bloom filter lookups by the MultiGet batching API when BlockBasedTableOptions::whole_key_filtering is false, by checking that a key is in the prefix_extractor domain and extracting the prefix before looking up.
-* Fix a bug in file ingestion caused by incorrect file number allocation when the number of column families involved in the ingestion exceeds 2.
-
-### New Features
-* Introduced DBOptions::max_write_batch_group_size_bytes to configure a maximum limit on the number of bytes that are written in a single batch of WAL or memtable write. It is followed when the leader write size is larger than 1/8 of this limit.
-* VerifyChecksum() by default will issue readahead. Allow ReadOptions to be passed in to those functions to override the readahead size. For checksum verification before external SST file ingestion, a new option, IngestExternalFileOptions.verify_checksums_readahead_size, is added for this readahead setting.
-* When a user uses options.force_consistency_check in RocksDB, instead of crashing the process, we now pass the error back to the user without killing the process.
-* Add an option `memtable_insert_hint_per_batch` to WriteOptions. If it is true, each WriteBatch will maintain its own insert hints for each memtable in concurrent write. See include/rocksdb/options.h for more details.
-
-### Public API Change
-* Added the max_write_buffer_size_to_maintain option to better control memory usage of immutable memtables.
-* Added a lightweight API GetCurrentWalFile() to get the last live WAL filename and size. Meant to be used as a helper for backup/restore tooling in a larger ecosystem such as MySQL with a MyRocks storage engine.
-* The MemTable Bloom filter, when enabled, now always uses cache locality. Options::bloom_locality now only affects the PlainTable SST format.
-
-### Performance Improvements
-* Improve the speed of the MemTable Bloom filter, reducing the write overhead of enabling it by 1/3 to 1/2, with similar benefit to read performance.
-
-## 6.4.0 (2019-07-30)
-### Default Option Change
-* LRUCacheOptions.high_pri_pool_ratio is set to 0.5 (previously 0.0) by default, which means that by default midpoint insertion is enabled. The same change is made for the default value of the high_pri_pool_ratio argument in NewLRUCache(). When the block cache is not explicitly created, the small block cache created by BlockBasedTable will still have this option set to 0.0.
-* Change BlockBasedTableOptions.cache_index_and_filter_blocks_with_high_priority's default value from false to true.
-
-### Public API Change
-* Filter and compression dictionary blocks are now handled similarly to data blocks with regards to the block cache: instead of storing objects in the cache, only the blocks themselves are cached. In addition, filter and compression dictionary blocks (as well as filter partitions) no longer get evicted from the cache when a table is closed.
-* Due to the above refactoring, block cache eviction statistics for filter and compression dictionary blocks are temporarily broken. We plan to reintroduce them in a later phase.
-* The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count.
-* Errors related to the retrieval of the compression dictionary are now propagated to the user.
-* db_bench adds a "benchmark" stats_history, which prints out the whole stats history.
-* Overload GetAllKeyVersions() to support non-default column families.
-* Added new APIs ExportColumnFamily() and CreateColumnFamilyWithImport() to support export and import of a Column Family. https://github.com/facebook/rocksdb/issues/3469
-* ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator.
-* Replaces the old Registrar with ObjectRegistry to allow users to create custom objects from strings; also adds LoadEnv() to Env.
-* Added a new overload of GetApproximateSizes which takes a SizeApproximationOptions object and returns a Status (see the sketch below). The older overloads redirect their calls to this new method and no longer assert if the include_flags doesn't have either of the INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors.
-* LDBCommandRunner::RunCommand() now returns the status code as an integer, rather than calling exit() with the code.
-
-### New Features
-* Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact.
-* Compression dictionary blocks are now prefetched and pinned in the cache (based on the customer's settings) the same way as index and filter blocks.
-* Added DBOptions::log_readahead_size which specifies the number of bytes to prefetch when reading the log. This is mostly useful for reading a remotely located log, as it can save the number of round-trips. If 0 (default), then the prefetching is disabled.
-* Added a new option in SizeApproximationOptions used with DB::GetApproximateSizes. When approximating the total size of files used to store a key range, allow approximation with an error margin of up to total_files_size * files_size_error_margin. This allows taking some shortcuts in file size approximation, resulting in better performance, while guaranteeing the resulting error is within a reasonable margin.
-* Support loading custom objects in unit tests. In the affected unit tests, RocksDB will create custom Env objects based on the environment variable TEST_ENV_URI. Users need to make sure custom object types are properly registered. For example, a static library should expose a `RegisterCustomObjects` function. By linking the unit test binary with the static library, the unit test can execute this function.
-
-### Performance Improvements
-* Reduce iterator key comparison for upper/lower bound checks.
-* Improve performance of row_cache: make reads with newer snapshots than data in an SST file share the same cache key, except in some transaction cases.
-* The compression dictionary is no longer copied to a new object upon retrieval.
-
-### Bug Fixes
-* Fix ingested files and directory not being fsynced.
-* Return TryAgain status in place of Corruption when the new tail is not visible to TransactionLogIterator.
-* Fixed a regression where the fill_cache read option also affected index blocks.
-* Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes/filters as well.
-
-## 6.3.2 (2019-08-15)
-### Public API Change
-* The semantics of the per-block-type block read counts in the performance context now match those of the generic block_read_count.
-
-### Bug Fixes
-* Fixed a regression where the fill_cache read option also affected index blocks.
-* Fixed an issue where using cache_index_and_filter_blocks==false affected partitions of partitioned indexes as well.
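As an aside, here is a minimal sketch (not part of this patch) of the 6.4.0 GetApproximateSizes overload mentioned above, which takes a SizeApproximationOptions and returns a Status; the path and key range are illustrative assumptions.

```cpp
#include <iostream>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/approx_sizes_db", &db);
  if (!s.ok()) return 1;

  rocksdb::SizeApproximationOptions size_opts;
  // Allow up to ~10% error in the file-size portion of the estimate,
  // trading accuracy for speed (default is exact file-size accounting).
  size_opts.files_size_error_margin = 0.1;

  rocksdb::Range range("a", "z");
  uint64_t size = 0;
  // New overload: returns a Status instead of asserting on bad flags.
  s = db->GetApproximateSizes(size_opts, db->DefaultColumnFamily(), &range, 1,
                              &size);
  if (s.ok()) {
    std::cout << "approximate size: " << size << " bytes\n";
  }
  delete db;
  return s.ok() ? 0 : 1;
}
```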
- -## 6.3.1 (2019-07-24) -### Bug Fixes -* Fix auto rolling bug introduced in 6.3.0, which causes segfault if log file creation fails. - -## 6.3.0 (2019-06-18) -### Public API Change -* Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released. -* Index blocks are now handled similarly to data blocks with regards to the block cache: instead of storing objects in the cache, only the blocks themselves are cached. In addition, index blocks no longer get evicted from the cache when a table is closed, can now use the compressed block cache (if any), and can be shared among multiple table readers. -* Partitions of partitioned indexes no longer affect the read amplification statistics. -* Due to the above refactoring, block cache eviction statistics for indexes are temporarily broken. We plan to reintroduce them in a later phase. -* options.keep_log_file_num will be enforced strictly all the time. File names of all log files will be tracked, which may take significantly amount of memory if options.keep_log_file_num is large and either of options.max_log_file_size or options.log_file_time_to_roll is set. -* Add initial support for Get/Put with user timestamps. Users can specify timestamps via ReadOptions and WriteOptions when calling DB::Get and DB::Put. -* Accessing a partition of a partitioned filter or index through a pinned reference is no longer considered a cache hit. -* Add C bindings for secondary instance, i.e. DBImplSecondary. -* Rate limited deletion of WALs is only enabled if DBOptions::wal_dir is not set, or explicitly set to db_name passed to DB::Open and DBOptions::db_paths is empty, or same as db_paths[0].path - -### New Features -* Add an option `snap_refresh_nanos` (default to 0) to periodically refresh the snapshot list in compaction jobs. Assign to 0 to disable the feature. -* Add an option `unordered_write` which trades snapshot guarantees with higher write throughput. When used with WRITE_PREPARED transactions with two_write_queues=true, it offers higher throughput with however no compromise on guarantees. -* Allow DBImplSecondary to remove memtables with obsolete data after replaying MANIFEST and WAL. -* Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error. -* Add command `list_file_range_deletes` in ldb, which prints out tombstones in SST files. - -### Performance Improvements -* Reduce binary search when iterator reseek into the same data block. -* DBIter::Next() can skip user key checking if previous entry's seqnum is 0. -* Merging iterator to avoid child iterator reseek for some cases -* Log Writer will flush after finishing the whole record, rather than a fragment. -* Lower MultiGet batching API latency by reading data blocks from disk in parallel - -### General Improvements -* Added new status code kColumnFamilyDropped to distinguish between Column Family Dropped and DB Shutdown in progress. -* Improve ColumnFamilyOptions validation when creating a new column family. - -### Bug Fixes -* Fix a bug in WAL replay of secondary instance by skipping write batches with older sequence numbers than the current last sequence number. -* Fix flush's/compaction's merge processing logic which allowed `Put`s covered by range tombstones to reappear. 
Note `Put`s may exist even if the user only ever called `Merge()` due to an internal conversion during compaction to the bottommost level. -* Fix/improve memtable earliest sequence assignment and WAL replay so that WAL entries of unflushed column families will not be skipped after replaying the MANIFEST and increasing db sequence due to another flushed/compacted column family. -* Fix a bug caused by secondary not skipping the beginning of new MANIFEST. -* On DB open, delete WAL trash files left behind in wal_dir - -## 6.2.0 (2019-04-30) -### New Features -* Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`. -* Improve range scan performance by avoiding per-key upper bound check in BlockBasedTableIterator. -* Introduce Periodic Compaction for Level style compaction. Files are re-compacted periodically and put in the same level. -* Block-based table index now contains exact highest key in the file, rather than an upper bound. This may improve Get() and iterator Seek() performance in some situations, especially when direct IO is enabled and block cache is disabled. A setting BlockBasedTableOptions::index_shortening is introduced to control this behavior. Set it to kShortenSeparatorsAndSuccessor to get the old behavior. -* When reading from option file/string/map, customized envs can be filled according to object registry. -* Improve range scan performance when using explicit user readahead by not creating new table readers for every iterator. -* Add index type BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey. It significantly reduces read amplification in some setups, especially for iterator seeks. It's not fully implemented yet: IO errors are not handled right. - -### Public API Change -* Change the behavior of OptimizeForPointLookup(): move away from hash-based block-based-table index, and use whole key memtable filtering. -* Change the behavior of OptimizeForSmallDb(): use a 16MB block cache, put index and filter blocks into it, and cost the memtable size to it. DBOptions.OptimizeForSmallDb() and ColumnFamilyOptions.OptimizeForSmallDb() start to take an optional cache object. -* Added BottommostLevelCompaction::kForceOptimized to avoid double compacting newly compacted files in the bottommost level compaction of manual compaction. Note this option may prohibit the manual compaction to produce a single file in the bottommost level. - -### Bug Fixes -* Adjust WriteBufferManager's dummy entry size to block cache from 1MB to 256KB. -* Fix a race condition between WritePrepared::Get and ::Put with duplicate keys. -* Fix crash when memtable prefix bloom is enabled and read/write a key out of domain of prefix extractor. -* Close a WAL file before another thread deletes it. -* Fix an assertion failure `IsFlushPending() == true` caused by one bg thread releasing the db mutex in ~ColumnFamilyData and another thread clearing `flush_requested_` flag. - -## 6.1.1 (2019-04-09) -### New Features -* When reading from option file/string/map, customized comparators and/or merge operators can be filled according to object registry. - -### Public API Change - -### Bug Fixes -* Fix a bug in 2PC where a sequence of txn prepare, memtable flush, and crash could result in losing the prepared transaction. -* Fix a bug in Encryption Env which could cause encrypted files to be read beyond file boundaries. 
-
-## 6.1.0 (2019-03-27)
-### New Features
-* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
-* Added a feature to perform data-block sampling for compressibility, and report stats to the user.
-* Add support for trace filtering.
-* Add DBOptions.avoid_unnecessary_blocking_io. If true, we avoid file deletion when destroying ColumnFamilyHandle and Iterator. Instead, a job is scheduled to delete the files in the background.
-
-### Public API Change
-* Remove bundled fbson library.
-* statistics.stats_level_ becomes atomic. It is preferred to use statistics.set_stats_level() and statistics.get_stats_level() to access it.
-* Introduce a new IOError subcode, PathNotFound, to indicate trying to open a nonexistent file or directory for read.
-* Add initial support for multiple db instances sharing the same data in single-writer, multi-reader mode.
-* Removed some "using std::xxx" from public headers.
-
-### Bug Fixes
-* Fix JEMALLOC_CXX_THROW macro missing from older Jemalloc versions, causing build failures on some platforms.
-* Fix SstFileReader not being able to open files ingested with write_global_seqno=true.
-
-## 6.0.0 (2019-02-19)
-### New Features
-* Enabled checkpoint on readonly db (DBImplReadOnly).
-* Make DB ignore dropped column families while committing results of atomic flush.
-* RocksDB may choose to preopen some files even if options.max_open_files != -1. This may make DB open take slightly longer.
-* For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds.
-* For all users of dictionary compression who set `cache_index_and_filter_blocks == true`, we now store dictionary data used for decompression in the block cache for better control over memory usage. For users of ZSTD v1.1.4+ who compile with -DZSTD_STATIC_LINKING_ONLY, this includes a digested dictionary, which is used to increase decompression speed.
-* Add support for block checksum verification for external SST files before ingestion.
-* Introduce stats history which periodically saves Statistics snapshots and added `GetStatsHistory` API to retrieve these snapshots.
-* Add a placeholder in the manifest to indicate a record from the future that can be safely ignored.
-* Add support for trace sampling.
-* Enable properties block checksum verification for block-based tables.
-* For all users of dictionary compression, we now generate a separate dictionary for compressing each bottom-level SST file. Previously we reused a single dictionary for a whole compaction to the bottom level. The new approach achieves better compression ratios; however, it uses more memory and CPU for buffering/sampling data blocks and training dictionaries.
-* Add whole key bloom filter support in memtable.
-* Files written by `SstFileWriter` will now use dictionary compression if it is configured in the file writer's `CompressionOptions`.
-
-### Public API Change
-* Disallow CompactionFilter::IgnoreSnapshots() = false, because it is not very useful and the behavior is confusing. The filter will filter everything if there is no snapshot declared by the time the compaction starts. However, users can define a snapshot after the compaction starts and before it finishes and this new snapshot won't be repeatable, because after the compaction finishes, some keys may be dropped.
-* CompactionPri = kMinOverlappingRatio also uses compensated file size, which boosts files with lots of tombstones to be compacted first.
-* Transaction::GetForUpdate is extended with a do_validate parameter with a default value of true. If false, it skips validating the snapshot before doing the read. Similarly, ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with a default value of false. If true, it indicates that the call is assumed to come after a ::GetForUpdate.
-* `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for the number of range tombstones.
-* Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists.
-* With "ldb --try_load_options", when the wal_dir specified by the option file doesn't exist, ignore it.
-* Change time resolution in FileOperationInfo.
-* Deleting blob files now also goes through SstFileManager.
-* Remove CuckooHash memtable.
-* The counter stat `number.block.not_compressed` now also counts blocks not compressed due to poor compression ratio.
-* Remove the ttl option from `CompactionOptionsFIFO`. The option has been deprecated and ttl in `ColumnFamilyOptions` is used instead.
-* Support SST file ingestion across multiple column families via DB::IngestExternalFiles. See the function's comment about atomicity.
-* Remove Lua compaction filter.
-
-### Bug Fixes
-* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
-* Fix a memory leak when files with range tombstones are read in mmap mode and block cache is enabled.
-* Fix handling of corrupt range tombstone blocks such that corruptions cannot cause deleted keys to reappear.
-* Lock free MultiGet
-* Fix incorrect `NotFound` point lookup result when querying the endpoint of a file that has been extended by a range tombstone.
-* Fix a bug with pipelined write where a write leader's callback failure could lead to the whole write group failing.
-
-### Change Default Options
-* Change options.compaction_pri's default to kMinOverlappingRatio
-
-## 5.18.0 (2018-11-30)
-### New Features
-* Introduced the `JemallocNodumpAllocator` memory allocator. When in use, the block cache will be excluded from core dumps.
-* Introduced `PerfContextByLevel` as part of `PerfContext` which allows storing perf context at each level. Also replaced `__thread` with the `thread_local` keyword for perf_context. Added per-level perf context for bloom filter and `Get` query.
-* With level_compaction_dynamic_level_bytes = true, the level multiplier may be adjusted automatically when Level 0 to 1 compaction lags behind.
-* Introduced DB option `atomic_flush`. If true, RocksDB supports flushing multiple column families and atomically committing the result to MANIFEST. Useful when WAL is disabled (see the sketch below).
-* Added `num_deletions` and `num_merge_operands` members to `TableProperties`.
-* Added "rocksdb.min-obsolete-sst-number-to-keep" DB property that reports the lower bound on SST file numbers that are being kept from deletion, even if the SSTs are obsolete.
-* Add xxhash64 checksum support.
-* Introduced `MemoryAllocator`, which lets the user specify a custom memory allocator for block based table.
-* Improved `DeleteRange` to prevent read performance degradation. The feature is no longer marked as experimental.
-
-### Public API Change
-* `DBOptions::use_direct_reads` now affects reads issued by `BackupEngine` on the database's SSTs.
-* `NO_ITERATORS` is divided into two counters `NO_ITERATOR_CREATED` and `NO_ITERATOR_DELETE`. Both of them are only increasing now, just as other counters.
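A minimal sketch (not part of this patch) of the 5.18.0 `atomic_flush` option noted above, assuming the multi-column-family Flush() overload; the path and column family name are illustrative.

```cpp
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.create_missing_column_families = true;
  options.atomic_flush = true;  // commit multi-CF flush results atomically

  std::vector<rocksdb::ColumnFamilyDescriptor> cf_descs = {
      {rocksdb::kDefaultColumnFamilyName,
       rocksdb::ColumnFamilyOptions(options)},
      {"cf1", rocksdb::ColumnFamilyOptions(options)}};
  std::vector<rocksdb::ColumnFamilyHandle*> handles;
  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(rocksdb::DBOptions(options), "/tmp/atomic_flush_db",
                        cf_descs, &handles, &db);
  if (!s.ok()) return 1;

  db->Put(rocksdb::WriteOptions(), handles[0], "k0", "v0");
  db->Put(rocksdb::WriteOptions(), handles[1], "k1", "v1");

  // Flush both column families; with atomic_flush=true the results are
  // committed to the MANIFEST as one unit.
  s = db->Flush(rocksdb::FlushOptions(), handles);

  for (auto* h : handles) delete h;
  delete db;
  return s.ok() ? 0 : 1;
}
```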
- -### Bug Fixes -* Fix corner case where a write group leader blocked due to write stall blocks other writers in queue with WriteOptions::no_slowdown set. -* Fix in-memory range tombstone truncation to avoid erroneously covering newer keys at a lower level, and include range tombstones in compacted files whose largest key is the range tombstone's start key. -* Properly set the stop key for a truncated manual CompactRange -* Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds. -* Fix the bug that WriteBatchWithIndex's SeekForPrev() doesn't see the entries with the same key. -* Fix the bug where user comparator was sometimes fed with InternalKey instead of the user key. The bug manifests when during GenerateBottommostFiles. -* Fix a bug in WritePrepared txns where if the number of old snapshots goes beyond the snapshot cache size (128 default) the rest will not be checked when evicting a commit entry from the commit cache. -* Fixed Get correctness bug in the presence of range tombstones where merge operands covered by a range tombstone always result in NotFound. -* Start populating `NO_FILE_CLOSES` ticker statistic, which was always zero previously. -* The default value of NewBloomFilterPolicy()'s argument use_block_based_builder is changed to false. Note that this new default may cause large temp memory usage when building very large SST files. - -## 5.17.0 (2018-10-05) -### Public API Change -* `OnTableFileCreated` will now be called for empty files generated during compaction. In that case, `TableFileCreationInfo::file_path` will be "(nil)" and `TableFileCreationInfo::file_size` will be zero. -* Add `FlushOptions::allow_write_stall`, which controls whether Flush calls start working immediately, even if it causes user writes to stall, or will wait until flush can be performed without causing write stall (similar to `CompactRangeOptions::allow_write_stall`). Note that the default value is false, meaning we add delay to Flush calls until stalling can be avoided when possible. This is behavior change compared to previous RocksDB versions, where Flush calls didn't check if they might cause stall or not. -* Application using PessimisticTransactionDB is expected to rollback/commit recovered transactions before starting new ones. This assumption is used to skip concurrency control during recovery. -* Expose column family id to `OnCompactionCompleted`. - -### New Features -* TransactionOptions::skip_concurrency_control allows pessimistic transactions to skip the overhead of concurrency control. Could be used for optimizing certain transactions or during recovery. - -### Bug Fixes -* Avoid creating empty SSTs and subsequently deleting them in certain cases during compaction. -* Sync CURRENT file contents during checkpoint. - -## 5.16.3 (2018-10-01) -### Bug Fixes -* Fix crash caused when `CompactFiles` run with `CompactionOptions::compression == CompressionType::kDisableCompressionOption`. Now that setting causes the compression type to be chosen according to the column family-wide compression options. - -## 5.16.2 (2018-09-21) -### Bug Fixes -* Fix bug in partition filters with format_version=4. - -## 5.16.1 (2018-09-17) -### Bug Fixes -* Remove trace_analyzer_tool from rocksdb_lib target in TARGETS file. -* Fix RocksDB Java build and tests. -* Remove sync point in Block destructor. 
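The 5.17.0 note above introduces `FlushOptions::allow_write_stall`. A minimal sketch (not part of this patch) of opting back into the pre-5.17 behavior, assuming `db` is an already-open `rocksdb::DB*`:

```cpp
#include "rocksdb/db.h"
#include "rocksdb/options.h"

rocksdb::Status FlushNow(rocksdb::DB* db) {
  rocksdb::FlushOptions flush_opts;
  flush_opts.wait = true;               // block until the flush finishes
  flush_opts.allow_write_stall = true;  // start immediately, even if this
                                        // stalls user writes (old behavior)
  return db->Flush(flush_opts);
}
```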
- -## 5.16.0 (2018-08-21) -### Public API Change -* The merge operands are passed to `MergeOperator::ShouldMerge` in the reversed order relative to how they were merged (passed to FullMerge or FullMergeV2) for performance reasons -* GetAllKeyVersions() to take an extra argument of `max_num_ikeys`. -* Using ZSTD dictionary trainer (i.e., setting `CompressionOptions::zstd_max_train_bytes` to a nonzero value) now requires ZSTD version 1.1.3 or later. - -### New Features -* Changes the format of index blocks by delta encoding the index values, which are the block handles. This saves the encoding of BlockHandle::offset of the non-head index entries in each restart interval. The feature is backward compatible but not forward compatible. It is disabled by default unless format_version 4 or above is used. -* Add a new tool: trace_analyzer. Trace_analyzer analyzes the trace file generated by using trace_replay API. It can convert the binary format trace file to a human readable txt file, output the statistics of the analyzed query types such as access statistics and size statistics, combining the dumped whole key space file to analyze, support query correlation analyzing, and etc. Current supported query types are: Get, Put, Delete, SingleDelete, DeleteRange, Merge, Iterator (Seek, SeekForPrev only). -* Add hash index support to data blocks, which helps reducing the cpu utilization of point-lookup operations. This feature is backward compatible with the data block created without the hash index. It is disabled by default unless BlockBasedTableOptions::data_block_index_type is set to data_block_index_type = kDataBlockBinaryAndHash. - -### Bug Fixes -* Fix a bug in misreporting the estimated partition index size in properties block. - -## 5.15.0 (2018-07-17) -### Public API Change -* Remove managed iterator. ReadOptions.managed is not effective anymore. -* For bottommost_compression, a compatible CompressionOptions is added via `bottommost_compression_opts`. To keep backward compatible, a new boolean `enabled` is added to CompressionOptions. For compression_opts, it will be always used no matter what value of `enabled` is. For bottommost_compression_opts, it will only be used when user set `enabled=true`, otherwise, compression_opts will be used for bottommost_compression as default. -* With LRUCache, when high_pri_pool_ratio > 0, midpoint insertion strategy will be enabled to put low-pri items to the tail of low-pri list (the midpoint) when they first inserted into the cache. This is to make cache entries never get hit age out faster, improving cache efficiency when large background scan presents. -* For users of `Statistics` objects created via `CreateDBStatistics()`, the format of the string returned by its `ToString()` method has changed. -* The "rocksdb.num.entries" table property no longer counts range deletion tombstones as entries. - -### New Features -* Changes the format of index blocks by storing the key in their raw form rather than converting them to InternalKey. This saves 8 bytes per index key. The feature is backward compatible but not forward compatible. It is disabled by default unless format_version 3 or above is used. -* Avoid memcpy when reading mmap files with OpenReadOnly and max_open_files==-1. -* Support dynamically changing `ColumnFamilyOptions::ttl` via `SetOptions()`. -* Add a new table property, "rocksdb.num.range-deletions", which counts the number of range deletion tombstones in the table. 
-* Improve the performance of iterators doing long range scans by using readahead, when using direct IO.
-* pin_top_level_index_and_filter (default true) in BlockBasedTableOptions can be used in combination with cache_index_and_filter_blocks to prefetch and pin the top-level index of partitioned index and filter blocks in cache. It has no impact when cache_index_and_filter_blocks is false.
-* Write the properties meta-block at the end of block-based table to save read-ahead IO.
-
-### Bug Fixes
-* Fix deadlock with enable_pipelined_write=true and max_successive_merges > 0.
-* Check conflict at output level in CompactFiles.
-* Fix corruption in non-iterator reads when mmap is used for file reads.
-* Fix a bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an existent key as missing. The bug could be triggered if prefix_extractor is set and partitioned filters are enabled.
-* Change the default value of `bytes_max_delete_chunk` to 0 in NewSstFileManager() as it doesn't work well with checkpoints.
-* Fix a bug caused by not copying the block trailer with compressed SST file, direct IO, prefetcher and no compressed block cache.
-* Fix writes that can get stuck indefinitely if enable_pipelined_write=true. The issue has existed since pipelined write was introduced in 5.5.0.
-
-## 5.14.0 (2018-05-16)
-### Public API Change
-* Add a BlockBasedTableOption to align uncompressed data blocks on the smaller of block size or page size boundary, to reduce flash reads by avoiding reads spanning 4K pages.
-* The background thread naming convention changed (on supporting platforms) to "rocksdb:<thread pool priority><thread number>", e.g., "rocksdb:low0".
-* Add a new ticker stat rocksdb.number.multiget.keys.found to count the number of keys successfully read in MultiGet calls.
-* Touch-up to write-related counters in PerfContext. New counters added: write_scheduling_flushes_compactions_time, write_thread_wait_nanos. Counters whose behavior was fixed or modified: write_memtable_time, write_pre_and_post_process_time, write_delay_time.
-* Posix Env's NewRandomRWFile() will fail if the file doesn't exist.
-* Now, `DBOptions::use_direct_io_for_flush_and_compaction` only applies to background writes, and `DBOptions::use_direct_reads` applies to both user reads and background reads. This conforms with Linux's `open(2)` manpage, which advises against simultaneously reading a file in buffered and direct modes, due to possibly undefined behavior and degraded performance.
-* Iterator::Valid() always returns false if !status().ok(). So, now when doing a Seek() followed by some Next()s, there's no need to check status() after every operation.
-* Iterator::Seek()/SeekForPrev()/SeekToFirst()/SeekToLast() always reset status().
-* Introduced `CompressionOptions::kDefaultCompressionLevel`, which is a generic way to tell RocksDB to use the compression library's default level. It is now the default value for `CompressionOptions::level`. Previously the level defaulted to -1, which gave poor compression ratios in ZSTD.
-
-### New Features
-* Introduce TTL for level compaction so that all files older than ttl go through the compaction process to get rid of old data.
-* TransactionDBOptions::write_policy can be configured to enable WritePrepared 2PC transactions. Read more about them in the wiki.
-* Add DB properties "rocksdb.block-cache-capacity", "rocksdb.block-cache-usage", "rocksdb.block-cache-pinned-usage" to show block cache usage.
-* Add `Env::LowerThreadPoolCPUPriority(Priority)` method, which lowers the CPU priority of background (esp. compaction) threads to minimize interference with foreground tasks.
-* Fsync parent directory after deleting a file in delete scheduler.
-* In level-based compaction, if the bottom-pri thread pool was set up via `Env::SetBackgroundThreads()`, compactions to the bottom level will be delegated to that thread pool.
-* `prefix_extractor` has been moved from ImmutableCFOptions to MutableCFOptions, meaning it can be dynamically changed without a DB restart.
-
-### Bug Fixes
-* Fsync after writing the global seq number to the ingestion file in ExternalSstFileIngestionJob.
-* Fix WAL corruption caused by a race condition between the user write thread and FlushWAL when two_write_queue is not set.
-* Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when the refcount cannot be accurately determined.
-* Fix a memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters.
-* Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true.
-* Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor().
-
-### Java API Changes
-* Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.
-* Added SstFileManager to the Java API to allow managing SST files across DB instances.
-
-## 5.13.0 (2018-03-20)
-### Public API Change
-* RocksDBOptionsParser::Parse()'s `ignore_unknown_options` argument will only be effective if the option file shows it is generated using a higher version of RocksDB than the current version.
-* Remove CompactionEventListener.
-
-### New Features
-* SstFileManager now can cancel compactions if they will result in max space errors. SstFileManager users can also use SetCompactionBufferSize to specify how much space must be left over during a compaction for auxiliary file functions such as logging and flushing.
-* Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables.
-* If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions.
-* Add "rocksdb.live-sst-files-size" DB property to return the total bytes of all SST files belonging to the latest LSM tree.
-* NewSstFileManager now takes an argument bytes_max_delete_chunk with default 64MB. With this argument, a file larger than 64MB will be ftruncated multiple times based on this size.
-
-### Bug Fixes
-* Fix a leak in prepared_section_completed_ where the zeroed entries would not be removed from the map.
-* Fix WAL corruption caused by a race condition between the user write thread and the backup/checkpoint thread.
-
-## 5.12.0 (2018-02-14)
-### Public API Change
-* Iterator::SeekForPrev is now a pure virtual method. This is to prevent users who implement the Iterator interface from failing to implement SeekForPrev by mistake.
-* Add `include_end` option to make the range end exclusive when `include_end == false` in `DeleteFilesInRange()`.
-* Add `CompactRangeOptions::allow_write_stall`, which makes `CompactRange` start working immediately, even if it causes user writes to stall. The default value is false, meaning we add delay to `CompactRange` calls until stalling can be avoided when possible. Note this delay is not present in previous RocksDB versions.
-* Creating checkpoint with empty directory now returns `Status::InvalidArgument`; previously, it returned `Status::IOError`. -* Adds a BlockBasedTableOption to turn off index block compression. -* Close() method now returns a status when closing a db. - -### New Features -* Improve the performance of iterators doing long range scans by using readahead. -* Add new function `DeleteFilesInRanges()` to delete files in multiple ranges at once for better performance. -* FreeBSD build support for RocksDB and RocksJava. -* Improved performance of long range scans with readahead. -* Updated to and now continuously tested in Visual Studio 2017. - -### Bug Fixes -* Fix `DisableFileDeletions()` followed by `GetSortedWalFiles()` to not return obsolete WAL files that `PurgeObsoleteFiles()` is going to delete. -* Fix Handle error return from WriteBuffer() during WAL file close and DB close. -* Fix advance reservation of arena block addresses. -* Fix handling of empty string as checkpoint directory. - -## 5.11.0 (2018-01-08) -### Public API Change -* Add `autoTune` and `getBytesPerSecond()` to RocksJava RateLimiter - -### New Features -* Add a new histogram stat called rocksdb.db.flush.micros for memtable flush. -* Add "--use_txn" option to use transactional API in db_stress. -* Disable onboard cache for compaction output in Windows platform. -* Improve the performance of iterators doing long range scans by using readahead. - -### Bug Fixes -* Fix a stack-use-after-scope bug in ForwardIterator. -* Fix builds on platforms including Linux, Windows, and PowerPC. -* Fix buffer overrun in backup engine for DBs with huge number of files. -* Fix a mislabel bug for bottom-pri compaction threads. -* Fix DB::Flush() keep waiting after flush finish under certain condition. - -## 5.10.0 (2017-12-11) -### Public API Change -* When running `make` with environment variable `USE_SSE` set and `PORTABLE` unset, will use all machine features available locally. Previously this combination only compiled SSE-related features. - -### New Features -* Provide lifetime hints when writing files on Linux. This reduces hardware write-amp on storage devices supporting multiple streams. -* Add a DB stat, `NUMBER_ITER_SKIP`, which returns how many internal keys were skipped during iterations (e.g., due to being tombstones or duplicate versions of a key). -* Add PerfContext counters, `key_lock_wait_count` and `key_lock_wait_time`, which measure the number of times transactions wait on key locks and total amount of time waiting. - -### Bug Fixes -* Fix IOError on WAL write doesn't propagate to write group follower -* Make iterator invalid on merge error. -* Fix performance issue in `IngestExternalFile()` affecting databases with large number of SST files. -* Fix possible corruption to LSM structure when `DeleteFilesInRange()` deletes a subset of files spanned by a `DeleteRange()` marker. - -## 5.9.0 (2017-11-01) -### Public API Change -* `BackupableDBOptions::max_valid_backups_to_open == 0` now means no backups will be opened during BackupEngine initialization. Previously this condition disabled limiting backups opened. -* `DBOptions::preserve_deletes` is a new option that allows one to specify that DB should not drop tombstones for regular deletes if they have sequence number larger than what was set by the new API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)`. Disabled by default. 
-* API call `DB::SetPreserveDeletesSequenceNumber(SequenceNumber seqnum)` was added, users who wish to preserve deletes are expected to periodically call this function to advance the cutoff seqnum (all deletes made before this seqnum can be dropped by DB). It's user responsibility to figure out how to advance the seqnum in the way so the tombstones are kept for the desired period of time, yet are eventually processed in time and don't eat up too much space. -* `ReadOptions::iter_start_seqnum` was added; -if set to something > 0 user will see 2 changes in iterators behavior 1) only keys written with sequence larger than this parameter would be returned and 2) the `Slice` returned by iter->key() now points to the memory that keep User-oriented representation of the internal key, rather than user key. New struct `FullKey` was added to represent internal keys, along with a new helper function `ParseFullKey(const Slice& internal_key, FullKey* result);`. -* Deprecate trash_dir param in NewSstFileManager, right now we will rename deleted files to .trash instead of moving them to trash directory -* Allow setting a custom trash/DB size ratio limit in the SstFileManager, after which files that are to be scheduled for deletion are deleted immediately, regardless of any delete ratelimit. -* Return an error on write if write_options.sync = true and write_options.disableWAL = true to warn user of inconsistent options. Previously we will not write to WAL and not respecting the sync options in this case. - -### New Features -* CRC32C is now using the 3-way pipelined SSE algorithm `crc32c_3way` on supported platforms to improve performance. The system will choose to use this algorithm on supported platforms automatically whenever possible. If PCLMULQDQ is not supported it will fall back to the old Fast_CRC32 algorithm. -* `DBOptions::writable_file_max_buffer_size` can now be changed dynamically. -* `DBOptions::bytes_per_sync`, `DBOptions::compaction_readahead_size`, and `DBOptions::wal_bytes_per_sync` can now be changed dynamically, `DBOptions::wal_bytes_per_sync` will flush all memtables and switch to a new WAL file. -* Support dynamic adjustment of rate limit according to demand for background I/O. It can be enabled by passing `true` to the `auto_tuned` parameter in `NewGenericRateLimiter()`. The value passed as `rate_bytes_per_sec` will still be respected as an upper-bound. -* Support dynamically changing `ColumnFamilyOptions::compaction_options_fifo`. -* Introduce `EventListener::OnStallConditionsChanged()` callback. Users can implement it to be notified when user writes are stalled, stopped, or resumed. -* Add a new db property "rocksdb.estimate-oldest-key-time" to return oldest data timestamp. The property is available only for FIFO compaction with compaction_options_fifo.allow_compaction = false. -* Upon snapshot release, recompact bottommost files containing deleted/overwritten keys that previously could not be dropped due to the snapshot. This alleviates space-amp caused by long-held snapshots. -* Support lower bound on iterators specified via `ReadOptions::iterate_lower_bound`. -* Support for differential snapshots (via iterator emitting the sequence of key-values representing the difference between DB state at two different sequence numbers). Supports preserving and emitting puts and regular deletes, doesn't support SingleDeletes, MergeOperator, Blobs and Range Deletes. - -### Bug Fixes -* Fix a potential data inconsistency issue during point-in-time recovery. 
`DB:Open()` will abort if column family inconsistency is found during PIT recovery. -* Fix possible metadata corruption in databases using `DeleteRange()`. - -## 5.8.0 (2017-08-30) -### Public API Change -* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints. -* `Slice::compare` and BytewiseComparator `Compare` no longer accept `Slice`s containing nullptr. -* `Transaction::Get` and `Transaction::GetForUpdate` variants with `PinnableSlice` added. - -### New Features -* Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators. -* Replace dynamic_cast<> (except unit test) so people can choose to build with RTTI off. With make, release mode is by default built with -fno-rtti and debug mode is built without it. Users can override it by setting USE_RTTI=0 or 1. -* Universal compactions including the bottom level can be executed in a dedicated thread pool. This alleviates head-of-line blocking in the compaction queue, which cause write stalling, particularly in multi-instance use cases. Users can enable this feature via `Env::SetBackgroundThreads(N, Env::Priority::BOTTOM)`, where `N > 0`. -* Allow merge operator to be called even with a single merge operand during compactions, by appropriately overriding `MergeOperator::AllowSingleOperand`. -* Add `DB::VerifyChecksum()`, which verifies the checksums in all SST files in a running DB. -* Block-based table support for disabling checksums by setting `BlockBasedTableOptions::checksum = kNoChecksum`. - -### Bug Fixes -* Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`. -* Fix incorrect dropping of deletions during intra-L0 compaction. -* Fix transient reappearance of keys covered by range deletions when memtable prefix bloom filter is enabled. -* Fix potentially wrong file smallest key when range deletions separated by snapshot are written together. - -## 5.7.0 (2017-07-13) -### Public API Change -* DB property "rocksdb.sstables" now prints keys in hex form. - -### New Features -* Measure estimated number of reads per file. The information can be accessed through DB::GetColumnFamilyMetaData or "rocksdb.sstables" DB property. -* RateLimiter support for throttling background reads, or throttling the sum of background reads and writes. This can give more predictable I/O usage when compaction reads more data than it writes, e.g., due to lots of deletions. -* [Experimental] FIFO compaction with TTL support. It can be enabled by setting CompactionOptionsFIFO.ttl > 0. -* Introduce `EventListener::OnBackgroundError()` callback. Users can implement it to be notified of errors causing the DB to enter read-only mode, and optionally override them. -* Partitioned Index/Filters exiting the experimental mode. To enable partitioned indexes set index_type to kTwoLevelIndexSearch and to further enable partitioned filters set partition_filters to true. To configure the partition size set metadata_block_size. - - -### Bug Fixes -* Fix discarding empty compaction output files when `DeleteRange()` is used together with subcompactions. - -## 5.6.0 (2017-06-06) -### Public API Change -* Scheduling flushes and compactions in the same thread pool is no longer supported by setting `max_background_flushes=0`. Instead, users can achieve this by configuring their high-pri thread pool to have zero threads. 
-* Replace `Options::max_background_flushes`, `Options::max_background_compactions`, and `Options::base_background_compactions` all with `Options::max_background_jobs`, which automatically decides how many threads to allocate towards flush/compaction. -* options.delayed_write_rate by default take the value of options.rate_limiter rate. -* Replace global variable `IOStatsContext iostats_context` with `IOStatsContext* get_iostats_context()`; replace global variable `PerfContext perf_context` with `PerfContext* get_perf_context()`. - -### New Features -* Change ticker/histogram statistics implementations to use core-local storage. This improves aggregation speed compared to our previous thread-local approach, particularly for applications with many threads. -* Users can pass a cache object to write buffer manager, so that they can cap memory usage for memtable and block cache using one single limit. -* Flush will be triggered when 7/8 of the limit introduced by write_buffer_manager or db_write_buffer_size is triggered, so that the hard threshold is hard to hit. -* Introduce WriteOptions.low_pri. If it is true, low priority writes will be throttled if the compaction is behind. -* `DB::IngestExternalFile()` now supports ingesting files into a database containing range deletions. - -### Bug Fixes -* Shouldn't ignore return value of fsync() in flush. - -## 5.5.0 (2017-05-17) -### New Features -* FIFO compaction to support Intra L0 compaction too with CompactionOptionsFIFO.allow_compaction=true. -* DB::ResetStats() to reset internal stats. -* Statistics::Reset() to reset user stats. -* ldb add option --try_load_options, which will open DB with its own option file. -* Introduce WriteBatch::PopSavePoint to pop the most recent save point explicitly. -* Support dynamically change `max_open_files` option via SetDBOptions() -* Added DB::CreateColumnFamilie() and DB::DropColumnFamilies() to bulk create/drop column families. -* Add debugging function `GetAllKeyVersions` to see internal versions of a range of keys. -* Support file ingestion with universal compaction style -* Support file ingestion behind with option `allow_ingest_behind` -* New option enable_pipelined_write which may improve write throughput in case writing from multiple threads and WAL enabled. - -### Bug Fixes -* Fix the bug that Direct I/O uses direct reads for non-SST file - -## 5.4.0 (2017-04-11) -### Public API Change -* random_access_max_buffer_size no longer has any effect -* Removed Env::EnableReadAhead(), Env::ShouldForwardRawRequest() -* Support dynamically change `stats_dump_period_sec` option via SetDBOptions(). -* Added ReadOptions::max_skippable_internal_keys to set a threshold to fail a request as incomplete when too many keys are being skipped when using iterators. -* DB::Get in place of std::string accepts PinnableSlice, which avoids the extra memcpy of value to std::string in most of cases. - * PinnableSlice releases the pinned resources that contain the value when it is destructed or when ::Reset() is called on it. - * The old API that accepts std::string, although discouraged, is still supported. -* Replace Options::use_direct_writes with Options::use_direct_io_for_flush_and_compaction. Read Direct IO wiki for details. -* Added CompactionEventListener and EventListener::OnFlushBegin interfaces. - -### New Features -* Memtable flush can be avoided during checkpoint creation if total log file size is smaller than a threshold specified by the user. 
-* Introduce level-based L0->L0 compactions to reduce file count, so write delays are incurred less often.
-* (Experimental) Partitioned filters, which create an index on the partitions. The feature can be enabled by setting partition_filters when using kFullFilter. Currently the feature also requires two-level indexing to be enabled. The number of partitions is the same as the number of partitions for indexes, which is controlled by metadata_block_size.
-
-## 5.3.0 (2017-03-08)
-### Public API Change
-* Remove disableDataSync option.
-* Remove timeout_hint_us option from WriteOptions. The option has been deprecated and has had no effect since 3.13.0.
-* Remove option min_partial_merge_operands. Partial merge operands will always be merged in flush or compaction if there are more than one.
-* Remove option verify_checksums_in_compaction. Compaction will always verify checksums.
-
-### Bug Fixes
-* Fix the bug that the iterator may skip keys.
-
-## 5.2.0 (2017-02-08)
-### Public API Change
-* NewLRUCache() will determine the number of shard bits automatically based on capacity, if the user doesn't pass one. This also impacts the default block cache when the user doesn't explicitly provide one.
-* Change the default of the delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
-* Options::use_direct_writes and Options::use_direct_reads are now ready to use.
-* (Experimental) Two-level indexing that partitions the index and creates a 2nd level index on the partitions. The feature can be enabled by setting kTwoLevelIndexSearch as IndexType and configuring index_per_partition.
-
-### New Features
-* Added a new overloaded function GetApproximateSizes that allows specifying whether memtable stats should be computed only, without computing SST files' stats approximations.
-* Added a new function GetApproximateMemTableStats that approximates both the number of records and the size of memtables.
-* Add Direct I/O mode for SST file I/O.
-
-### Bug Fixes
-* RangeSync() should work if ROCKSDB_FALLOCATE_PRESENT is not set.
-* Fix wrong results in a data race case in Get().
-* Some fixes related to 2PC.
-* Fix bugs of data corruption in direct I/O.
-
-## 5.1.0 (2017-01-13)
-* Support dynamically changing the `delete_obsolete_files_period_micros` option via SetDBOptions().
-* Added EventListener::OnExternalFileIngested which will be called when IngestExternalFile() adds a file successfully.
-* BackupEngine::Open and BackupEngineReadOnly::Open now always return error statuses matching those of the backup Env.
-
-### Bug Fixes
-* Fix the bug that if 2PC is enabled, checkpoints may lose some recent transactions.
-* When file copying is needed when creating checkpoints or bulk loading files, fsync the file after the file copying.
-
-## 5.0.0 (2016-11-17)
-### Public API Change
-* Options::max_bytes_for_level_multiplier is now a double along with all getters and setters.
-* Support dynamically changing the `delayed_write_rate` and `max_total_wal_size` options via SetDBOptions().
-* Introduce DB::DeleteRange for optimized deletion of large ranges of contiguous keys.
-* Support dynamically changing the `delayed_write_rate` option via SetDBOptions().
-* Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default.
-* Remove Tickers::SEQUENCE_NUMBER to avoid confusion if the statistics object is shared among RocksDB instances. Alternatively DB::GetLatestSequenceNumber() can be used to get the same value.
-* Options.level0_stop_writes_trigger default value changes from 24 to 32.
-* New compaction filter API: CompactionFilter::FilterV2(). Allows to drop ranges of keys. -* Removed flashcache support. -* DB::AddFile() is deprecated and is replaced with DB::IngestExternalFile(). DB::IngestExternalFile() remove all the restrictions that existed for DB::AddFile. - -### New Features -* Add avoid_flush_during_shutdown option, which speeds up DB shutdown by not flushing unpersisted data (i.e. with disableWAL = true). Unpersisted data will be lost. The options is dynamically changeable via SetDBOptions(). -* Add memtable_insert_with_hint_prefix_extractor option. The option is mean to reduce CPU usage for inserting keys into memtable, if keys can be group by prefix and insert for each prefix are sequential or almost sequential. See include/rocksdb/options.h for more details. -* Add LuaCompactionFilter in utilities. This allows developers to write compaction filters in Lua. To use this feature, LUA_PATH needs to be set to the root directory of Lua. -* No longer populate "LATEST_BACKUP" file in backup directory, which formerly contained the number of the latest backup. The latest backup can be determined by finding the highest numbered file in the "meta/" subdirectory. - -## 4.13.0 (2016-10-18) -### Public API Change -* DB::GetOptions() reflect dynamic changed options (i.e. through DB::SetOptions()) and return copy of options instead of reference. -* Added Statistics::getAndResetTickerCount(). - -### New Features -* Add DB::SetDBOptions() to dynamic change base_background_compactions and max_background_compactions. -* Added Iterator::SeekForPrev(). This new API will seek to the last key that less than or equal to the target key. - -## 4.12.0 (2016-09-12) -### Public API Change -* CancelAllBackgroundWork() flushes all memtables for databases containing writes that have bypassed the WAL (writes issued with WriteOptions::disableWAL=true) before shutting down background threads. -* Merge options source_compaction_factor, max_grandparent_overlap_bytes and expanded_compaction_factor into max_compaction_bytes. -* Remove ImmutableCFOptions. -* Add a compression type ZSTD, which can work with ZSTD 0.8.0 or up. Still keep ZSTDNotFinal for compatibility reasons. - -### New Features -* Introduce NewClockCache, which is based on CLOCK algorithm with better concurrent performance in some cases. It can be used to replace the default LRU-based block cache and table cache. To use it, RocksDB need to be linked with TBB lib. -* Change ticker/histogram statistics implementations to accumulate data in thread-local storage, which improves CPU performance by reducing cache coherency costs. Callers of CreateDBStatistics do not need to change anything to use this feature. -* Block cache mid-point insertion, where index and filter block are inserted into LRU block cache with higher priority. The feature can be enabled by setting BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority to true and high_pri_pool_ratio > 0 when creating NewLRUCache. - -## 4.11.0 (2016-08-01) -### Public API Change -* options.memtable_prefix_bloom_huge_page_tlb_size => memtable_huge_page_size. When it is set, RocksDB will try to allocate memory from huge page for memtable too, rather than just memtable bloom filter. - -### New Features -* A tool to migrate DB after options change. See include/rocksdb/utilities/option_change_migration.h. -* Add ReadOptions.background_purge_on_iterator_cleanup. If true, we avoid file deletion when destroying iterators. 
- -## 4.10.0 (2016-07-05) -### Public API Change -* options.memtable_prefix_bloom_bits changes to options.memtable_prefix_bloom_bits_ratio and deprecate options.memtable_prefix_bloom_probes -* enum type CompressionType and PerfLevel changes from char to unsigned char. Value of all PerfLevel shift by one. -* Deprecate options.filter_deletes. - -### New Features -* Add avoid_flush_during_recovery option. -* Add a read option background_purge_on_iterator_cleanup to avoid deleting files in foreground when destroying iterators. Instead, a job is scheduled in high priority queue and would be executed in a separate background thread. -* RepairDB support for column families. RepairDB now associates data with non-default column families using information embedded in the SST/WAL files (4.7 or later). For data written by 4.6 or earlier, RepairDB associates it with the default column family. -* Add options.write_buffer_manager which allows users to control total memtable sizes across multiple DB instances. - -## 4.9.0 (2016-06-09) -### Public API changes -* Add bottommost_compression option, This option can be used to set a specific compression algorithm for the bottommost level (Last level containing files in the DB). -* Introduce CompactionJobInfo::compression, This field state the compression algorithm used to generate the output files of the compaction. -* Deprecate BlockBaseTableOptions.hash_index_allow_collision=false -* Deprecate options builder (GetOptions()). - -### New Features -* Introduce NewSimCache() in rocksdb/utilities/sim_cache.h. This function creates a block cache that is able to give simulation results (mainly hit rate) of simulating block behavior with a configurable cache size. - -## 4.8.0 (2016-05-02) -### Public API Change -* Allow preset compression dictionary for improved compression of block-based tables. This is supported for zlib, zstd, and lz4. The compression dictionary's size is configurable via CompressionOptions::max_dict_bytes. -* Delete deprecated classes for creating backups (BackupableDB) and restoring from backups (RestoreBackupableDB). Now, BackupEngine should be used for creating backups, and BackupEngineReadOnly should be used for restorations. For more details, see https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F -* Expose estimate of per-level compression ratio via DB property: "rocksdb.compression-ratio-at-levelN". -* Added EventListener::OnTableFileCreationStarted. EventListener::OnTableFileCreated will be called on failure case. User can check creation status via TableFileCreationInfo::status. - -### New Features -* Add ReadOptions::readahead_size. If non-zero, NewIterator will create a new table reader which performs reads of the given size. - -## 4.7.0 (2016-04-08) -### Public API Change -* rename options compaction_measure_io_stats to report_bg_io_stats and include flush too. -* Change some default options. Now default options will optimize for server-workloads. Also enable slowdown and full stop triggers for pending compaction bytes. These changes may cause sub-optimal performance or significant increase of resource usage. To avoid these risks, users can open existing RocksDB with options extracted from RocksDB option files. See https://github.com/facebook/rocksdb/wiki/RocksDB-Options-File for how to use RocksDB option files. Or you can call Options.OldDefaults() to recover old defaults. DEFAULT_OPTIONS_HISTORY.md will track change history of default options. 
- -## 4.6.0 (2016-03-10) -### Public API Changes -* Change default of BlockBasedTableOptions.format_version to 2. It means default DB created by 4.6 or up cannot be opened by RocksDB version 3.9 or earlier. -* Added strict_capacity_limit option to NewLRUCache. If the flag is set to true, insert to cache will fail if no enough capacity can be free. Signature of Cache::Insert() is updated accordingly. -* Tickers [NUMBER_DB_NEXT, NUMBER_DB_PREV, NUMBER_DB_NEXT_FOUND, NUMBER_DB_PREV_FOUND, ITER_BYTES_READ] are not updated immediately. The are updated when the Iterator is deleted. -* Add monotonically increasing counter (DB property "rocksdb.current-super-version-number") that increments upon any change to the LSM tree. - -### New Features -* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification. -* Deprecate Iterator::IsKeyPinned() and replace it with Iterator::GetProperty() with prop_name="rocksdb.iterator.is.key.pinned" - -## 4.5.0 (2016-02-05) -### Public API Changes -* Add a new perf context level between kEnableCount and kEnableTime. Level 2 now does not include timers for mutexes. -* Statistics of mutex operation durations will not be measured by default. If you want to have them enabled, you need to set Statistics::stats_level_ to kAll. -* DBOptions::delete_scheduler and NewDeleteScheduler() are removed, please use DBOptions::sst_file_manager and NewSstFileManager() instead - -### New Features -* ldb tool now supports operations to non-default column families. -* Add kPersistedTier to ReadTier. This option allows Get and MultiGet to read only the persited data and skip mem-tables if writes were done with disableWAL = true. -* Add DBOptions::sst_file_manager. Use NewSstFileManager() in include/rocksdb/sst_file_manager.h to create a SstFileManager that can be used to track the total size of SST files and control the SST files deletion rate. - -## 4.4.0 (2016-01-14) -### Public API Changes -* Change names in CompactionPri and add a new one. -* Deprecate options.soft_rate_limit and add options.soft_pending_compaction_bytes_limit. -* If options.max_write_buffer_number > 3, writes will be slowed down when writing to the last write buffer to delay a full stop. -* Introduce CompactionJobInfo::compaction_reason, this field include the reason to trigger the compaction. -* After slow down is triggered, if estimated pending compaction bytes keep increasing, slowdown more. -* Increase default options.delayed_write_rate to 2MB/s. -* Added a new parameter --path to ldb tool. --path accepts the name of either MANIFEST, SST or a WAL file. Either --db or --path can be used when calling ldb. - -## 4.3.0 (2015-12-08) -### New Features -* CompactionFilter has new member function called IgnoreSnapshots which allows CompactionFilter to be called even if there are snapshots later than the key. -* RocksDB will now persist options under the same directory as the RocksDB database on successful DB::Open, CreateColumnFamily, DropColumnFamily, and SetOptions. -* Introduce LoadLatestOptions() in rocksdb/utilities/options_util.h. This function can construct the latest DBOptions / ColumnFamilyOptions used by the specified RocksDB intance. -* Introduce CheckOptionsCompatibility() in rocksdb/utilities/options_util.h. This function checks whether the input set of options is able to open the specified DB successfully. 
- -### Public API Changes -* When options.db_write_buffer_size triggers, only the column family with the largest column family size will be flushed, not all the column families. - -## 4.2.0 (2015-11-09) -### New Features -* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions. -* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. -* Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list rocksdb instances. - -### Public API Changes -* CompactionFilter::Context includes information of Column Family ID -* The need-compaction hint given by TablePropertiesCollector::NeedCompact() will be persistent and recoverable after DB recovery. This introduces a breaking format change. If you use this experimental feature, including NewCompactOnDeletionCollectorFactory() in the new version, you may not be able to directly downgrade the DB back to version 4.0 or lower. -* TablePropertiesCollectorFactory::CreateTablePropertiesCollector() now takes an option Context, containing the information of column family ID for the file being written. -* Remove DefaultCompactionFilterFactory. - - -## 4.1.0 (2015-10-08) -### New Features -* Added single delete operation as a more efficient way to delete keys that have not been overwritten. -* Added experimental AddFile() to DB interface that allow users to add files created by SstFileWriter into an empty Database, see include/rocksdb/sst_file_writer.h and DB::AddFile() for more info. -* Added support for opening SST files with .ldb suffix which enables opening LevelDB databases. -* CompactionFilter now supports filtering of merge operands and merge results. - -### Public API Changes -* Added SingleDelete() to the DB interface. -* Added AddFile() to DB interface. -* Added SstFileWriter class. -* CompactionFilter has a new method FilterMergeOperand() that RocksDB applies to every merge operand during compaction to decide whether to filter the operand. -* We removed CompactionFilterV2 interfaces from include/rocksdb/compaction_filter.h. The functionality was deprecated already in version 3.13. - -## 4.0.0 (2015-09-09) -### New Features -* Added support for transactions. See include/rocksdb/utilities/transaction.h for more info. -* DB::GetProperty() now accepts "rocksdb.aggregated-table-properties" and "rocksdb.aggregated-table-properties-at-levelN", in which case it returns aggregated table properties of the target column family, or the aggregated table properties of the specified level N if the "at-level" version is used. -* Add compression option kZSTDNotFinalCompression for people to experiment ZSTD although its format is not finalized. -* We removed the need for LATEST_BACKUP file in BackupEngine. We still keep writing it when we create new backups (because of backward compatibility), but we don't read it anymore. - -### Public API Changes -* Removed class Env::RandomRWFile and Env::NewRandomRWFile(). -* Renamed DBOptions.num_subcompactions to DBOptions.max_subcompactions to make the name better match the actual functionality of the option. -* Added Equal() method to the Comparator interface that can optionally be overwritten in cases where equality comparisons can be done more efficiently than three-way comparisons. -* Previous 'experimental' OptimisticTransaction class has been replaced by Transaction class. 
- -## 3.13.0 (2015-08-06) -### New Features -* RollbackToSavePoint() in WriteBatch/WriteBatchWithIndex -* Add NewCompactOnDeletionCollectorFactory() in utilities/table_properties_collectors, which allows rocksdb to mark a SST file as need-compaction when it observes at least D deletion entries in any N consecutive entries in that SST file. Note that this feature depends on an experimental NeedCompact() API --- the result of this API will not persist after DB restart. -* Add DBOptions::delete_scheduler. Use NewDeleteScheduler() in include/rocksdb/delete_scheduler.h to create a DeleteScheduler that can be shared among multiple RocksDB instances to control the file deletion rate of SST files that exist in the first db_path. - -### Public API Changes -* Deprecated WriteOptions::timeout_hint_us. We no longer support write timeout. If you really need this option, talk to us and we might consider returning it. -* Deprecated purge_redundant_kvs_while_flush option. -* Removed BackupEngine::NewBackupEngine() and NewReadOnlyBackupEngine() that were deprecated in RocksDB 3.8. Please use BackupEngine::Open() instead. -* Deprecated Compaction Filter V2. We are not aware of any existing use-cases. If you use this filter, your compile will break with RocksDB 3.13. Please let us know if you use it and we'll put it back in RocksDB 3.14. -* Env::FileExists now returns a Status instead of a boolean -* Add statistics::getHistogramString() to print detailed distribution of a histogram metric. -* Add DBOptions::skip_stats_update_on_db_open. When it is on, DB::Open() will run faster as it skips the random reads required for loading necessary stats from SST files to optimize compaction. - -## 3.12.0 (2015-07-02) -### New Features -* Added experimental support for optimistic transactions. See include/rocksdb/utilities/optimistic_transaction.h for more info. -* Added a new way to report QPS from db_bench (check out --report_file and --report_interval_seconds) -* Added a cache for individual rows. See DBOptions::row_cache for more info. -* Several new features on EventListener (see include/rocksdb/listener.h): - - OnCompactionCompleted() now returns per-compaction job statistics, defined in include/rocksdb/compaction_job_stats.h. - - Added OnTableFileCreated() and OnTableFileDeleted(). -* Add compaction_options_universal.enable_trivial_move to true, to allow trivial move while performing universal compaction. Trivial move will happen only when all the input files are non overlapping. - -### Public API changes -* EventListener::OnFlushCompleted() now passes FlushJobInfo instead of a list of parameters. -* DB::GetDbIdentity() is now a const function. If this function is overridden in your application, be sure to also make GetDbIdentity() const to avoid compile error. -* Move listeners from ColumnFamilyOptions to DBOptions. -* Add max_write_buffer_number_to_maintain option -* DB::CompactRange()'s parameter reduce_level is changed to change_level, to allow users to move levels to lower levels if allowed. It can be used to migrate a DB from options.level_compaction_dynamic_level_bytes=false to options.level_compaction_dynamic_level_bytes.true. -* Change default value for options.compaction_filter_factory and options.compaction_filter_factory_v2 to nullptr instead of DefaultCompactionFilterFactory and DefaultCompactionFilterFactoryV2. 
-* If CancelAllBackgroundWork is called without doing a flush after doing loads with WAL disabled, the changes which haven't been flushed before the call to CancelAllBackgroundWork will be lost. -* WBWIIterator::Entry() now returns WriteEntry instead of `const WriteEntry&` -* options.hard_rate_limit is deprecated. -* When options.soft_rate_limit or options.level0_slowdown_writes_trigger is triggered, the way to slow down writes is changed to: write rate to DB is limited to to options.delayed_write_rate. -* DB::GetApproximateSizes() adds a parameter to allow the estimation to include data in mem table, with default to be not to include. It is now only supported in skip list mem table. -* DB::CompactRange() now accept CompactRangeOptions instead of multiple parameters. CompactRangeOptions is defined in include/rocksdb/options.h. -* CompactRange() will now skip bottommost level compaction for level based compaction if there is no compaction filter, bottommost_level_compaction is introduced in CompactRangeOptions to control when it's possible to skip bottommost level compaction. This mean that if you want the compaction to produce a single file you need to set bottommost_level_compaction to BottommostLevelCompaction::kForce. -* Add Cache.GetPinnedUsage() to get the size of memory occupied by entries that are in use by the system. -* DB:Open() will fail if the compression specified in Options is not linked with the binary. If you see this failure, recompile RocksDB with compression libraries present on your system. Also, previously our default compression was snappy. This behavior is now changed. Now, the default compression is snappy only if it's available on the system. If it isn't we change the default to kNoCompression. -* We changed how we account for memory used in block cache. Previously, we only counted the sum of block sizes currently present in block cache. Now, we count the actual memory usage of the blocks. For example, a block of size 4.5KB will use 8KB memory with jemalloc. This might decrease your memory usage and possibly decrease performance. Increase block cache size if you see this happening after an upgrade. -* Add BackupEngineImpl.options_.max_background_operations to specify the maximum number of operations that may be performed in parallel. Add support for parallelized backup and restore. -* Add DB::SyncWAL() that does a WAL sync without blocking writers. - -## 3.11.0 (2015-05-19) -### New Features -* Added a new API Cache::SetCapacity(size_t capacity) to dynamically change the maximum configured capacity of the cache. If the new capacity is less than the existing cache usage, the implementation will try to lower the usage by evicting the necessary number of elements following a strict LRU policy. -* Added an experimental API for handling flashcache devices (blacklists background threads from caching their reads) -- NewFlashcacheAwareEnv -* If universal compaction is used and options.num_levels > 1, compact files are tried to be stored in none-L0 with smaller files based on options.target_file_size_base. The limitation of DB size when using universal compaction is greatly mitigated by using more levels. You can set num_levels = 1 to make universal compaction behave as before. 
If you set num_levels > 1 and want to roll back to a previous version, you need to compact all files to a big file in level 0 (by setting target_file_size_base to be large and CompactRange(, nullptr, nullptr, true, 0) and reopen the DB with the same version to rewrite the manifest, and then you can open it using previous releases. -* More information about rocksdb background threads are available in Env::GetThreadList(), including the number of bytes read / written by a compaction job, mem-table size and current number of bytes written by a flush job and many more. Check include/rocksdb/thread_status.h for more detail. - -### Public API changes -* TablePropertiesCollector::AddUserKey() is added to replace TablePropertiesCollector::Add(). AddUserKey() exposes key type, sequence number and file size up to now to users. -* DBOptions::bytes_per_sync used to apply to both WAL and table files. As of 3.11 it applies only to table files. If you want to use this option to sync WAL in the background, please use wal_bytes_per_sync - -## 3.10.0 (2015-03-24) -### New Features -* GetThreadStatus() is now able to report detailed thread status, including: - - Thread Operation including flush and compaction. - - The stage of the current thread operation. - - The elapsed time in micros since the current thread operation started. - More information can be found in include/rocksdb/thread_status.h. In addition, when running db_bench with --thread_status_per_interval, db_bench will also report thread status periodically. -* Changed the LRU caching algorithm so that referenced blocks (by iterators) are never evicted. This change made parameter removeScanCountLimit obsolete. Because of that NewLRUCache doesn't take three arguments anymore. table_cache_remove_scan_limit option is also removed -* By default we now optimize the compilation for the compilation platform (using -march=native). If you want to build portable binary, use 'PORTABLE=1' before the make command. -* We now allow level-compaction to place files in different paths by - specifying them in db_paths along with the target_size. - Lower numbered levels will be placed earlier in the db_paths and higher - numbered levels will be placed later in the db_paths vector. -* Potentially big performance improvements if you're using RocksDB with lots of column families (100-1000) -* Added BlockBasedTableOptions.format_version option, which allows user to specify which version of block based table he wants. As a general guideline, newer versions have more features, but might not be readable by older versions of RocksDB. -* Added new block based table format (version 2), which you can enable by setting BlockBasedTableOptions.format_version = 2. This format changes how we encode size information in compressed blocks and should help with memory allocations if you're using Zlib or BZip2 compressions. -* MemEnv (env that stores data in memory) is now available in default library build. You can create it by calling NewMemEnv(). -* Add SliceTransform.SameResultWhenAppended() to help users determine it is safe to apply prefix bloom/hash. -* Block based table now makes use of prefix bloom filter if it is a full fulter. -* Block based table remembers whether a whole key or prefix based bloom filter is supported in SST files. Do a sanity check when reading the file with users' configuration. 
-* Fixed a bug in ReadOnlyBackupEngine that deleted corrupted backups in some cases, even though the engine was ReadOnly -* options.level_compaction_dynamic_level_bytes, a feature to allow RocksDB to pick dynamic base of bytes for levels. With this feature turned on, we will automatically adjust max bytes for each level. The goal of this feature is to have lower bound on size amplification. For more details, see comments in options.h. -* Added an abstract base class WriteBatchBase for write batches -* Fixed a bug where we start deleting files of a dropped column families even if there are still live references to it - -### Public API changes -* Deprecated skip_log_error_on_recovery and table_cache_remove_scan_count_limit options. -* Logger method logv with log level parameter is now virtual - -### RocksJava -* Added compression per level API. -* MemEnv is now available in RocksJava via RocksMemEnv class. -* lz4 compression is now included in rocksjava static library when running `make rocksdbjavastatic`. -* Overflowing a size_t when setting rocksdb options now throws an IllegalArgumentException, which removes the necessity for a developer to catch these Exceptions explicitly. - -## 3.9.0 (2014-12-08) - -### New Features -* Add rocksdb::GetThreadList(), which in the future will return the current status of all - rocksdb-related threads. We will have more code instruments in the following RocksDB - releases. -* Change convert function in rocksdb/utilities/convenience.h to return Status instead of boolean. - Also add support for nested options in convert function - -### Public API changes -* New API to create a checkpoint added. Given a directory name, creates a new - database which is an image of the existing database. -* New API LinkFile added to Env. If you implement your own Env class, an - implementation of the API LinkFile will have to be provided. -* MemTableRep takes MemTableAllocator instead of Arena - -### Improvements -* RocksDBLite library now becomes smaller and will be compiled with -fno-exceptions flag. - -## 3.8.0 (2014-11-14) - -### Public API changes -* BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on. -* BackupableDB/RestoreBackupableDB have new GarbageCollect() methods, which will clean up files from corrupt and obsolete backups. -* BackupableDB/RestoreBackupableDB have new GetCorruptedBackups() methods which list corrupt backups. - -### Cleanup -* Bunch of code cleanup, some extra warnings turned on (-Wshadow, -Wshorten-64-to-32, -Wnon-virtual-dtor) - -### New features -* CompactFiles and EventListener, although they are still in experimental state -* Full ColumnFamily support in RocksJava. - -## 3.7.0 (2014-11-06) -### Public API changes -* Introduce SetOptions() API to allow adjusting a subset of options dynamically online -* Introduce 4 new convenient functions for converting Options from string: GetColumnFamilyOptionsFromMap(), GetColumnFamilyOptionsFromString(), GetDBOptionsFromMap(), GetDBOptionsFromString() -* Remove WriteBatchWithIndex.Delete() overloads using SliceParts -* When opening a DB, if options.max_background_compactions is larger than the existing low pri pool of options.env, it will enlarge it. Similarly, options.max_background_flushes is larger than the existing high pri pool of options.env, it will enlarge it. - -## 3.6.0 (2014-10-07) -### Disk format changes -* If you're using RocksDB on ARM platforms and you're using default bloom filter, there is a disk format change you need to be aware of. 
There are three steps you need to do when you convert to new release: 1. turn off filter policy, 2. compact the whole database, 3. turn on filter policy - -### Behavior changes -* We have refactored our system of stalling writes. Any stall-related statistics' meanings are changed. Instead of per-write stall counts, we now count stalls per-epoch, where epochs are periods between flushes and compactions. You'll find more information in our Tuning Perf Guide once we release RocksDB 3.6. -* When disableDataSync=true, we no longer sync the MANIFEST file. -* Add identity_as_first_hash property to CuckooTable. SST file needs to be rebuilt to be opened by reader properly. - -### Public API changes -* Change target_file_size_base type to uint64_t from int. -* Remove allow_thread_local. This feature was proved to be stable, so we are turning it always-on. - -## 3.5.0 (2014-09-03) -### New Features -* Add include/utilities/write_batch_with_index.h, providing a utility class to query data out of WriteBatch when building it. -* Move BlockBasedTable related options to BlockBasedTableOptions from Options. Change corresponding JNI interface. Options affected include: - no_block_cache, block_cache, block_cache_compressed, block_size, block_size_deviation, block_restart_interval, filter_policy, whole_key_filtering. filter_policy is changed to shared_ptr from a raw pointer. -* Remove deprecated options: disable_seek_compaction and db_stats_log_interval -* OptimizeForPointLookup() takes one parameter for block cache size. It now builds hash index, bloom filter, and block cache. - -### Public API changes -* The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. - -## 3.4.0 (2014-08-18) -### New Features -* Support Multiple DB paths in universal style compactions -* Add feature of storing plain table index and bloom filter in SST file. -* CompactRange() will never output compacted files to level 0. This used to be the case when all the compaction input files were at level 0. -* Added iterate_upper_bound to define the extent upto which the forward iterator will return entries. This will prevent iterating over delete markers and overwritten entries for edge cases where you want to break out the iterator anyways. This may improve performance in case there are a large number of delete markers or overwritten entries. - -### Public API changes -* DBOptions.db_paths now is a vector of a DBPath structure which indicates both of path and target size -* NewPlainTableFactory instead of bunch of parameters now accepts PlainTableOptions, which is defined in include/rocksdb/table.h -* Moved include/utilities/*.h to include/rocksdb/utilities/*.h -* Statistics APIs now take uint32_t as type instead of Tickers. Also make two access functions getTickerCount and histogramData const -* Add DB property rocksdb.estimate-num-keys, estimated number of live keys in DB. -* Add DB::GetIntProperty(), which returns DB properties that are integer as uint64_t. -* The Prefix Extractor used with V2 compaction filters is now passed user key to SliceTransform::Transform instead of unparsed RocksDB key. - -## 3.3.0 (2014-07-10) -### New Features -* Added JSON API prototype. -* HashLinklist reduces performance outlier caused by skewed bucket by switching data in the bucket from linked list to skip list. Add parameter threshold_use_skiplist in NewHashLinkListRepFactory(). -* RocksDB is now able to reclaim storage space more effectively during the compaction process. 
This is done by compensating the size of each deletion entry by the 2X average value size, which makes compaction to be triggered by deletion entries more easily. -* Add TimeOut API to write. Now WriteOptions have a variable called timeout_hint_us. With timeout_hint_us set to non-zero, any write associated with this timeout_hint_us may be aborted when it runs longer than the specified timeout_hint_us, and it is guaranteed that any write completes earlier than the specified time-out will not be aborted due to the time-out condition. -* Add a rate_limiter option, which controls total throughput of flush and compaction. The throughput is specified in bytes/sec. Flush always has precedence over compaction when available bandwidth is constrained. - -### Public API changes -* Removed NewTotalOrderPlainTableFactory because it is not used and implemented semantically incorrect. - -## 3.2.0 (2014-06-20) - -### Public API changes -* We removed seek compaction as a concept from RocksDB because: -1) It makes more sense for spinning disk workloads, while RocksDB is primarily designed for flash and memory, -2) It added some complexity to the important code-paths, -3) None of our internal customers were really using it. -Because of that, Options::disable_seek_compaction is now obsolete. It is still a parameter in Options, so it does not break the build, but it does not have any effect. We plan to completely remove it at some point, so we ask users to please remove this option from your code base. -* Add two parameters to NewHashLinkListRepFactory() for logging on too many entries in a hash bucket when flushing. -* Added new option BlockBasedTableOptions::hash_index_allow_collision. When enabled, prefix hash index for block-based table will not store prefix and allow hash collision, reducing memory consumption. - -### New Features -* PlainTable now supports a new key encoding: for keys of the same prefix, the prefix is only written once. It can be enabled through encoding_type parameter of NewPlainTableFactory() -* Add AdaptiveTableFactory, which is used to convert from a DB of PlainTable to BlockBasedTabe, or vise versa. It can be created using NewAdaptiveTableFactory() - -### Performance Improvements -* Tailing Iterator re-implemeted with ForwardIterator + Cascading Search Hint , see ~20% throughput improvement. - -## 3.1.0 (2014-05-21) - -### Public API changes -* Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories - -### New Features -* Hash index for block-based table will be materialized and reconstructed more efficiently. Previously hash index is constructed by scanning the whole table during every table open. -* FIFO compaction style - -## 3.0.0 (2014-05-05) - -### Public API changes -* Added _LEVEL to all InfoLogLevel enums -* Deprecated ReadOptions.prefix and ReadOptions.prefix_seek. Seek() defaults to prefix-based seek when Options.prefix_extractor is supplied. More detail is documented in https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes -* MemTableRepFactory::CreateMemTableRep() takes info logger as an extra parameter. - -### New Features -* Column family support -* Added an option to use different checksum functions in BlockBasedTableOptions -* Added ApplyToAllCacheEntries() function to Cache - -## 2.8.0 (2014-04-04) - -* Removed arena.h from public header files. 
-* By default, checksums are verified on every read from database -* Change default value of several options, including: paranoid_checks=true, max_open_files=5000, level0_slowdown_writes_trigger=20, level0_stop_writes_trigger=24, disable_seek_compaction=true, max_background_flushes=1 and allow_mmap_writes=false -* Added is_manual_compaction to CompactionFilter::Context -* Added "virtual void WaitForJoin()" in class Env. Default operation is no-op. -* Removed BackupEngine::DeleteBackupsNewerThan() function -* Added new option -- verify_checksums_in_compaction -* Changed Options.prefix_extractor from raw pointer to shared_ptr (take ownership) - Changed HashSkipListRepFactory and HashLinkListRepFactory constructor to not take SliceTransform object (use Options.prefix_extractor implicitly) -* Added Env::GetThreadPoolQueueLen(), which returns the waiting queue length of thread pools -* Added a command "checkconsistency" in ldb tool, which checks - if file system state matches DB state (file existence and file sizes) -* Separate options related to block based table to a new struct BlockBasedTableOptions. -* WriteBatch has a new function Count() to return total size in the batch, and Data() now returns a reference instead of a copy -* Add more counters to perf context. -* Supports several more DB properties: compaction-pending, background-errors and cur-size-active-mem-table. - -### New Features -* If we find one truncated record at the end of the MANIFEST or WAL files, - we will ignore it. We assume that writers of these records were interrupted - and that we can safely ignore it. -* A new SST format "PlainTable" is added, which is optimized for memory-only workloads. It can be created through NewPlainTableFactory() or NewTotalOrderPlainTableFactory(). -* A new mem table implementation hash linked list optimizing for the case that there are only few keys for each prefix, which can be created through NewHashLinkListRepFactory(). -* Merge operator supports a new function PartialMergeMulti() to allow users to do partial merges against multiple operands. -* Now compaction filter has a V2 interface. It buffers the kv-pairs sharing the same key prefix, process them in batches, and return the batched results back to DB. The new interface uses a new structure CompactionFilterContext for the same purpose as CompactionFilter::Context in V1. -* Geo-spatial support for locations and radial-search. - -## 2.7.0 (2014-01-28) - -### Public API changes - -* Renamed `StackableDB::GetRawDB()` to `StackableDB::GetBaseDB()`. -* Renamed `WriteBatch::Data()` `const std::string& Data() const`. -* Renamed class `TableStats` to `TableProperties`. -* Deleted class `PrefixHashRepFactory`. Please use `NewHashSkipListRepFactory()` instead. -* Supported multi-threaded `EnableFileDeletions()` and `DisableFileDeletions()`. -* Added `DB::GetOptions()`. -* Added `DB::GetDbIdentity()`. - -### New Features - -* Added [BackupableDB](https://github.com/facebook/rocksdb/wiki/How-to-backup-RocksDB%3F) -* Implemented [TailingIterator](https://github.com/facebook/rocksdb/wiki/Tailing-Iterator), a special type of iterator that - doesn't create a snapshot (can be used to read newly inserted data) - and is optimized for doing sequential reads. -* Added property block for table, which allows (1) a table to store - its metadata and (2) end user to collect and store properties they - are interested in. -* Enabled caching index and filter block in block cache (turned off by default). -* Supported error report when doing manual compaction. 
-* Supported additional Linux platform flavors and Mac OS. -* Put with `SliceParts` - Variant of `Put()` that gathers output like `writev(2)` -* Bug fixes and code refactor for compatibility with upcoming Column - Family feature. - -### Performance Improvements - -* Huge benchmark performance improvements by multiple efforts. For example, increase in readonly QPS from about 530k in 2.6 release to 1.1 million in 2.7 [1] -* Speeding up a way RocksDB deleted obsolete files - no longer listing the whole directory under a lock -- decrease in p99 -* Use raw pointer instead of shared pointer for statistics: [5b825d](https://github.com/facebook/rocksdb/commit/5b825d6964e26ec3b4bb6faa708ebb1787f1d7bd) -- huge increase in performance -- shared pointers are slow -* Optimized locking for `Get()` -- [1fdb3f](https://github.com/facebook/rocksdb/commit/1fdb3f7dc60e96394e3e5b69a46ede5d67fb976c) -- 1.5x QPS increase for some workloads -* Cache speedup - [e8d40c3](https://github.com/facebook/rocksdb/commit/e8d40c31b3cca0c3e1ae9abe9b9003b1288026a9) -* Implemented autovector, which allocates first N elements on stack. Most of vectors in RocksDB are small. Also, we never want to allocate heap objects while holding a mutex. -- [c01676e4](https://github.com/facebook/rocksdb/commit/c01676e46d3be08c3c140361ef1f5884f47d3b3c) -* Lots of efforts to move malloc, memcpy and IO outside of locks diff --git a/LICENSE.Apache b/LICENSE similarity index 100% rename from LICENSE.Apache rename to LICENSE diff --git a/README.md b/README.md index 8fcc4abc2..6ad228706 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,7 @@ -## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage +## ForSt: A Persistent Key-Value Store designed for Streaming processing -[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb) - -RocksDB is developed and maintained by Facebook Database Engineering Team. -It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com) -and Jeff Dean (jeff@google.com) +ForSt is developed and maintained by Flink community and hosted by ververica. +It is built on top of [RocksDB](https://github.com/facebook/rocksdb) by facebook. This code is a library that forms the core building block for a fast key-value server, especially suited for storing data on flash drives. @@ -14,16 +11,14 @@ and Space-Amplification-Factor (SAF). It has multi-threaded compactions, making it especially suitable for storing multiple terabytes of data in a single database. -Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples - See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation. The public interface is in `include/`. Callers should not include or rely on the details of any other header files in this package. Those internal APIs may be changed without warning. -Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups. +Questions and discussions are welcome on the [Discussion](https://github.com/ververica/ForSt/discussions). ## License -RocksDB is dual-licensed under both the GPLv2 (found in the COPYING file in the root directory) and Apache 2.0 License (found in the LICENSE.Apache file in the root directory). You may select, at your option, one of the above-listed licenses. 
+ForSt is licensed under Apache 2.0 License. From d73053f76a73a30113c0d96683ab2a4ae862c2bc Mon Sep 17 00:00:00 2001 From: Yanfei Lei Date: Wed, 13 Mar 2024 13:06:29 +0800 Subject: [PATCH 370/386] [build] Add pr-jobs check (#10) (cherry picked from commit 0d7fea8c7e47d2bfd137c4b096b8e55f7cd3a63d) --- .github/actions/build-folly/action.yml | 7 + .../action.yml | 10 + .../install-gflags-on-macos/action.yml | 7 + .github/actions/install-gflags/action.yml | 7 + .../actions/install-jdk8-on-macos/action.yml | 9 + .github/actions/post-steps/action.yml | 38 ++++ .github/actions/pre-steps-macos/action.yml | 5 + .github/actions/pre-steps/action.yml | 18 ++ .github/actions/setup-folly/action.yml | 7 + .github/actions/setup-upstream/action.yml | 20 ++ .github/workflows/pr-jobs.yml | 173 ++++++++++++++++++ Makefile | 6 + java/Makefile | 2 - 13 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 .github/actions/build-folly/action.yml create mode 100644 .github/actions/increase-max-open-files-on-macos/action.yml create mode 100644 .github/actions/install-gflags-on-macos/action.yml create mode 100644 .github/actions/install-gflags/action.yml create mode 100644 .github/actions/install-jdk8-on-macos/action.yml create mode 100644 .github/actions/post-steps/action.yml create mode 100644 .github/actions/pre-steps-macos/action.yml create mode 100644 .github/actions/pre-steps/action.yml create mode 100644 .github/actions/setup-folly/action.yml create mode 100644 .github/actions/setup-upstream/action.yml create mode 100644 .github/workflows/pr-jobs.yml diff --git a/.github/actions/build-folly/action.yml b/.github/actions/build-folly/action.yml new file mode 100644 index 000000000..cd6cdfc06 --- /dev/null +++ b/.github/actions/build-folly/action.yml @@ -0,0 +1,7 @@ +name: build-folly +runs: + using: composite + steps: + - name: Build folly and dependencies + run: make build_folly + shell: bash \ No newline at end of file diff --git a/.github/actions/increase-max-open-files-on-macos/action.yml b/.github/actions/increase-max-open-files-on-macos/action.yml new file mode 100644 index 000000000..869cd14ed --- /dev/null +++ b/.github/actions/increase-max-open-files-on-macos/action.yml @@ -0,0 +1,10 @@ +name: increase-max-open-files-on-macos +runs: + using: composite + steps: + - name: Increase max open files + run: |- + sudo sysctl -w kern.maxfiles=1048576 + sudo sysctl -w kern.maxfilesperproc=1048576 + sudo launchctl limit maxfiles 1048576 + shell: bash \ No newline at end of file diff --git a/.github/actions/install-gflags-on-macos/action.yml b/.github/actions/install-gflags-on-macos/action.yml new file mode 100644 index 000000000..3de06f614 --- /dev/null +++ b/.github/actions/install-gflags-on-macos/action.yml @@ -0,0 +1,7 @@ +name: install-gflags-on-macos +runs: + using: composite + steps: + - name: Install gflags on macos + run: HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags + shell: bash \ No newline at end of file diff --git a/.github/actions/install-gflags/action.yml b/.github/actions/install-gflags/action.yml new file mode 100644 index 000000000..d47619722 --- /dev/null +++ b/.github/actions/install-gflags/action.yml @@ -0,0 +1,7 @@ +name: install-gflags +runs: + using: composite + steps: + - name: Install gflags + run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev + shell: bash \ No newline at end of file diff --git a/.github/actions/install-jdk8-on-macos/action.yml b/.github/actions/install-jdk8-on-macos/action.yml new file mode 100644 index 000000000..80c56da09 --- /dev/null 
+++ b/.github/actions/install-jdk8-on-macos/action.yml @@ -0,0 +1,9 @@ +name: install-jdk8-on-macos +runs: + using: composite + steps: + - name: Install JDK 8 on macos + run: |- + HOMEBREW_NO_AUTO_UPDATE=1 brew tap bell-sw/liberica + HOMEBREW_NO_AUTO_UPDATE=1 brew install --cask liberica-jdk8 + shell: bash \ No newline at end of file diff --git a/.github/actions/post-steps/action.yml b/.github/actions/post-steps/action.yml new file mode 100644 index 000000000..5bb7502ec --- /dev/null +++ b/.github/actions/post-steps/action.yml @@ -0,0 +1,38 @@ +name: post-steps +description: Steps that are taken after a RocksDB job +inputs: + artifact-prefix: + description: Prefix to append to the name of artifacts that are uploaded + required: true + default: "${{ github.job }}" +runs: + using: composite + steps: + - name: Upload Test Results artifact + uses: actions/upload-artifact@v4.0.0 + with: + name: "${{ inputs.artifact-prefix }}-test-results" + path: "${{ runner.temp }}/test-results/**" + - name: Upload DB LOG file artifact + uses: actions/upload-artifact@v4.0.0 + with: + name: "${{ inputs.artifact-prefix }}-db-log-file" + path: LOG + - name: Copy Test Logs (on Failure) + if: ${{ failure() }} + run: | + mkdir -p ${{ runner.temp }}/failure-test-logs + cp -r t/* ${{ runner.temp }}/failure-test-logs + shell: bash + - name: Upload Test Logs (on Failure) artifact + uses: actions/upload-artifact@v4.0.0 + with: + name: "${{ inputs.artifact-prefix }}-failure-test-logs" + path: ${{ runner.temp }}/failure-test-logs/** + if-no-files-found: ignore + - name: Upload Core Dumps artifact + uses: actions/upload-artifact@v4.0.0 + with: + name: "${{ inputs.artifact-prefix }}-core-dumps" + path: "core.*" + if-no-files-found: ignore \ No newline at end of file diff --git a/.github/actions/pre-steps-macos/action.yml b/.github/actions/pre-steps-macos/action.yml new file mode 100644 index 000000000..86c83b3b4 --- /dev/null +++ b/.github/actions/pre-steps-macos/action.yml @@ -0,0 +1,5 @@ +name: pre-steps-macos +runs: + using: composite + steps: + - uses: "./.github/actions/pre-steps" \ No newline at end of file diff --git a/.github/actions/pre-steps/action.yml b/.github/actions/pre-steps/action.yml new file mode 100644 index 000000000..d40254610 --- /dev/null +++ b/.github/actions/pre-steps/action.yml @@ -0,0 +1,18 @@ +name: pre-steps +runs: + using: composite + steps: + - name: Setup Environment Variables + run: |- + echo "GTEST_THROW_ON_FAILURE=0" >> "$GITHUB_ENV" + echo "GTEST_OUTPUT=\"xml:${{ runner.temp }}/test-results/\"" >> "$GITHUB_ENV" + echo "SKIP_FORMAT_BUCK_CHECKS=1" >> "$GITHUB_ENV" + echo "GTEST_COLOR=1" >> "$GITHUB_ENV" + echo "CTEST_OUTPUT_ON_FAILURE=1" >> "$GITHUB_ENV" + echo "CTEST_TEST_TIMEOUT=300" >> "$GITHUB_ENV" + echo "ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> "$GITHUB_ENV" + echo "BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> "$GITHUB_ENV" + echo "SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> "$GITHUB_ENV" + echo "LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> "$GITHUB_ENV" + echo "ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> "$GITHUB_ENV" + shell: bash \ No newline at end of file diff --git a/.github/actions/setup-folly/action.yml b/.github/actions/setup-folly/action.yml new file mode 100644 index 000000000..cf2b2900b --- /dev/null +++ b/.github/actions/setup-folly/action.yml @@ -0,0 +1,7 @@ +name: setup-folly +runs: + 
using: composite + steps: + - name: Checkout folly sources + run: make checkout_folly + shell: bash \ No newline at end of file diff --git a/.github/actions/setup-upstream/action.yml b/.github/actions/setup-upstream/action.yml new file mode 100644 index 000000000..6cbe22771 --- /dev/null +++ b/.github/actions/setup-upstream/action.yml @@ -0,0 +1,20 @@ +name: build-folly +runs: + using: composite + steps: + - name: Fix repo ownership + # Needed in some cases, as safe.directory setting doesn't take effect + # under env -i + run: chown `whoami` . || true + shell: bash + - name: Set upstream + run: git remote add upstream https://github.com/facebook/rocksdb.git + shell: bash + - name: Fetch upstream + run: git fetch upstream + shell: bash + - name: Git status + # NOTE: some old branch builds under check_format_compatible.sh invoke + # git under env -i + run: git status && git remote -v && env -i git branch + shell: bash \ No newline at end of file diff --git a/.github/workflows/pr-jobs.yml b/.github/workflows/pr-jobs.yml new file mode 100644 index 000000000..385bd2dde --- /dev/null +++ b/.github/workflows/pr-jobs.yml @@ -0,0 +1,173 @@ +name: ververica/forst/pr-jobs +on: [push, pull_request] +jobs: + # ======================== Fast Initial Checks ====================== # + check-format-and-targets: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4.1.0 + with: + fetch-depth: 0 # Need full checkout to determine merge base + fetch-tags: true + - uses: "./.github/actions/setup-upstream" + - name: Setup Python + uses: actions/setup-python@v5 + - name: Install Dependencies + run: python -m pip install --upgrade pip + - name: Install argparse + run: pip install argparse + - name: Download clang-format-diff.py + run: wget https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py + - name: Check format + run: VERBOSE_CHECK=1 make check-format + - name: Simple source code checks + run: make check-sources + # ========================= Linux With Tests ======================== # + build-linux: + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - uses: actions/checkout@v4.1.0 + - uses: "./.github/actions/pre-steps" + - uses: "./.github/actions/install-gflags" + - run: echo "JAVA_HOME=${JAVA_HOME}" + - run: DISABLE_WARNING_AS_ERROR=1 make V=1 J=8 -j8 check + - uses: "./.github/actions/post-steps" + # ======================== Linux No Test Runs ======================= # + build-linux-release: + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - uses: actions/checkout@v4.1.0 + - uses: "./.github/actions/install-gflags" + - run: echo "JAVA_HOME=${JAVA_HOME}" + - run: echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $GITHUB_PATH + - run: DISABLE_WARNING_AS_ERROR=1 make V=1 -j32 LIB_MODE=shared release + - run: ls librocksdb.so + - run: "./db_stress --version" + - run: DISABLE_WARNING_AS_ERROR=1 make clean + - run: DISABLE_WARNING_AS_ERROR=1 make V=1 -j32 release + - run: ls librocksdb.a + - run: "./db_stress --version" + - run: DISABLE_WARNING_AS_ERROR=1 make clean + - run: sudo apt-get remove -y libgflags-dev + - run: DISABLE_WARNING_AS_ERROR=1 make V=1 -j32 LIB_MODE=shared release + - run: ls librocksdb.so + - run: if ./db_stress --version; then false; else true; fi + - run: DISABLE_WARNING_AS_ERROR=1 make clean + - run: DISABLE_WARNING_AS_ERROR=1 make V=1 -j32 release + - run: ls librocksdb.a + - run: if ./db_stress --version; then false; else true; fi + - uses: "./.github/actions/post-steps" + # ============================ Java 
Jobs ============================ # + build-linux-java: + runs-on: ubuntu-latest + container: evolvedbinary/rocksjava:centos6_x64-be + steps: + # The docker image is intentionally based on an OS that has an older GLIBC version. + # That GLIBC is incompatibile with GitHub's actions/checkout. Thus we implement a manual checkout step. + - name: Checkout + env: + GH_TOKEN: ${{ github.token }} + run: | + chown `whoami` . || true + git clone --no-checkout https://oath2:$GH_TOKEN@github.com/${{ github.repository }}.git . + git -c protocol.version=2 fetch --update-head-ok --no-tags --prune --no-recurse-submodules --depth=1 origin +${{ github.sha }}:${{ github.ref }} + git checkout --progress --force ${{ github.ref }} + git log -1 --format='%H' + - uses: "./.github/actions/pre-steps" + - name: Set Java Environment + run: |- + echo "JAVA_HOME=${JAVA_HOME}" + echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $GITHUB_PATH + which java && java -version + which javac && javac -version + - name: Test RocksDBJava + run: scl enable devtoolset-7 'DISABLE_WARNING_AS_ERROR=1 make V=1 J=8 -j8 jtest' + # NOTE: post-steps skipped because of compatibility issues with docker image + build-linux-java-static: + runs-on: ubuntu-latest + container: evolvedbinary/rocksjava:centos6_x64-be + steps: + # The docker image is intentionally based on an OS that has an older GLIBC version. + # That GLIBC is incompatibile with GitHub's actions/checkout. Thus we implement a manual checkout step. + - name: Checkout + env: + GH_TOKEN: ${{ github.token }} + run: | + chown `whoami` . || true + git clone --no-checkout https://oath2:$GH_TOKEN@github.com/${{ github.repository }}.git . + git -c protocol.version=2 fetch --update-head-ok --no-tags --prune --no-recurse-submodules --depth=1 origin +${{ github.sha }}:${{ github.ref }} + git checkout --progress --force ${{ github.ref }} + git log -1 --format='%H' + - uses: "./.github/actions/pre-steps" + - name: Set Java Environment + run: |- + echo "JAVA_HOME=${JAVA_HOME}" + which java && java -version + which javac && javac -version + - name: Build RocksDBJava Static Library + run: scl enable devtoolset-7 'DISABLE_WARNING_AS_ERROR=1 make V=1 J=8 -j8 rocksdbjavastatic' + # NOTE: post-steps skipped because of compatibility issues with docker image + + # ========================= MacOS build only ======================== # + build-macos: + runs-on: macos-13 + timeout-minutes: 120 + env: + ROCKSDB_DISABLE_JEMALLOC: 1 + steps: + - uses: actions/checkout@v4.1.0 + - uses: maxim-lobanov/setup-xcode@v1.6.0 + with: + xcode-version: 14.3.1 + - uses: "./.github/actions/increase-max-open-files-on-macos" + - uses: "./.github/actions/install-gflags-on-macos" + - uses: "./.github/actions/pre-steps-macos" + - name: Build + run: ulimit -S -n `ulimit -H -n` && DISABLE_WARNING_AS_ERROR=1 make V=1 J=16 -j16 all + - uses: "./.github/actions/post-steps" + # ========================= MacOS with java ======================== # + build-macos-java: + runs-on: macos-13 + env: + JAVA_HOME: "/Library/Java/JavaVirtualMachines/liberica-jdk-8.jdk/Contents/Home" + ROCKSDB_DISABLE_JEMALLOC: 1 + steps: + - uses: actions/checkout@v4.1.0 + - uses: maxim-lobanov/setup-xcode@v1.6.0 + with: + xcode-version: 14.3.1 + - uses: "./.github/actions/increase-max-open-files-on-macos" + - uses: "./.github/actions/install-gflags-on-macos" + - uses: "./.github/actions/install-jdk8-on-macos" + - uses: "./.github/actions/pre-steps-macos" + - name: Set Java Environment + run: |- + echo "JAVA_HOME=${JAVA_HOME}" + which java && java -version + which 
javac && javac -version + - name: Test RocksDBJava + run: DISABLE_WARNING_AS_ERROR=1 make V=1 J=16 -j16 jtest + - uses: "./.github/actions/post-steps" + build-macos-java-static: + runs-on: macos-13 + env: + JAVA_HOME: "/Library/Java/JavaVirtualMachines/liberica-jdk-8.jdk/Contents/Home" + steps: + - uses: actions/checkout@v4.1.0 + - uses: maxim-lobanov/setup-xcode@v1.6.0 + with: + xcode-version: 14.3.1 + - uses: "./.github/actions/increase-max-open-files-on-macos" + - uses: "./.github/actions/install-gflags-on-macos" + - uses: "./.github/actions/install-jdk8-on-macos" + - uses: "./.github/actions/pre-steps-macos" + - name: Set Java Environment + run: |- + echo "JAVA_HOME=${JAVA_HOME}" + which java && java -version + which javac && javac -version + - name: Build RocksDBJava x86 and ARM Static Libraries + run: DISABLE_WARNING_AS_ERROR=1 make V=1 J=16 -j16 rocksdbjavastaticosx + - uses: "./.github/actions/post-steps" \ No newline at end of file diff --git a/Makefile b/Makefile index bb39c2350..e35a9feb7 100644 --- a/Makefile +++ b/Makefile @@ -12,9 +12,15 @@ BASH_EXISTS := $(shell which bash) SHELL := $(shell which bash) include common.mk +MY_JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/linux +ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") + MY_JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin +endif + CLEAN_FILES = # deliberately empty, so we can append below. CFLAGS += ${EXTRA_CFLAGS} CXXFLAGS += ${EXTRA_CXXFLAGS} +CXXFLAGS += ${MY_JAVA_INCLUDE} LDFLAGS += $(EXTRA_LDFLAGS) MACHINE ?= $(shell uname -m) ARFLAGS = ${EXTRA_ARFLAGS} rs diff --git a/java/Makefile b/java/Makefile index 1a3dd3643..e80c8130b 100644 --- a/java/Makefile +++ b/java/Makefile @@ -349,8 +349,6 @@ javalib: java java_test javadocs java: java-version $(AM_V_GEN)mkdir -p $(MAIN_CLASSES) $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -h $(NATIVE_INCLUDE) -d $(MAIN_CLASSES) $(SOURCES) - $(AM_V_at)@cp ../HISTORY.md ./HISTORY-CPP.md - $(AM_V_at)@rm -f ./HISTORY-CPP.md sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) From e1d10833253155f2a7ec69100d74a4df84f267e1 Mon Sep 17 00:00:00 2001 From: Jinzhong Li Date: Mon, 18 Mar 2024 15:01:42 +0800 Subject: [PATCH 371/386] [env] Fix jvm_util unused parameter error (#14) (cherry picked from commit d4e8ef1b41d3042bca39d6e0da96d483f2f5a91e) --- env/flink/jvm_util.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/env/flink/jvm_util.cc b/env/flink/jvm_util.cc index 8e2c6f07a..ecd6f9677 100644 --- a/env/flink/jvm_util.cc +++ b/env/flink/jvm_util.cc @@ -18,11 +18,14 @@ #include "env/flink/jvm_util.h" +#define UNUSED(x) (void)(x) + namespace ROCKSDB_NAMESPACE { std::atomic jvm_ = std::atomic(nullptr); JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { + UNUSED(reserved); JNIEnv* env = nullptr; if (vm->GetEnv((void**)&env, JNI_VERSION_1_8) != JNI_OK) { return -1; @@ -33,6 +36,8 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { } JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* reserved) { + UNUSED(vm); + UNUSED(reserved); jvm_.store(nullptr); } From f845fe48eee62bef6b3abb76127e15c87466cfd2 Mon Sep 17 00:00:00 2001 From: Hangxiang Yu Date: Fri, 15 Mar 2024 09:38:58 +0800 Subject: [PATCH 372/386] [env] Implement all methods of env_flink (#13) (cherry picked from commit 7c0c8da59ae8d27b7db68752ac84ec3004efba87) --- env/flink/env_flink.cc | 843 +++++++++++++++++++++++++++++++++++++++- env/flink/env_flink.h | 37 +- env/flink/jni_helper.cc | 325 ++++++++++++++-- env/flink/jni_helper.h | 103 ++++- 4 files changed, 1243 
insertions(+), 65 deletions(-) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index 87183f131..8987084d0 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -3,8 +3,843 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -// TODO: -// 1. Register flink env to ObjectLibrary -// 2. Implement all methods of env_flink.h +#include "env_flink.h" -#include "env_flink.h" \ No newline at end of file +#include "jvm_util.h" + +// +// This file defines a Flink environment for ForSt. It uses the JNI call +// to access Flink FileSystem. All files created by one instance of ForSt +// will reside on the actual Flink FileSystem. +// +namespace ROCKSDB_NAMESPACE { + +// Appends to an existing file in Flink FileSystem. +class FlinkWritableFile : public FSWritableFile { + private: + const std::string file_path_; + const jobject file_system_instance_; + jobject fs_data_output_stream_instance_; + JavaClassCache* class_cache_; + + public: + FlinkWritableFile(jobject file_system_instance, + JavaClassCache* java_class_cache, + const std::string& file_path, const FileOptions& options) + : FSWritableFile(options), + file_path_(file_path), + file_system_instance_(file_system_instance), + class_cache_(java_class_cache) {} + + ~FlinkWritableFile() override { + JNIEnv* jniEnv = getJNIEnv(); + if (fs_data_output_stream_instance_ != nullptr) { + jniEnv->DeleteGlobalRef(fs_data_output_stream_instance_); + } + } + + IOStatus Init() { + JNIEnv* jniEnv = getJNIEnv(); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(file_path_, &pathInstance); + if (!status.ok()) { + return status; + } + + JavaClassCache::JavaMethodContext fileSystemCreateMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_CREATE); + jobject fsDataOutputStream = jniEnv->CallObjectMethod( + file_system_instance_, fileSystemCreateMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + if (fsDataOutputStream == nullptr) { + return CheckThenError( + std::string( + "CallObjectMethod Exception when Init FlinkWritableFile, ") + .append(fileSystemCreateMethod.ToString()) + .append(", args: Path(") + .append(file_path_) + .append(")")); + } + fs_data_output_stream_instance_ = jniEnv->NewGlobalRef(fsDataOutputStream); + jniEnv->DeleteLocalRef(fsDataOutputStream); + return IOStatus::OK(); + } + + IOStatus Append(const Slice& data, const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + JNIEnv* jniEnv = getJNIEnv(); + if (data.size() > static_cast(LONG_MAX)) { + return IOStatus::IOError( + std::string("Append too big data to file, data: ") + .append(data.ToString())); + } + jobject directByteBuffer = jniEnv->NewDirectByteBuffer( + (void*)data.data(), static_cast(data.size())); + + JavaClassCache::JavaMethodContext writeMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_OUTPUT_STREAM_WRITE); + jniEnv->CallVoidMethod(fs_data_output_stream_instance_, + writeMethod.javaMethod, directByteBuffer); + jniEnv->DeleteLocalRef(directByteBuffer); + + std::string filePath = file_path_; + return CurrentStatus([filePath]() { + return std::string("Exception when Appending file, path: ") + .append(filePath); + }); + } + + IOStatus Append(const Slice& data, const IOOptions& options, + const DataVerificationInfo& /* verification_info */, + IODebugContext* dbg) override { + return Append(data, options, dbg); + } + + IOStatus Flush(const IOOptions& /*options*/, 
+ IODebugContext* /*dbg*/) override { + JavaClassCache::JavaMethodContext flushMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_OUTPUT_STREAM_FLUSH); + JNIEnv* jniEnv = getJNIEnv(); + jniEnv->CallVoidMethod(fs_data_output_stream_instance_, + flushMethod.javaMethod); + + std::string filePath = file_path_; + return CurrentStatus([filePath]() { + return std::string("Exception when Flush file, path: ").append(filePath); + }); + } + + IOStatus Sync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + JavaClassCache::JavaMethodContext flushMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_OUTPUT_STREAM_SYNC); + JNIEnv* jniEnv = getJNIEnv(); + jniEnv->CallVoidMethod(fs_data_output_stream_instance_, + flushMethod.javaMethod); + + std::string filePath = file_path_; + return CurrentStatus([filePath]() { + return std::string("Exception when Sync file, path: ").append(filePath); + }); + } + + IOStatus Close(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + JavaClassCache::JavaMethodContext closeMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_OUTPUT_STREAM_CLOSE); + JNIEnv* jniEnv = getJNIEnv(); + jniEnv->CallVoidMethod(fs_data_output_stream_instance_, + closeMethod.javaMethod); + + std::string filePath = file_path_; + return CurrentStatus([filePath]() { + return std::string("Exception when Close file, path: ").append(filePath); + }); + } +}; + +// Used for reading a file from Flink FileSystem. It implements both +// sequential-read access methods and random read access methods. +class FlinkReadableFile : virtual public FSSequentialFile, + virtual public FSRandomAccessFile { + private: + const std::string file_path_; + const jobject file_system_instance_; + jobject fs_data_input_stream_instance_; + JavaClassCache* class_cache_; + + public: + FlinkReadableFile(jobject file_system_instance, + JavaClassCache* java_class_cache, + const std::string& file_path) + : file_path_(file_path), + file_system_instance_(file_system_instance), + class_cache_(java_class_cache) {} + + ~FlinkReadableFile() override { + JNIEnv* jniEnv = getJNIEnv(); + if (fs_data_input_stream_instance_ != nullptr) { + jniEnv->DeleteGlobalRef(fs_data_input_stream_instance_); + } + } + + IOStatus Init() { + JNIEnv* jniEnv = getJNIEnv(); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(file_path_, &pathInstance); + if (!status.ok()) { + return status; + } + + JavaClassCache::JavaMethodContext openMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_OPEN); + jobject fsDataInputStream = jniEnv->CallObjectMethod( + file_system_instance_, openMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + if (fsDataInputStream == nullptr) { + return CheckThenError( + std::string( + "CallObjectMethod Exception when Init FlinkReadableFile, ") + .append(openMethod.ToString()) + .append(", args: Path(") + .append(file_path_) + .append(")")); + } + + fs_data_input_stream_instance_ = jniEnv->NewGlobalRef(fsDataInputStream); + jniEnv->DeleteLocalRef(fsDataInputStream); + return IOStatus::OK(); + } + + // sequential access, read data at current offset in file + IOStatus Read(size_t n, const IOOptions& /*options*/, Slice* result, + char* scratch, IODebugContext* /*dbg*/) override { + JNIEnv* jniEnv = getJNIEnv(); + if (n > static_cast(LONG_MAX)) { + return IOStatus::IOError( + std::string("Read too big data to file, data size: ") + .append(std::to_string(n))); + } + jobject 
directByteBuffer = + jniEnv->NewDirectByteBuffer((void*)scratch, static_cast(n)); + + JavaClassCache::JavaMethodContext readMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_INPUT_STREAM_SEQ_READ); + jint totalBytesRead = + jniEnv->CallIntMethod(fs_data_input_stream_instance_, + readMethod.javaMethod, directByteBuffer); + + jniEnv->DeleteLocalRef(directByteBuffer); + + std::string filePath = file_path_; + IOStatus status = CurrentStatus([filePath]() { + return std::string("Exception when Reading file, path: ") + .append(filePath); + }); + if (!status.ok()) { + return status; + } + + *result = Slice(scratch, totalBytesRead == -1 ? 0 : totalBytesRead); + return IOStatus::OK(); + } + + // random access, read data from specified offset in file + IOStatus Read(uint64_t offset, size_t n, const IOOptions& /*options*/, + Slice* result, char* scratch, + IODebugContext* /*dbg*/) const override { + JNIEnv* jniEnv = getJNIEnv(); + if (n > static_cast(LONG_MAX)) { + return IOStatus::IOError( + std::string("Read too big data to file, data size: ") + .append(std::to_string(n))); + } + jobject directByteBuffer = + jniEnv->NewDirectByteBuffer((void*)scratch, static_cast(n)); + + JavaClassCache::JavaMethodContext readMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FS_INPUT_STREAM_RANDOM_READ); + jint totalBytesRead = + jniEnv->CallIntMethod(fs_data_input_stream_instance_, + readMethod.javaMethod, offset, directByteBuffer); + + jniEnv->DeleteLocalRef(directByteBuffer); + + std::string filePath = file_path_; + IOStatus status = CurrentStatus([filePath]() { + return std::string("Exception when Reading file, path: ") + .append(filePath); + }); + if (!status.ok()) { + return status; + } + + *result = Slice(scratch, totalBytesRead == -1 ? 0 : totalBytesRead); + return IOStatus::OK(); + } + + IOStatus Skip(uint64_t n) override { + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaMethodContext skipMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FS_INPUT_STREAM_SKIP); + jniEnv->CallVoidMethod(fs_data_input_stream_instance_, + skipMethod.javaMethod, n); + + std::string filePath = file_path_; + return CurrentStatus([filePath]() { + return std::string("Exception when skipping file, path: ") + .append(filePath); + }); + } +}; + +// Simple implementation of FSDirectory, Shouldn't influence the normal usage +class FlinkDirectory : public FSDirectory { + public: + explicit FlinkDirectory() = default; + ~FlinkDirectory() override = default; + + IOStatus Fsync(const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + // TODO: Syncing directory is managed by specific flink file system + // currently, consider to implement in the future + return IOStatus::OK(); + } +}; + +FlinkFileSystem::FlinkFileSystem(const std::shared_ptr& base_fs, + const std::string& base_path) + : FileSystemWrapper(base_fs), base_path_(base_path) {} + +FlinkFileSystem::~FlinkFileSystem() { + if (file_system_instance_ != nullptr) { + JNIEnv* env = getJNIEnv(); + env->DeleteGlobalRef(file_system_instance_); + } + delete class_cache_; +} + +Status FlinkFileSystem::Init() { + JNIEnv* jniEnv = getJNIEnv(); + std::unique_ptr javaClassCache; + Status status = JavaClassCache::Create(jniEnv, &javaClassCache); + if (!status.ok()) { + return status; + } + class_cache_ = javaClassCache.release(); + + // Delegate Flink to load real FileSystem (e.g. + // S3FileSystem/OSSFileSystem/...) 
+ JavaClassCache::JavaClassContext fileSystemClass = + class_cache_->GetJClass(JavaClassCache::JC_FLINK_FILE_SYSTEM); + JavaClassCache::JavaMethodContext fileSystemGetMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_GET); + + JavaClassCache::JavaClassContext uriClass = + class_cache_->GetJClass(JavaClassCache::JC_URI); + JavaClassCache::JavaMethodContext uriConstructor = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_URI_CONSTRUCTOR); + + // Construct URI + jstring uriStringArg = jniEnv->NewStringUTF(base_path_.c_str()); + jobject uriInstance = jniEnv->NewObject( + uriClass.javaClass, uriConstructor.javaMethod, uriStringArg); + jniEnv->DeleteLocalRef(uriStringArg); + if (uriInstance == nullptr) { + return CheckThenError( + std::string("NewObject Exception when Init FlinkFileSystem, ") + .append(uriClass.ToString()) + .append(uriConstructor.ToString()) + .append(", args: ") + .append(base_path_)); + } + + // Construct FileSystem + jobject fileSystemInstance = jniEnv->CallStaticObjectMethod( + fileSystemClass.javaClass, fileSystemGetMethod.javaMethod, uriInstance); + jniEnv->DeleteLocalRef(uriInstance); + if (fileSystemInstance == nullptr) { + return CheckThenError( + std::string( + "CallStaticObjectMethod Exception when Init FlinkFileSystem, ") + .append(fileSystemClass.ToString()) + .append(fileSystemGetMethod.ToString()) + .append(", args: URI(") + .append(base_path_) + .append(")")); + } + file_system_instance_ = jniEnv->NewGlobalRef(fileSystemInstance); + jniEnv->DeleteLocalRef(fileSystemInstance); + return Status::OK(); +} + +std::string FlinkFileSystem::ConstructPath(const std::string& fname) { + return fname.at(0) == '/' ? base_path_ + fname : base_path_ + "/" + fname; +} + +// open a file for sequential reading +IOStatus FlinkFileSystem::NewSequentialFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + result->reset(); + IOStatus status = FileExists(fname, IOOptions(), dbg); + if (!status.ok()) { + return status; + } + + auto f = new FlinkReadableFile(file_system_instance_, class_cache_, + ConstructPath(fname)); + IOStatus valid = f->Init(); + if (!valid.ok()) { + delete f; + return valid; + } + result->reset(f); + return IOStatus::OK(); +} + +// open a file for random reading +IOStatus FlinkFileSystem::NewRandomAccessFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + result->reset(); + IOStatus status = FileExists(fname, IOOptions(), dbg); + if (!status.ok()) { + return status; + } + + auto f = new FlinkReadableFile(file_system_instance_, class_cache_, + ConstructPath(fname)); + IOStatus valid = f->Init(); + if (!valid.ok()) { + delete f; + return valid; + } + result->reset(f); + return IOStatus::OK(); +} + +// create a new file for writing +IOStatus FlinkFileSystem::NewWritableFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* /*dbg*/) { + result->reset(); + auto f = new FlinkWritableFile(file_system_instance_, class_cache_, + ConstructPath(fname), options); + IOStatus valid = f->Init(); + if (!valid.ok()) { + delete f; + return valid; + } + result->reset(f); + return IOStatus::OK(); +} + +IOStatus FlinkFileSystem::NewDirectory(const std::string& name, + const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) { + result->reset(); + IOStatus s = FileExists(name, options, dbg); + if (s.ok()) { + result->reset(new FlinkDirectory()); + } + return s; +} + 
+IOStatus FlinkFileSystem::FileExists(const std::string& file_name, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + std::string filePath = ConstructPath(file_name); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(filePath, &pathInstance); + if (!status.ok()) { + return status; + } + + // Call exist method + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaMethodContext existsMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_EXISTS); + jboolean exists = jniEnv->CallBooleanMethod( + file_system_instance_, existsMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + + status = CurrentStatus([filePath]() { + return std::string("Exception when FileExists, path: ").append(filePath); + }); + if (!status.ok()) { + return status; + } + + return exists == JNI_TRUE ? IOStatus::OK() : IOStatus::NotFound(); +} + +// TODO: Not Efficient! Consider adding usable methods in FLink FileSystem +IOStatus FlinkFileSystem::GetChildren(const std::string& file_name, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) { + IOStatus fileExistsStatus = FileExists(file_name, options, dbg); + if (!fileExistsStatus.ok()) { + return fileExistsStatus.IsNotFound() + ? IOStatus::PathNotFound( + std::string("Could not find path when GetChildren, path: ") + .append(ConstructPath(file_name))) + : fileExistsStatus; + } + + std::string filePath = ConstructPath(file_name); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(filePath, &pathInstance); + if (!status.ok()) { + return status; + } + + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaMethodContext listStatusMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FILE_SYSTEM_LIST_STATUS); + + auto fileStatusArray = (jobjectArray)jniEnv->CallObjectMethod( + file_system_instance_, listStatusMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + if (fileStatusArray == nullptr) { + return CheckThenError( + std::string("Exception when CallObjectMethod in GetChildren, ") + .append(listStatusMethod.ToString()) + .append(", args: Path(") + .append(filePath) + .append(")")); + } + + jsize fileStatusArrayLen = jniEnv->GetArrayLength(fileStatusArray); + for (jsize i = 0; i < fileStatusArrayLen; i++) { + jobject fileStatusObj = jniEnv->GetObjectArrayElement(fileStatusArray, i); + if (fileStatusObj == nullptr) { + jniEnv->DeleteLocalRef(fileStatusArray); + return CheckThenError( + "Exception when GetObjectArrayElement in GetChildren"); + } + + JavaClassCache::JavaMethodContext getPathMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_STATUS_GET_PATH); + jobject subPath = + jniEnv->CallObjectMethod(fileStatusObj, getPathMethod.javaMethod); + jniEnv->DeleteLocalRef(fileStatusObj); + if (subPath == nullptr) { + jniEnv->DeleteLocalRef(fileStatusArray); + return CheckThenError( + std::string("Exception when CallObjectMethod in GetChildren, ") + .append(getPathMethod.ToString())); + } + + JavaClassCache::JavaMethodContext pathToStringMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_PATH_TO_STRING); + auto subPathStr = (jstring)jniEnv->CallObjectMethod( + subPath, pathToStringMethod.javaMethod); + jniEnv->DeleteLocalRef(subPath); + const char* str = jniEnv->GetStringUTFChars(subPathStr, nullptr); + result->emplace_back(str); + jniEnv->ReleaseStringUTFChars(subPathStr, str); + jniEnv->DeleteLocalRef(subPathStr); + } + + 
jniEnv->DeleteLocalRef(fileStatusArray); + return IOStatus::OK(); +} + +IOStatus FlinkFileSystem::DeleteDir(const std::string& file_name, + const IOOptions& options, + IODebugContext* dbg) { + return Delete(file_name, options, dbg, true); +}; + +IOStatus FlinkFileSystem::DeleteFile(const std::string& file_name, + const IOOptions& options, + IODebugContext* dbg) { + return Delete(file_name, options, dbg, false); +} + +IOStatus FlinkFileSystem::Delete(const std::string& file_name, + const IOOptions& options, IODebugContext* dbg, + bool recursive) { + IOStatus fileExistsStatus = FileExists(file_name, options, dbg); + if (!fileExistsStatus.ok()) { + return fileExistsStatus.IsNotFound() + ? IOStatus::PathNotFound( + std::string("Could not find path when Delete, path: ") + .append(ConstructPath(file_name))) + : fileExistsStatus; + } + + std::string filePath = ConstructPath(file_name); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(filePath, &pathInstance); + if (!status.ok()) { + return status; + } + + // Call delete method + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaMethodContext deleteMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_DELETE); + jboolean deleted = jniEnv->CallBooleanMethod( + file_system_instance_, deleteMethod.javaMethod, pathInstance, recursive); + jniEnv->DeleteLocalRef(pathInstance); + + status = CurrentStatus([filePath]() { + return std::string("Exception when Delete, path: ").append(filePath); + }); + if (!status.ok()) { + return status; + } + + return deleted + ? IOStatus::OK() + : IOStatus::IOError(std::string("Exception when Delete, path: ") + .append(filePath)); +} + +IOStatus FlinkFileSystem::CreateDir(const std::string& file_name, + const IOOptions& options, + IODebugContext* dbg) { + IOStatus s = FileExists(file_name, options, dbg); + if (!s.ok()) { + return CreateDirIfMissing(file_name, options, dbg); + } + return IOStatus::IOError(std::string("Exception when CreateDir because Dir (") + .append(file_name) + .append(") exists")); +} + +IOStatus FlinkFileSystem::CreateDirIfMissing(const std::string& file_name, + const IOOptions& options, + IODebugContext* dbg) { + JNIEnv* jniEnv = getJNIEnv(); + + std::string filePath = ConstructPath(file_name); + // Construct Path Instance + jobject pathInstance; + IOStatus status = + class_cache_->ConstructPathInstance(filePath, &pathInstance); + if (!status.ok()) { + return status; + } + + // Call mkdirs method + JavaClassCache::JavaMethodContext mkdirMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_SYSTEM_MKDIR); + jboolean created = jniEnv->CallBooleanMethod( + file_system_instance_, mkdirMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + status = CurrentStatus([filePath]() { + return std::string("Exception when CreateDirIfMissing, path: ") + .append(filePath); + }); + if (!status.ok()) { + return status; + } + + return created ? 
IOStatus::OK() + : IOStatus::IOError( + std::string("Exception when CreateDirIfMissing, path: ") + .append(filePath)); +} + +IOStatus FlinkFileSystem::GetFileSize(const std::string& file_name, + const IOOptions& options, uint64_t* size, + IODebugContext* dbg) { + JNIEnv* jniEnv = getJNIEnv(); + jobject fileStatus; + IOStatus status = GetFileStatus(file_name, options, dbg, &fileStatus); + if (!status.ok()) { + return status; + } + + JavaClassCache::JavaMethodContext getLenMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_STATUS_GET_LEN); + jlong fileSize = jniEnv->CallLongMethod(fileStatus, getLenMethod.javaMethod); + jniEnv->DeleteLocalRef(fileStatus); + + status = CurrentStatus([file_name]() { + return std::string("Exception when GetFileSize, file name: ") + .append(file_name); + }); + if (!status.ok()) { + return status; + } + + *size = fileSize; + return IOStatus::OK(); +} + +// The life cycle of fileStatus is maintained by caller. +IOStatus FlinkFileSystem::GetFileStatus(const std::string& file_name, + const IOOptions& options, + IODebugContext* dbg, + jobject* fileStatus) { + IOStatus status = FileExists(file_name, options, dbg); + if (!status.ok()) { + return status.IsNotFound() + ? IOStatus::PathNotFound( + std::string( + "Could not find path when GetFileStatus, path: ") + .append(ConstructPath(file_name))) + : status; + } + + std::string filePath = ConstructPath(file_name); + // Construct Path Instance + jobject pathInstance; + status = class_cache_->ConstructPathInstance(filePath, &pathInstance); + if (!status.ok()) { + return status; + } + + // Call getFileStatus method + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaMethodContext getFileStatusMethod = + class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FILE_SYSTEM_GET_FILE_STATUS); + *fileStatus = jniEnv->CallObjectMethod( + file_system_instance_, getFileStatusMethod.javaMethod, pathInstance); + jniEnv->DeleteLocalRef(pathInstance); + + return CurrentStatus([filePath]() { + return std::string("Exception when GetFileStatus, path: ").append(filePath); + }); +} + +IOStatus FlinkFileSystem::GetFileModificationTime(const std::string& file_name, + const IOOptions& options, + uint64_t* time, + IODebugContext* dbg) { + JNIEnv* jniEnv = getJNIEnv(); + jobject fileStatus; + IOStatus status = GetFileStatus(file_name, options, dbg, &fileStatus); + if (!status.ok()) { + return status; + } + + JavaClassCache::JavaMethodContext getModificationTimeMethod = + class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FILE_STATUS_GET_MODIFICATION_TIME); + jlong fileModificationTime = + jniEnv->CallLongMethod(fileStatus, getModificationTimeMethod.javaMethod); + jniEnv->DeleteLocalRef(fileStatus); + + status = CurrentStatus([file_name]() { + return std::string("Exception when GetFileModificationTime, file name: ") + .append(file_name); + }); + if (!status.ok()) { + return status; + } + + *time = fileModificationTime; + return IOStatus::OK(); +} + +IOStatus FlinkFileSystem::IsDirectory(const std::string& path, + const IOOptions& options, bool* is_dir, + IODebugContext* dbg) { + JNIEnv* jniEnv = getJNIEnv(); + jobject fileStatus; + IOStatus status = GetFileStatus(path, options, dbg, &fileStatus); + if (!status.ok()) { + return status; + } + + JavaClassCache::JavaMethodContext isDirMethod = + class_cache_->GetJMethod(JavaClassCache::JM_FLINK_FILE_STATUS_IS_DIR); + jboolean isDir = + jniEnv->CallBooleanMethod(fileStatus, isDirMethod.javaMethod); + jniEnv->DeleteLocalRef(fileStatus); + + status = CurrentStatus([path]() { + return 
std::string("Exception when IsDirectory, file name: ").append(path); + }); + if (!status.ok()) { + return status; + } + + *is_dir = isDir; + return IOStatus::OK(); +} + +IOStatus FlinkFileSystem::RenameFile(const std::string& src, + const std::string& target, + const IOOptions& options, + IODebugContext* dbg) { + IOStatus status = FileExists(src, options, dbg); + if (!status.ok()) { + return status.IsNotFound() + ? IOStatus::PathNotFound( + std::string( + "Could not find src path when RenameFile, path: ") + .append(ConstructPath(src))) + : status; + } + + JNIEnv* jniEnv = getJNIEnv(); + + std::string srcFilePath = ConstructPath(src); + // Construct src Path Instance + jobject srcPathInstance; + status = class_cache_->ConstructPathInstance(srcFilePath, &srcPathInstance); + if (!status.ok()) { + return status; + } + + std::string targetFilePath = ConstructPath(target); + // Construct target Path Instance + jobject targetPathInstance; + status = + class_cache_->ConstructPathInstance(targetFilePath, &targetPathInstance); + if (!status.ok()) { + jniEnv->DeleteLocalRef(srcPathInstance); + return status; + } + + JavaClassCache::JavaMethodContext renameMethod = class_cache_->GetJMethod( + JavaClassCache::JM_FLINK_FILE_SYSTEM_RENAME_FILE); + jboolean renamed = + jniEnv->CallBooleanMethod(file_system_instance_, renameMethod.javaMethod, + srcPathInstance, targetPathInstance); + jniEnv->DeleteLocalRef(srcPathInstance); + jniEnv->DeleteLocalRef(targetPathInstance); + + status = CurrentStatus([srcFilePath, targetFilePath]() { + return std::string("Exception when RenameFile, src: ") + .append(srcFilePath) + .append(", target: ") + .append(targetFilePath); + }); + if (!status.ok()) { + return status; + } + + return renamed + ? IOStatus::OK() + : IOStatus::IOError(std::string("Exception when RenameFile, src: ") + .append(srcFilePath) + .append(", target: ") + .append(targetFilePath)); +} + +IOStatus FlinkFileSystem::LockFile(const std::string& /*file_name*/, + const IOOptions& /*options*/, + FileLock** lock, IODebugContext* /*dbg*/) { + // There isn't a very good way to atomically check and create a file, + // Since it will not influence the usage of Flink, just leave it OK() now; + *lock = nullptr; + return IOStatus::OK(); +} + +IOStatus FlinkFileSystem::UnlockFile(FileLock* /*lock*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { + // There isn't a very good way to atomically check and create a file, + // Since it will not influence the usage of Flink, just leave it OK() now; + return IOStatus::OK(); +} + +Status FlinkFileSystem::Create(const std::shared_ptr& base, + const std::string& uri, + std::unique_ptr* result) { + auto* fileSystem = new FlinkFileSystem(base, uri); + Status status = fileSystem->Init(); + result->reset(fileSystem); + return status; +} +} // namespace ROCKSDB_NAMESPACE diff --git a/env/flink/env_flink.h b/env/flink/env_flink.h index d1912a3de..a4d1892b4 100644 --- a/env/flink/env_flink.h +++ b/env/flink/env_flink.h @@ -5,6 +5,7 @@ #pragma once +#include "jni_helper.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" #include "rocksdb/status.h" @@ -28,16 +29,9 @@ class FlinkFileSystem : public FileSystemWrapper { static const char* kNickName() { return "flink"; } const char* NickName() const override { return kNickName(); } - // Constructor and Destructor - explicit FlinkFileSystem(const std::shared_ptr& base, - const std::string& fsname); ~FlinkFileSystem() override; // Several methods current FileSystem must implement - - std::string GetId() const override; - 
Status ValidateOptions(const DBOptions& /*db_opts*/, - const ColumnFamilyOptions& /*cf_opts*/) const override; IOStatus NewSequentialFile(const std::string& /*fname*/, const FileOptions& /*options*/, std::unique_ptr* /*result*/, @@ -54,14 +48,14 @@ class FlinkFileSystem : public FileSystemWrapper { const IOOptions& /*options*/, std::unique_ptr* /*result*/, IODebugContext* /*dbg*/) override; - IOStatus FileExists(const std::string& /*fname*/, + IOStatus FileExists(const std::string& /*file_name*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override; - IOStatus GetChildren(const std::string& /*path*/, + IOStatus GetChildren(const std::string& /*file_name*/, const IOOptions& /*options*/, std::vector* /*result*/, IODebugContext* /*dbg*/) override; - IOStatus DeleteFile(const std::string& /*fname*/, + IOStatus DeleteFile(const std::string& /*file_name*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override; IOStatus CreateDir(const std::string& /*name*/, const IOOptions& /*options*/, @@ -69,9 +63,10 @@ class FlinkFileSystem : public FileSystemWrapper { IOStatus CreateDirIfMissing(const std::string& /*name*/, const IOOptions& /*options*/, IODebugContext* /*dbg*/) override; - IOStatus DeleteDir(const std::string& /*name*/, const IOOptions& /*options*/, + IOStatus DeleteDir(const std::string& /*file_name*/, + const IOOptions& /*options*/, IODebugContext* /*dbg*/) override; - IOStatus GetFileSize(const std::string& /*fname*/, + IOStatus GetFileSize(const std::string& /*file_name*/, const IOOptions& /*options*/, uint64_t* /*size*/, IODebugContext* /*dbg*/) override; IOStatus GetFileModificationTime(const std::string& /*fname*/, @@ -90,7 +85,23 @@ class FlinkFileSystem : public FileSystemWrapper { IODebugContext* /*dbg*/) override; private: - std::string base_path_; + const std::string base_path_; + JavaClassCache* class_cache_; + jobject file_system_instance_; + + explicit FlinkFileSystem(const std::shared_ptr& base, + const std::string& fsname); + + // Init FileSystem + Status Init(); + + IOStatus Delete(const std::string& /*file_name*/, + const IOOptions& /*options*/, IODebugContext* /*dbg*/, + bool /*recursive*/); + IOStatus GetFileStatus(const std::string& /*file_name*/, + const IOOptions& /*options*/, IODebugContext* /*dbg*/, + jobject* /*fileStatus*/); + std::string ConstructPath(const std::string& /*file_name*/); }; // Returns a `FlinkEnv` with base_path diff --git a/env/flink/jni_helper.cc b/env/flink/jni_helper.cc index 8d1ac5acf..6d18219cb 100644 --- a/env/flink/jni_helper.cc +++ b/env/flink/jni_helper.cc @@ -5,72 +5,325 @@ #include "jni_helper.h" +#include "jvm_util.h" + namespace ROCKSDB_NAMESPACE { -JavaClassCache::JavaClassCache(JNIEnv *env) : jni_env_(env) { +JavaClassCache::JavaClassCache(JNIEnv* env) : jni_env_(env) {} + +JavaClassCache::~JavaClassCache() { + // Release all global ref of cached jclasses + for (const auto& item : cached_java_classes_) { + if (item.javaClass) { + jni_env_->DeleteGlobalRef(item.javaClass); + } + } +} + +IOStatus JavaClassCache::Create(JNIEnv* env, + std::unique_ptr* result) { + auto classCache = new JavaClassCache(env); + IOStatus status = classCache->Init(); + if (!status.ok()) { + delete classCache; + result->reset(); + return status; + } + result->reset(classCache); + return status; +} + +IOStatus JavaClassCache::Init() { // Set all class names - cached_java_classes_[JavaClassCache::JC_URI].className = "java/net/URI"; - cached_java_classes_[JavaClassCache::JC_BYTE_BUFFER].className = + 
cached_java_classes_[CachedJavaClass::JC_URI].className = "java/net/URI"; + cached_java_classes_[CachedJavaClass::JC_BYTE_BUFFER].className = "java/nio/ByteBuffer"; - cached_java_classes_[JavaClassCache::JC_THROWABLE].className = + cached_java_classes_[CachedJavaClass::JC_THROWABLE].className = "java/lang/Throwable"; - cached_java_classes_[JavaClassCache::JC_FLINK_PATH].className = + cached_java_classes_[CachedJavaClass::JC_FLINK_PATH].className = "org/apache/flink/core/fs/Path"; - cached_java_classes_[JavaClassCache::JC_FLINK_FILE_SYSTEM].className = + cached_java_classes_[CachedJavaClass::JC_FLINK_FILE_SYSTEM].className = "org/apache/flink/state/forst/fs/ForStFlinkFileSystem"; - cached_java_classes_[JavaClassCache::JC_FLINK_FILE_STATUS].className = + cached_java_classes_[CachedJavaClass::JC_FLINK_FILE_STATUS].className = "org/apache/flink/core/fs/FileStatus"; - cached_java_classes_[JavaClassCache::JC_FLINK_FS_INPUT_STREAM].className = + cached_java_classes_[CachedJavaClass::JC_FLINK_FS_INPUT_STREAM].className = "org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream"; - cached_java_classes_[JavaClassCache::JC_FLINK_FS_OUTPUT_STREAM].className = + cached_java_classes_[CachedJavaClass::JC_FLINK_FS_OUTPUT_STREAM].className = "org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream"; - // Try best to create and set the jclass objects based on the class names set - // above + // Create and set the jclass objects based on the class names set above int numCachedClasses = - sizeof(cached_java_classes_) / sizeof(javaClassAndName); + sizeof(cached_java_classes_) / sizeof(JavaClassContext); for (int i = 0; i < numCachedClasses; i++) { - initCachedClass(cached_java_classes_[i].className, - &cached_java_classes_[i].javaClass); + IOStatus status = initCachedClass(cached_java_classes_[i].className, + &cached_java_classes_[i].javaClass); + if (!status.ok()) { + return status; + } } -} -JavaClassCache::~JavaClassCache() { - // Release all global ref of cached jclasses - for (const auto &item : cached_java_classes_) { - if (item.javaClass) { - jni_env_->DeleteGlobalRef(item.javaClass); + // Set all method names, signatures and class infos + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_CONSTRUCTOR] + .javaClassAndName = cached_java_classes_[JC_FLINK_PATH]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_CONSTRUCTOR].methodName = + ""; + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_CONSTRUCTOR].signature = + "(Lorg/apache/flink/core/fs/Path;)Z"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_TO_STRING] + .javaClassAndName = cached_java_classes_[JC_FLINK_PATH]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_TO_STRING].methodName = + "toString"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_TO_STRING].signature = + "()Ljava/lang/String;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_URI_CONSTRUCTOR] + .javaClassAndName = cached_java_classes_[JC_URI]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_URI_CONSTRUCTOR].methodName = + ""; + cached_java_methods_[CachedJavaMethod::JM_FLINK_URI_CONSTRUCTOR].signature = + "(Ljava/lang/String;)V"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET].methodName = + "get"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET].signature = + "(Ljava/net/URI;)Lorg/apache/flink/core/fs/FileSystem;"; + + 
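// Reminder on the JNI signature encoding used throughout this table:
// "(Ljava/net/URI;)Lorg/apache/flink/core/fs/FileSystem;" describes a method
// taking a java.net.URI and returning an org.apache.flink.core.fs.FileSystem;
// "Z" is boolean, "I" is int, "J" is long, "V" is void, and a leading "["
// (as in "[Lorg/apache/flink/core/fs/FileStatus;") denotes an array.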
cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_EXISTS] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_EXISTS] + .methodName = "exists"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_EXISTS] + .signature = "(Lorg/apache/flink/core/fs/Path;)Z"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_LIST_STATUS] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_LIST_STATUS] + .methodName = "listStatus"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_LIST_STATUS] + .signature = + "(Lorg/apache/flink/core/fs/Path;)[Lorg/apache/flink/core/fs/FileStatus;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET_FILE_STATUS] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET_FILE_STATUS] + .methodName = "getFileStatus"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET_FILE_STATUS] + .signature = + "(Lorg/apache/flink/core/fs/Path;)Lorg/apache/flink/core/fs/FileStatus;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_DELETE] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_DELETE] + .methodName = "delete"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_DELETE] + .signature = "(Lorg/apache/flink/core/fs/Path;Z)Z"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_MKDIR] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_MKDIR] + .methodName = "mkdirs"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_MKDIR].signature = + "(Lorg/apache/flink/core/fs/Path;)Z"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_RENAME_FILE] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_RENAME_FILE] + .methodName = "rename"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_RENAME_FILE] + .signature = + "(Lorg/apache/flink/core/fs/Path;Lorg/apache/flink/core/fs/Path;)Z"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_OPEN] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_OPEN].methodName = + "open"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_OPEN].signature = + "(Lorg/apache/flink/core/fs/Path;)Lorg/apache/flink/state/forst/fs/" + "ByteBufferReadableFSDataInputStream;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SEQ_READ] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_INPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SEQ_READ] + .methodName = "readFully"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SEQ_READ] + .signature = "(Ljava/nio/ByteBuffer;)I"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_RANDOM_READ] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_INPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_RANDOM_READ] + .methodName = "readFully"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_RANDOM_READ] + .signature = "(JLjava/nio/ByteBuffer;)I"; + + 
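// Note: both readFully overloads cached above return an int byte count, and
// the callers in env_flink.cc treat a -1 return as end-of-stream (mapping it
// to an empty Slice).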
cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SKIP] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_INPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SKIP] + .methodName = "skip"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_INPUT_STREAM_SKIP] + .signature = "(J)J"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_WRITE] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_OUTPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_WRITE] + .methodName = "write"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_WRITE] + .signature = "(Ljava/nio/ByteBuffer;)V"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_FLUSH] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_OUTPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_FLUSH] + .methodName = "flush"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_FLUSH] + .signature = "()V"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_SYNC] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_OUTPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_SYNC] + .methodName = "sync"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_SYNC] + .signature = "()V"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_CLOSE] + .javaClassAndName = cached_java_classes_[JC_FLINK_FS_OUTPUT_STREAM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_CLOSE] + .methodName = "close"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FS_OUTPUT_STREAM_CLOSE] + .signature = "()V"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_CREATE] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_CREATE] + .methodName = "create"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_CREATE] + .signature = + "(Lorg/apache/flink/core/fs/Path;)Lorg/apache/flink/state/forst/fs/" + "ByteBufferWritableFSDataOutputStream;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_PATH] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_STATUS]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_PATH] + .methodName = "getPath"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_PATH] + .signature = "()Lorg/apache/flink/core/fs/Path;"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_LEN] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_STATUS]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_LEN] + .methodName = "getLen"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_LEN] + .signature = "()J"; + + cached_java_methods_ + [CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_MODIFICATION_TIME] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_STATUS]; + cached_java_methods_ + [CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_MODIFICATION_TIME] + .methodName = "getModificationTime"; + cached_java_methods_ + [CachedJavaMethod::JM_FLINK_FILE_STATUS_GET_MODIFICATION_TIME] + .signature = "()J"; + + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_IS_DIR] + .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_STATUS]; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_IS_DIR] + .methodName = "isDir"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_STATUS_IS_DIR] + .signature = 
"()Z"; + + // Create and set the jmethod based on the method names and signatures set + // above + int numCachedMethods = + sizeof(cached_java_methods_) / sizeof(JavaMethodContext); + for (int i = 0; i < numCachedMethods; i++) { + cached_java_methods_[i].javaMethod = jni_env_->GetMethodID( + cached_java_methods_[i].javaClassAndName.javaClass, + cached_java_methods_[i].methodName, cached_java_methods_[i].signature); + + if (!cached_java_methods_[i].javaMethod) { + return IOStatus::IOError(std::string("Exception when GetMethodID, ") + .append(cached_java_methods_[i].ToString())); } } + return IOStatus::OK(); } -Status JavaClassCache::initCachedClass(const char *className, - jclass *cachedJclass) { +IOStatus JavaClassCache::initCachedClass(const char* className, + jclass* cachedJclass) { jclass tempLocalClassRef = jni_env_->FindClass(className); if (!tempLocalClassRef) { - return Status::IOError("Exception when FindClass, class name: " + - std::string(className)); + return IOStatus::IOError("Exception when FindClass, class name: " + + std::string(className)); } *cachedJclass = (jclass)jni_env_->NewGlobalRef(tempLocalClassRef); if (!*cachedJclass) { - return Status::IOError("Exception when NewGlobalRef, class name " + - std::string(className)); + return IOStatus::IOError("Exception when NewGlobalRef, class name " + + std::string(className)); } jni_env_->DeleteLocalRef(tempLocalClassRef); - return Status::OK(); + return IOStatus::OK(); +} + +JavaClassCache::JavaClassContext JavaClassCache::GetJClass( + CachedJavaClass cachedJavaClass) { + return cached_java_classes_[cachedJavaClass]; +} + +JavaClassCache::JavaMethodContext JavaClassCache::GetJMethod( + CachedJavaMethod cachedJavaMethod) { + return cached_java_methods_[cachedJavaMethod]; } -Status JavaClassCache::GetJClass(CachedJavaClass cachedJavaClass, - jclass *javaClass) { - jclass targetClass = cached_java_classes_[cachedJavaClass].javaClass; - Status status = Status::OK(); - if (!targetClass) { - status = initCachedClass(cached_java_classes_[cachedJavaClass].className, - &targetClass); +IOStatus JavaClassCache::ConstructPathInstance(const std::string& file_path, + jobject* pathInstance) { + JNIEnv* jniEnv = getJNIEnv(); + JavaClassCache::JavaClassContext pathClass = + GetJClass(JavaClassCache::JC_FLINK_PATH); + JavaClassCache::JavaMethodContext pathConstructor = + GetJMethod(JavaClassCache::JM_FLINK_PATH_CONSTRUCTOR); + jstring pathString = jniEnv->NewStringUTF(file_path.c_str()); + jobject tempPathInstance = jniEnv->NewObject( + pathClass.javaClass, pathConstructor.javaMethod, pathString); + jniEnv->DeleteLocalRef(pathString); + if (tempPathInstance == nullptr) { + return CheckThenError(std::string("Exception when ConstructPathInstance, ") + .append(pathClass.ToString()) + .append(pathConstructor.ToString()) + .append(", args: Path(") + .append(file_path) + .append(")")); } - *javaClass = targetClass; - return status; + *pathInstance = tempPathInstance; + return IOStatus::OK(); +} + +IOStatus CurrentStatus( + const std::function& exceptionMessageIfError) { + JNIEnv* jniEnv = getJNIEnv(); + if (jniEnv->ExceptionCheck()) { + // Throw Exception to Java side, stop any call from Java. 
+ jthrowable throwable = jniEnv->ExceptionOccurred(); + jniEnv->ExceptionDescribe(); + jniEnv->ExceptionClear(); + jniEnv->Throw(throwable); + return IOStatus::IOError(exceptionMessageIfError()); + } + return IOStatus::OK(); +} + +IOStatus CheckThenError(const std::string& exceptionMessageIfError) { + JNIEnv* jniEnv = getJNIEnv(); + if (jniEnv->ExceptionCheck()) { + // Throw Exception to Java side, stop any call from Java. + jthrowable throwable = jniEnv->ExceptionOccurred(); + jniEnv->ExceptionDescribe(); + jniEnv->ExceptionClear(); + jniEnv->Throw(throwable); + } + return IOStatus::IOError(exceptionMessageIfError); } } // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/jni_helper.h b/env/flink/jni_helper.h index 39d9e9f9a..fefaea8fb 100644 --- a/env/flink/jni_helper.h +++ b/env/flink/jni_helper.h @@ -3,8 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include +#include + #include "jni.h" -#include "rocksdb/status.h" +#include "rocksdb/io_status.h" namespace ROCKSDB_NAMESPACE { @@ -24,22 +27,98 @@ class JavaClassCache { NUM_CACHED_CLASSES } CachedJavaClass; - // Constructor and Destructor - explicit JavaClassCache(JNIEnv* env); - ~JavaClassCache(); - - // Get jclass by specific CachedJavaClass - Status GetJClass(CachedJavaClass cachedJavaClass, jclass* javaClass); + // Frequently-used method type representing jmethods which will be cached. + typedef enum { + JM_FLINK_PATH_CONSTRUCTOR, + JM_FLINK_PATH_TO_STRING, + JM_FLINK_URI_CONSTRUCTOR, + JM_FLINK_FILE_SYSTEM_GET, + JM_FLINK_FILE_SYSTEM_EXISTS, + JM_FLINK_FILE_SYSTEM_LIST_STATUS, + JM_FLINK_FILE_SYSTEM_GET_FILE_STATUS, + JM_FLINK_FILE_SYSTEM_DELETE, + JM_FLINK_FILE_SYSTEM_MKDIR, + JM_FLINK_FILE_SYSTEM_RENAME_FILE, + JM_FLINK_FILE_SYSTEM_OPEN, + JM_FLINK_FS_INPUT_STREAM_SEQ_READ, + JM_FLINK_FS_INPUT_STREAM_RANDOM_READ, + JM_FLINK_FS_INPUT_STREAM_SKIP, + JM_FLINK_FS_OUTPUT_STREAM_WRITE, + JM_FLINK_FS_OUTPUT_STREAM_FLUSH, + JM_FLINK_FS_OUTPUT_STREAM_SYNC, + JM_FLINK_FS_OUTPUT_STREAM_CLOSE, + JM_FLINK_FILE_SYSTEM_CREATE, + JM_FLINK_FILE_STATUS_GET_PATH, + JM_FLINK_FILE_STATUS_GET_LEN, + JM_FLINK_FILE_STATUS_GET_MODIFICATION_TIME, + JM_FLINK_FILE_STATUS_IS_DIR, + NUM_CACHED_METHODS + } CachedJavaMethod; - private: - typedef struct { + // jclass with its context description + struct JavaClassContext { jclass javaClass; const char* className; - } javaClassAndName; + std::string ToString() const { + return std::string("className: ").append(className); + } + }; + + // jmethod with its context description + struct JavaMethodContext { + JavaClassContext javaClassAndName; + jmethodID javaMethod; + const char* methodName; + const char* signature; + + std::string ToString() const { + return javaClassAndName.ToString() + .append(", methodName: ") + .append(methodName) + .append(", signature: ") + .append(signature); + } + }; + + ~JavaClassCache(); + + // Create a unique instance which inits necessary cached classes and methods. + // Return Status representing whether these classes and methods are inited + // correctly or not. + static IOStatus Create(JNIEnv* env, + std::unique_ptr* javaClassCache); + + // Get JavaClassContext by specific CachedJavaClass. + JavaClassContext GetJClass(CachedJavaClass cachedJavaClass); + + // Get JavaMethodContext by specific CachedJavaMethod. + JavaMethodContext GetJMethod(CachedJavaMethod cachedJavaMethod); + + // Construct Java Path Instance based on cached classes and method related to + // Path. 
+ IOStatus ConstructPathInstance(const std::string& /*file_path*/, + jobject* /*pathInstance*/); + + private: JNIEnv* jni_env_; - javaClassAndName cached_java_classes_[JavaClassCache::NUM_CACHED_CLASSES]; + JavaClassContext cached_java_classes_[CachedJavaClass::NUM_CACHED_CLASSES]; + JavaMethodContext cached_java_methods_[CachedJavaMethod::NUM_CACHED_METHODS]; - Status initCachedClass(const char* className, jclass* cachedClass); + explicit JavaClassCache(JNIEnv* env); + + // Init all classes and methods. + IOStatus Init(); + + // Init cached class. + IOStatus initCachedClass(const char* className, jclass* cachedClass); }; + +// Return current status of JNIEnv. +IOStatus CurrentStatus( + const std::function& /*exceptionMessageIfError*/); + +// Wrap error status of JNIEnv. +IOStatus CheckThenError(const std::string& /*exceptionMessageIfError*/); + } // namespace ROCKSDB_NAMESPACE \ No newline at end of file From d749df58731691bd5d23bbf00415cf96f986da8a Mon Sep 17 00:00:00 2001 From: Hangxiang Yu Date: Mon, 18 Mar 2024 16:40:03 +0800 Subject: [PATCH 373/386] [env] Modify the license (#13) (cherry picked from commit a5c920d35dcf7a5a9a09bdd00b06cffdbff8a919) --- env/flink/env_flink.cc | 21 +++++++++++++++++---- env/flink/env_flink.h | 21 +++++++++++++++++---- env/flink/jni_helper.cc | 21 +++++++++++++++++---- env/flink/jni_helper.h | 21 +++++++++++++++++---- 4 files changed, 68 insertions(+), 16 deletions(-) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index 8987084d0..290aa215b 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -1,7 +1,20 @@ -// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "env_flink.h" diff --git a/env/flink/env_flink.h b/env/flink/env_flink.h index a4d1892b4..2b937b050 100644 --- a/env/flink/env_flink.h +++ b/env/flink/env_flink.h @@ -1,7 +1,20 @@ -// Copyright (c) 2021-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #pragma once diff --git a/env/flink/jni_helper.cc b/env/flink/jni_helper.cc index 6d18219cb..de82978e3 100644 --- a/env/flink/jni_helper.cc +++ b/env/flink/jni_helper.cc @@ -1,7 +1,20 @@ -// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "jni_helper.h" diff --git a/env/flink/jni_helper.h b/env/flink/jni_helper.h index fefaea8fb..1927a2c07 100644 --- a/env/flink/jni_helper.h +++ b/env/flink/jni_helper.h @@ -1,7 +1,20 @@ -// Copyright (c) 2019-present, Facebook, Inc. All rights reserved. -// This source code is licensed under both the GPLv2 (found in the -// COPYING file in the root directory) and Apache 2.0 License -// (found in the LICENSE.Apache file in the root directory). +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ #include #include From 40bf82a40fa90961a5d040d3d32bc0046b39e992 Mon Sep 17 00:00:00 2001 From: Jinzhong Li Date: Thu, 21 Mar 2024 16:35:09 +0800 Subject: [PATCH 374/386] [env] Support JNI of FlinkEnv (#12) * [env] Support JNI of FlinkEnv (cherry picked from commit ec88681c32e5f9d80b0bf331070bd05d740d685c) --- env/flink/env_flink.cc | 21 +++++++ java/CMakeLists.txt | 3 + java/rocksjni/env_flink.cc | 63 ++++++++++++++++++++ java/src/main/java/org/rocksdb/FlinkEnv.java | 41 +++++++++++++ src.mk | 1 + 5 files changed, 129 insertions(+) create mode 100644 java/rocksjni/env_flink.cc create mode 100644 java/src/main/java/org/rocksdb/FlinkEnv.java diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index 290aa215b..9ff8f5b6d 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -855,4 +855,25 @@ Status FlinkFileSystem::Create(const std::shared_ptr& base, result->reset(fileSystem); return status; } + +Status NewFlinkEnv(const std::string& uri, + std::unique_ptr* flinkFileSystem) { + std::shared_ptr fs; + Status s = NewFlinkFileSystem(uri, &fs); + if (s.ok()) { + *flinkFileSystem = NewCompositeEnv(fs); + } + return s; +} + +Status NewFlinkFileSystem(const std::string& uri, + std::shared_ptr* fs) { + std::unique_ptr flinkFileSystem; + Status s = + FlinkFileSystem::Create(FileSystem::Default(), uri, &flinkFileSystem); + if (s.ok()) { + fs->reset(flinkFileSystem.release()); + } + return s; +} } // namespace ROCKSDB_NAMESPACE diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index f9fd9b564..fc9e0cfe7 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -37,6 +37,7 @@ set(JNI_NATIVE_SOURCES rocksjni/concurrent_task_limiter.cc rocksjni/config_options.cc rocksjni/env.cc + rocksjni/env_flink.cc rocksjni/env_options.cc rocksjni/event_listener.cc rocksjni/event_listener_jnicallback.cc @@ -167,6 +168,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/FilterPolicyType.java src/main/java/org/rocksdb/FileOperationInfo.java src/main/java/org/rocksdb/FlinkCompactionFilter.java + src/main/java/org/rocksdb/FlinkEnv.java src/main/java/org/rocksdb/FlushJobInfo.java src/main/java/org/rocksdb/FlushReason.java src/main/java/org/rocksdb/FlushOptions.java @@ -688,6 +690,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.11.4") org.rocksdb.EnvOptions org.rocksdb.Filter org.rocksdb.FlinkCompactionFilter + org.rocksdb.FlinkEnv org.rocksdb.FlushOptions org.rocksdb.HashLinkedListMemTableConfig org.rocksdb.HashSkipListMemTableConfig diff --git a/java/rocksjni/env_flink.cc b/java/rocksjni/env_flink.cc new file mode 100644 index 000000000..f6d4b44ca --- /dev/null +++ b/java/rocksjni/env_flink.cc @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "env/flink/env_flink.h" + +#include + +#include + +#include "java/rocksjni/portal.h" +#include "rocksdb/env.h" + +/* + * Class: org_rocksdb_FlinkEnv + * Method: createFlinkEnv + * Signature: (Ljava/lang/String;)J + */ +jlong Java_org_rocksdb_FlinkEnv_createFlinkEnv(JNIEnv* env, jclass, + jstring base_path) { + jboolean has_exception = JNI_FALSE; + auto path = + ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, base_path, &has_exception); + if (has_exception == JNI_TRUE) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, "Could not copy jstring to std::string"); + return 0; + } + std::unique_ptr flink_env; + auto status = ROCKSDB_NAMESPACE::NewFlinkEnv(path, &flink_env); + if (!status.ok()) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, status); + return 0; + } + auto ptr_as_handle = flink_env.release(); + return reinterpret_cast(ptr_as_handle); +} + +/* + * Class: org_rocksdb_FlinkEnv + * Method: disposeInternal + * Signature: (J)V + */ +void Java_org_rocksdb_FlinkEnv_disposeInternal(JNIEnv*, jobject, + jlong jhandle) { + auto* handle = reinterpret_cast(jhandle); + assert(handle != nullptr); + delete handle; +} diff --git a/java/src/main/java/org/rocksdb/FlinkEnv.java b/java/src/main/java/org/rocksdb/FlinkEnv.java new file mode 100644 index 000000000..91e6d46b6 --- /dev/null +++ b/java/src/main/java/org/rocksdb/FlinkEnv.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.rocksdb; + +/** + * Flink Env which proxy all filesystem access to Flink FileSystem. + */ +public class FlinkEnv extends Env { + /** +

* Creates a new environment that is used for Flink environment. + * + * <p>The caller must delete the result when it is + * no longer needed.

    + * + * @param basePath the base path string for the given Flink file system, + * formatted as "{fs-schema-supported-by-flink}://xxx" + */ + public FlinkEnv(final String basePath) { + super(createFlinkEnv(basePath)); + } + + private static native long createFlinkEnv(final String basePath); + + @Override protected final native void disposeInternal(final long handle); +} \ No newline at end of file diff --git a/src.mk b/src.mk index 4beae92a3..c58fc70fe 100644 --- a/src.mk +++ b/src.mk @@ -670,6 +670,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/config_options.cc \ java/rocksjni/export_import_files_metadatajni.cc \ java/rocksjni/env.cc \ + java/rocksjni/env_flink.cc \ java/rocksjni/env_options.cc \ java/rocksjni/event_listener.cc \ java/rocksjni/event_listener_jnicallback.cc \ From a4ada5b6e3195371a873c89e0b7b78ade9da97ba Mon Sep 17 00:00:00 2001 From: Jinzhong Li Date: Fri, 29 Mar 2024 11:41:31 +0800 Subject: [PATCH 375/386] [env]Introduce flink-env test suite (#17) * [env]Introduce flink-env test suite (cherry picked from commit de9582bb42d8451ec36c15521507f6a9e1c951e8) --- CMakeLists.txt | 3 +- env/flink/env_flink.cc | 2 +- env/flink/env_flink.h | 8 + env/flink/env_flink_test_suite.cc | 66 +++ env/flink/env_flink_test_suite.h | 34 ++ env/flink/jni_helper.cc | 18 +- env/flink/jni_helper.h | 5 +- java/CMakeLists.txt | 3 + java/Makefile | 20 +- .../org/apache/flink/core/fs/FileStatus.java | 79 +++ .../org/apache/flink/core/fs/FileSystem.java | 257 ++++++++++ .../flink/core/fs/LocalDataInputStream.java | 83 ++++ .../flink/core/fs/LocalDataOutputStream.java | 92 ++++ .../apache/flink/core/fs/LocalFileStatus.java | 93 ++++ .../apache/flink/core/fs/LocalFileSystem.java | 296 +++++++++++ .../java/org/apache/flink/core/fs/Path.java | 469 ++++++++++++++++++ .../ByteBufferReadableFSDataInputStream.java | 133 +++++ .../ByteBufferWritableFSDataOutputStream.java | 83 ++++ .../state/forst/fs/ForStFlinkFileSystem.java | 126 +++++ java/rocksjni/env_flink_test_suite.cc | 73 +++ .../java/org/rocksdb/EnvFlinkTestSuite.java | 50 ++ .../java/org/rocksdb/flink/FlinkEnvTest.java | 45 ++ src.mk | 2 + 23 files changed, 2030 insertions(+), 10 deletions(-) create mode 100644 env/flink/env_flink_test_suite.cc create mode 100644 env/flink/env_flink_test_suite.h create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileStatus.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileSystem.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataInputStream.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataOutputStream.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileStatus.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileSystem.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/core/fs/Path.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream.java create mode 100644 java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ForStFlinkFileSystem.java create mode 100644 java/rocksjni/env_flink_test_suite.cc create mode 100644 java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java create mode 100644 java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 1efcde659..0f93b43e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1022,7 +1022,8 @@ else() env/io_posix.cc env/flink/env_flink.cc env/flink/jvm_util.cc - env/flink/jni_helper.cc) + env/flink/jni_helper.cc + env/flink/env_flink_test_suite.cc) endif() if(USE_FOLLY_LITE) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index 9ff8f5b6d..b963fe508 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -306,7 +306,7 @@ class FlinkDirectory : public FSDirectory { FlinkFileSystem::FlinkFileSystem(const std::shared_ptr& base_fs, const std::string& base_path) - : FileSystemWrapper(base_fs), base_path_(base_path) {} + : FileSystemWrapper(base_fs), base_path_(TrimTrailingSlash(base_path)) {} FlinkFileSystem::~FlinkFileSystem() { if (file_system_instance_ != nullptr) { diff --git a/env/flink/env_flink.h b/env/flink/env_flink.h index 2b937b050..04295815f 100644 --- a/env/flink/env_flink.h +++ b/env/flink/env_flink.h @@ -115,6 +115,14 @@ class FlinkFileSystem : public FileSystemWrapper { const IOOptions& /*options*/, IODebugContext* /*dbg*/, jobject* /*fileStatus*/); std::string ConstructPath(const std::string& /*file_name*/); + + static std::string TrimTrailingSlash(const std::string& base_path) { + if (!base_path.empty() && base_path.back() == '/') { + return base_path.substr(0, base_path.size() - 1); + } else { + return base_path; + } + } }; // Returns a `FlinkEnv` with base_path diff --git a/env/flink/env_flink_test_suite.cc b/env/flink/env_flink_test_suite.cc new file mode 100644 index 000000000..2b1a312ab --- /dev/null +++ b/env/flink/env_flink_test_suite.cc @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "env/flink/env_flink_test_suite.h" + +#include +#include + +#define ASSERT_TRUE(expression) \ + if (!(expression)) { \ + std::cerr << "Assertion failed: " << #expression << ", file " << __FILE__ \ + << ", line " << __LINE__ << "." 
<< std::endl; \ + std::abort(); \ + } + +namespace ROCKSDB_NAMESPACE { + +EnvFlinkTestSuites::EnvFlinkTestSuites(const std::string& basePath) + : base_path_(basePath) {} + +void EnvFlinkTestSuites::runAllTestSuites() { + setUp(); + testFileExist(); +} + +void EnvFlinkTestSuites::setUp() { + auto status = ROCKSDB_NAMESPACE::NewFlinkEnv(base_path_, &flink_env_); + if (!status.ok()) { + throw std::runtime_error("New FlinkEnv failed"); + } +} + +void EnvFlinkTestSuites::testFileExist() { + std::string fileName("test-file"); + Status result = flink_env_->FileExists(fileName); + ASSERT_TRUE(result.IsNotFound()); + + // Generate a file manually + const std::string prefix = "file:"; + std::string writeFileName = base_path_ + fileName; + if (writeFileName.compare(0, prefix.size(), prefix) == 0) { + writeFileName = writeFileName.substr(prefix.size()); + } + std::ofstream writeFile(writeFileName); + writeFile << "testFileExist"; + writeFile.close(); + + result = flink_env_->FileExists(fileName); + ASSERT_TRUE(result.ok()); +} +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/env_flink_test_suite.h b/env/flink/env_flink_test_suite.h new file mode 100644 index 000000000..3826060d5 --- /dev/null +++ b/env/flink/env_flink_test_suite.h @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "env_flink.h" + +namespace ROCKSDB_NAMESPACE { + +class EnvFlinkTestSuites { + public: + EnvFlinkTestSuites(const std::string& basePath); + void runAllTestSuites(); + + private: + std::unique_ptr flink_env_; + const std::string base_path_; + void setUp(); + void testFileExist(); +}; +} // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/jni_helper.cc b/env/flink/jni_helper.cc index de82978e3..9be816c39 100644 --- a/env/flink/jni_helper.cc +++ b/env/flink/jni_helper.cc @@ -81,7 +81,7 @@ IOStatus JavaClassCache::Init() { cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_CONSTRUCTOR].methodName = ""; cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_CONSTRUCTOR].signature = - "(Lorg/apache/flink/core/fs/Path;)Z"; + "(Ljava/lang/String;)V"; cached_java_methods_[CachedJavaMethod::JM_FLINK_PATH_TO_STRING] .javaClassAndName = cached_java_classes_[JC_FLINK_PATH]; @@ -103,6 +103,8 @@ IOStatus JavaClassCache::Init() { "get"; cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET].signature = "(Ljava/net/URI;)Lorg/apache/flink/core/fs/FileSystem;"; + cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_GET].isStatic = + true; cached_java_methods_[CachedJavaMethod::JM_FLINK_FILE_SYSTEM_EXISTS] .javaClassAndName = cached_java_classes_[JC_FLINK_FILE_SYSTEM]; @@ -251,9 +253,17 @@ IOStatus JavaClassCache::Init() { int numCachedMethods = sizeof(cached_java_methods_) / sizeof(JavaMethodContext); for (int i = 0; i < numCachedMethods; i++) { - cached_java_methods_[i].javaMethod = jni_env_->GetMethodID( - cached_java_methods_[i].javaClassAndName.javaClass, - cached_java_methods_[i].methodName, cached_java_methods_[i].signature); + if (cached_java_methods_[i].isStatic) { + cached_java_methods_[i].javaMethod = jni_env_->GetStaticMethodID( + cached_java_methods_[i].javaClassAndName.javaClass, + cached_java_methods_[i].methodName, + cached_java_methods_[i].signature); + } else { + cached_java_methods_[i].javaMethod = jni_env_->GetMethodID( + cached_java_methods_[i].javaClassAndName.javaClass, + cached_java_methods_[i].methodName, + cached_java_methods_[i].signature); + } if (!cached_java_methods_[i].javaMethod) { return IOStatus::IOError(std::string("Exception when GetMethodID, ") diff --git a/env/flink/jni_helper.h b/env/flink/jni_helper.h index 1927a2c07..54a6da85b 100644 --- a/env/flink/jni_helper.h +++ b/env/flink/jni_helper.h @@ -84,13 +84,16 @@ class JavaClassCache { jmethodID javaMethod; const char* methodName; const char* signature; + bool isStatic = false; std::string ToString() const { return javaClassAndName.ToString() .append(", methodName: ") .append(methodName) .append(", signature: ") - .append(signature); + .append(signature) + .append(", isStatic:") + .append(isStatic ? 
"true" : "false"); } }; diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index fc9e0cfe7..c31083b6f 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -38,6 +38,7 @@ set(JNI_NATIVE_SOURCES rocksjni/config_options.cc rocksjni/env.cc rocksjni/env_flink.cc + rocksjni/env_flink_test_suite.cc rocksjni/env_options.cc rocksjni/event_listener.cc rocksjni/event_listener_jnicallback.cc @@ -159,6 +160,7 @@ set(JAVA_MAIN_CLASSES src/main/java/org/rocksdb/DirectSlice.java src/main/java/org/rocksdb/EncodingType.java src/main/java/org/rocksdb/Env.java + src/main/java/org/rocksdb/EnvFlinkTestSuite.java src/main/java/org/rocksdb/EnvOptions.java src/main/java/org/rocksdb/EventListener.java src/main/java/org/rocksdb/Experimental.java @@ -687,6 +689,7 @@ if(${CMAKE_VERSION} VERSION_LESS "3.11.4") org.rocksdb.DBOptions org.rocksdb.DirectSlice org.rocksdb.Env + org.rocksdb.EnvFlinkTestSuite org.rocksdb.EnvOptions org.rocksdb.Filter org.rocksdb.FlinkCompactionFilter diff --git a/java/Makefile b/java/Makefile index e80c8130b..aae28e0cd 100644 --- a/java/Makefile +++ b/java/Makefile @@ -209,6 +209,9 @@ JAVA_TESTS = \ org.rocksdb.WriteOptionsTest\ org.rocksdb.WriteBatchWithIndexTest +FLINK_TESTS = \ + org.rocksdb.flink.FlinkEnvTest + MAIN_SRC = src/main/java TEST_SRC = src/test/java OUTPUT = target @@ -303,14 +306,15 @@ PLUGIN_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, or CORE_SOURCES = $(foreach pkg, org/rocksdb/util org/rocksdb, $(MAIN_SRC)/$(pkg)/*.java) SOURCES = $(wildcard $(CORE_SOURCES) $(PLUGIN_SOURCES)) PLUGIN_TEST_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb, $(root)/$(TEST_SRC)/$(pkg)/*.java)) -CORE_TEST_SOURCES = $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb, $(TEST_SRC)/$(pkg)/*.java) +CORE_TEST_SOURCES = $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb/flink org/rocksdb, $(TEST_SRC)/$(pkg)/*.java) TEST_SOURCES = $(wildcard $(CORE_TEST_SOURCES) $(PLUGIN_TEST_SOURCES)) +MOCK_FLINK_TEST_SOURCES = $(foreach pkg, org/apache/flink/core/fs org/apache/flink/state/forst/fs, flinktestmock/src/main/java/$(pkg)/*.java) # Configure the plugin tests and java classes ROCKSDB_PLUGIN_NATIVE_JAVA_CLASSES = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach class, $($(plugin)_NATIVE_JAVA_CLASSES), $(class))) NATIVE_JAVA_CLASSES = $(NATIVE_JAVA_CLASSES) $(ROCKSDB_PLUGIN_NATIVE_JAVA_CLASSES) ROCKSDB_PLUGIN_JAVA_TESTS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach testclass, $($(plugin)_JAVA_TESTS), $(testclass))) -ALL_JAVA_TESTS = $(JAVA_TESTS) $(ROCKSDB_PLUGIN_JAVA_TESTS) +ALL_JAVA_TESTS = $(FLINK_TESTS) $(JAVA_TESTS) $(ROCKSDB_PLUGIN_JAVA_TESTS) # When debugging add -Xcheck:jni to the java args ifneq ($(DEBUG_LEVEL),0) @@ -450,7 +454,7 @@ java_test: java resolve_test_deps $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES)\ $(TEST_SOURCES) -test: java java_test +test: java mock_flink_fs java_test $(MAKE) run_test run_test: @@ -466,3 +470,13 @@ db_bench: java pmd: $(MAVEN_CMD) pmd:pmd pmd:cpd pmd:check + +mock_flink_fs: + $(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES) \ + $(MOCK_FLINK_TEST_SOURCES) + +flink_test: java java_test mock_flink_fs + $(MAKE) run_flink_test + +run_flink_test: + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner 
$(FLINK_TESTS) diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileStatus.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileStatus.java new file mode 100644 index 000000000..52d3360b7 --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileStatus.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is based on source code from the Hadoop Project (http://hadoop.apache.org/), licensed + * by the Apache Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. + */ + +package org.apache.flink.core.fs; + +/** + * Interface that represents the client side information for a file independent of the file system. + */ +public interface FileStatus { + /** + * Return the length of this file. + * + * @return the length of this file + */ + long getLen(); + + /** + * Get the block size of the file. + * + * @return the number of bytes + */ + long getBlockSize(); + + /** + * Get the replication factor of a file. + * + * @return the replication factor of a file. + */ + short getReplication(); + + /** + * Get the modification time of the file. + * + * @return the modification time of file in milliseconds since January 1, 1970 UTC. + */ + long getModificationTime(); + + /** + * Get the access time of the file. + * + * @return the access time of file in milliseconds since January 1, 1970 UTC. + */ + long getAccessTime(); + + /** + * Checks if this object represents a directory. + * + * @return true if this is a directory, false otherwise + */ + boolean isDir(); + + /** + * Returns the corresponding Path to the FileStatus. + * + * @return the corresponding Path to the FileStatus + */ + Path getPath(); +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileSystem.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileSystem.java new file mode 100644 index 000000000..5fef72b42 --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/FileSystem.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file is based on source code from the Hadoop Project (http://hadoop.apache.org/), licensed + * by the Apache Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. + */ + +package org.apache.flink.core.fs; + +import static org.apache.flink.core.fs.LocalFileSystem.LOCAL_URI; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.util.Objects; + +/** + * Abstract base class of all file systems used by Flink. This class may be extended to implement + * distributed file systems, or local file systems. The abstraction by this file system is very + * simple, and the set of available operations quite limited, to support the common denominator of a + * wide range of file systems. For example, appending to or mutating existing files is not + * supported. + */ +public abstract class FileSystem { + /** + * The possible write modes. The write mode decides what happens if a file should be created, + * but already exists. + */ + public enum WriteMode { + + /** + * Creates the target file only if no file exists at that path already. Does not overwrite + * existing files and directories. + */ + NO_OVERWRITE, + + /** + * Creates a new target file regardless of any existing files or directories. Existing files + * and directories will be deleted (recursively) automatically before creating the new file. + */ + OVERWRITE + } + + /** + * Returns a reference to the {@link FileSystem} instance for accessing the local file system. + * + * @return a reference to the {@link FileSystem} instance for accessing the local file system. + */ + public static FileSystem getLocalFileSystem() { + return LocalFileSystem.getSharedInstance(); + } + + /** + * Returns a reference to the {@link FileSystem} instance for accessing the file system + * identified by the given {@link URI}. + * + * @param uri the {@link URI} identifying the file system + * @return a reference to the {@link FileSystem} instance for accessing the file system + * identified by the given {@link URI}. + * @throws IOException thrown if a reference to the file system instance could not be obtained + */ + public static FileSystem get(URI uri) throws IOException { + if (Objects.equals(LOCAL_URI.getScheme(), uri.getScheme()) + && Objects.equals(LOCAL_URI.getAuthority(), LOCAL_URI.getAuthority())) { + return getLocalFileSystem(); + } + throw new UnsupportedOperationException("Unsupported URI pattern:" + uri); + } + + // ------------------------------------------------------------------------ + // File System Methods + // ------------------------------------------------------------------------ + + /** + * Returns the path of the file system's current working directory. + * + * @return the path of the file system's current working directory + */ + public abstract Path getWorkingDirectory(); + + /** + * Returns the path of the user's home directory in this file system. 
+ * + * @return the path of the user's home directory in this file system. + */ + public abstract Path getHomeDirectory(); + + /** + * Returns a URI whose scheme and authority identify this file system. + * + * @return a URI whose scheme and authority identify this file system + */ + public abstract URI getUri(); + + /** + * Return a file status object that represents the path. + * + * @param f The path we want information from + * @return a FileStatus object + * @throws FileNotFoundException when the path does not exist; IOException see specific + * implementation + */ + public abstract FileStatus getFileStatus(Path f) throws IOException; + + /** + * Opens an FSDataInputStream at the indicated Path. + * + * @param f the file name to open + * @param bufferSize the size of the buffer to be used. + */ + public abstract InputStream open(Path f, int bufferSize) throws IOException; + + /** + * Opens an FSDataInputStream at the indicated Path. + * + * @param f the file to open + */ + public abstract InputStream open(Path f) throws IOException; + + /** + * List the statuses of the files/directories in the given path if the path is a directory. + * + * @param f given path + * @return the statuses of the files/directories in the given path + * @throws IOException + */ + public abstract FileStatus[] listStatus(Path f) throws IOException; + + /** + * Check if exists. + * + * @param f source file + */ + public boolean exists(final Path f) throws IOException { + try { + return (getFileStatus(f) != null); + } catch (FileNotFoundException e) { + return false; + } + } + + /** + * Delete a file. + * + * @param f the path to delete + * @param recursive if path is a directory and set to true, the directory is + * deleted else throws an exception. In case of a file the recursive can be set to either + * true or false + * @return true if delete is successful, false otherwise + * @throws IOException + */ + public abstract boolean delete(Path f, boolean recursive) throws IOException; + + /** + * Make the given file and all non-existent parents into directories. Has the semantics of Unix + * 'mkdir -p'. Existence of the directory hierarchy is not an error. + * + * @param f the directory/directories to be created + * @return true if at least one new directory has been created, false + * otherwise + * @throws IOException thrown if an I/O error occurs while creating the directory + */ + public abstract boolean mkdirs(Path f) throws IOException; + + /** + * Opens an FSDataOutputStream at the indicated Path. + * + *
<p>
    This method is deprecated, because most of its parameters are ignored by most file + * systems. To control for example the replication factor and block size in the Hadoop + * Distributed File system, make sure that the respective Hadoop configuration file is either + * linked from the Flink configuration, or in the classpath of either Flink or the user code. + * + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, the file will be + * overwritten, and if false an error will be thrown. + * @param bufferSize the size of the buffer to be used. + * @param replication required block replication for the file. + * @param blockSize the size of the file blocks + * @throws IOException Thrown, if the stream could not be opened because of an I/O, or because a + * file already exists at that path and the write mode indicates to not overwrite the file. + * @deprecated Deprecated because not well supported across types of file systems. Control the + * behavior of specific file systems via configurations instead. + */ + @Deprecated + public OutputStream create(Path f, boolean overwrite, int bufferSize, short replication, + long blockSize) throws IOException { + return create(f, overwrite ? WriteMode.OVERWRITE : WriteMode.NO_OVERWRITE); + } + + /** + * Opens an FSDataOutputStream at the indicated Path. + * + * @param f the file name to open + * @param overwrite if a file with this name already exists, then if true, the file will be + * overwritten, and if false an error will be thrown. + * @throws IOException Thrown, if the stream could not be opened because of an I/O, or because a + * file already exists at that path and the write mode indicates to not overwrite the file. + * @deprecated Use {@link #create(Path, WriteMode)} instead. + */ + @Deprecated + public OutputStream create(Path f, boolean overwrite) throws IOException { + return create(f, overwrite ? WriteMode.OVERWRITE : WriteMode.NO_OVERWRITE); + } + + /** + * Opens an FSDataOutputStream to a new file at the given path. + * + *
<p>
    If the file already exists, the behavior depends on the given {@code WriteMode}. If the + * mode is set to {@link WriteMode#NO_OVERWRITE}, then this method fails with an exception. + * + * @param f The file path to write to + * @param overwriteMode The action to take if a file or directory already exists at the given + * path. + * @return The stream to the new file at the target path. + * @throws IOException Thrown, if the stream could not be opened because of an I/O, or because a + * file already exists at that path and the write mode indicates to not overwrite the file. + */ + public abstract OutputStream create(Path f, WriteMode overwriteMode) throws IOException; + + /** + * Renames the file/directory src to dst. + * + * @param src the file/directory to rename + * @param dst the new name of the file/directory + * @return true if the renaming was successful, false otherwise + * @throws IOException + */ + public abstract boolean rename(Path src, Path dst) throws IOException; + + /** + * Returns true if this is a distributed file system. A distributed file system here means that + * the file system is shared among all Flink processes that participate in a cluster or job and + * that all these processes can see the same files. + * + * @return True, if this is a distributed file system, false otherwise. + */ + public abstract boolean isDistributedFS(); +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataInputStream.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataInputStream.java new file mode 100644 index 000000000..64706ba8d --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataInputStream.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.core.fs; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.channels.FileChannel; + +/** + * The LocalDataInputStream class is a wrapper class for a data input stream to the + * local file system. + */ +public class LocalDataInputStream extends InputStream { + /** The file input stream used to read data from. */ + private final FileInputStream fis; + + private final FileChannel fileChannel; + + /** + * Constructs a new LocalDataInputStream object from a given {@link File} object. + * + * @param file The File the data stream is read from + * @throws IOException Thrown if the data input stream cannot be created. 
+ */ + public LocalDataInputStream(File file) throws IOException { + this.fis = new FileInputStream(file); + this.fileChannel = fis.getChannel(); + } + + public void seek(long desired) throws IOException { + if (desired != getPos()) { + this.fileChannel.position(desired); + } + } + + public long getPos() throws IOException { + return this.fileChannel.position(); + } + + @Override + public int read() throws IOException { + return this.fis.read(); + } + + @Override + public int read(byte[] buffer, int offset, int length) throws IOException { + return this.fis.read(buffer, offset, length); + } + + @Override + public void close() throws IOException { + // According to javadoc, this also closes the channel + this.fis.close(); + } + + @Override + public int available() throws IOException { + return this.fis.available(); + } + + @Override + public long skip(final long n) throws IOException { + return this.fis.skip(n); + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataOutputStream.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataOutputStream.java new file mode 100644 index 000000000..aabfcaa98 --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalDataOutputStream.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.core.fs; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.channels.ClosedChannelException; + +/** + * The LocalDataOutputStream class is a wrapper class for a data output stream to the + * local file system. + */ +public class LocalDataOutputStream extends OutputStream { + /** The file output stream used to write data. */ + private final FileOutputStream fos; + + private boolean closed = false; + + /** + * Constructs a new LocalDataOutputStream object from a given {@link File} object. 
+ * + * @param file the {@link File} object the data stream is read from + * @throws IOException thrown if the data output stream cannot be created + */ + public LocalDataOutputStream(final File file) throws IOException { + this.fos = new FileOutputStream(file); + } + + @Override + public void write(final int b) throws IOException { + checkOpen(); + fos.write(b); + } + + @Override + public void write(final byte[] b) throws IOException { + checkOpen(); + fos.write(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + checkOpen(); + fos.write(b, off, len); + } + + @Override + public void close() throws IOException { + closed = true; + fos.close(); + } + + @Override + public void flush() throws IOException { + checkOpen(); + fos.flush(); + } + + public void sync() throws IOException { + checkOpen(); + fos.getFD().sync(); + } + + public long getPos() throws IOException { + checkOpen(); + return fos.getChannel().position(); + } + + private void checkOpen() throws IOException { + if (closed) { + throw new ClosedChannelException(); + } + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileStatus.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileStatus.java new file mode 100644 index 000000000..b79f112ce --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileStatus.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.core.fs; + +import java.io.File; + +/** + * The class LocalFileStatus provides an implementation of the {@link FileStatus} + * interface for the local file system. + */ +public class LocalFileStatus implements FileStatus { + /** The file this file status belongs to. */ + private final File file; + + /** The path of this file this file status belongs to. */ + private final Path path; + + /** Cached length field, to avoid repeated native/syscalls. */ + private final long len; + + /** + * Creates a LocalFileStatus object from a given {@link File} object. 
+ * + * @param f the {@link File} object this LocalFileStatus refers to + * @param fs the file system the corresponding file has been read from + */ + public LocalFileStatus(final File f, final FileSystem fs) { + this.file = f; + this.path = new Path(fs.getUri().getScheme() + ":" + f.toURI().getPath()); + this.len = f.length(); + } + + @Override + public long getAccessTime() { + return 0; // We don't have access files for local files + } + + @Override + public long getBlockSize() { + return this.len; + } + + @Override + public long getLen() { + return this.len; + } + + @Override + public long getModificationTime() { + return this.file.lastModified(); + } + + @Override + public short getReplication() { + return 1; // For local files replication is always 1 + } + + @Override + public boolean isDir() { + return this.file.isDirectory(); + } + + @Override + public Path getPath() { + return this.path; + } + + public File getFile() { + return this.file; + } + + @Override + public String toString() { + return "LocalFileStatus{" + + "file=" + file + ", path=" + path + '}'; + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileSystem.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileSystem.java new file mode 100644 index 000000000..863d689f3 --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/LocalFileSystem.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Parts of earlier versions of this file were based on source code from the + * Hadoop Project (http://hadoop.apache.org/), licensed by the Apache Software Foundation (ASF) + * under the Apache License, Version 2.0. See the NOTICE file distributed with this work for + * additional information regarding copyright ownership. + */ + +package org.apache.flink.core.fs; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.nio.file.AccessDeniedException; +import java.nio.file.DirectoryNotEmptyException; +import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.StandardCopyOption; + +/** + * The class {@code LocalFileSystem} is an implementation of the {@link FileSystem} interface for + * the local file system of the machine where the JVM runs. + */ +public class LocalFileSystem extends FileSystem { + /** The URI representing the local file system. */ + public static final URI LOCAL_URI = URI.create("file:///"); + + /** The shared instance of the local file system. 
*/ + private static final LocalFileSystem INSTANCE = new LocalFileSystem(); + + /** + * Path pointing to the current working directory. Because Paths are not immutable, we cannot + * cache the proper path here + */ + private final URI workingDir; + + /** + * Path pointing to the current user home directory. Because Paths are not immutable, we cannot + * cache the proper path here. + */ + private final URI homeDir; + + /** Constructs a new LocalFileSystem object. */ + public LocalFileSystem() { + this.workingDir = new File(System.getProperty("user.dir")).toURI(); + this.homeDir = new File(System.getProperty("user.home")).toURI(); + } + + // ------------------------------------------------------------------------ + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + final File path = pathToFile(f); + if (path.exists()) { + return new LocalFileStatus(path, this); + } else { + throw new FileNotFoundException("File " + f + " does not exist or the user running " + + "Flink ('" + System.getProperty("user.name") + + "') has insufficient permissions to access it."); + } + } + + @Override + public URI getUri() { + return LOCAL_URI; + } + + @Override + public Path getWorkingDirectory() { + return new Path(workingDir); + } + + @Override + public Path getHomeDirectory() { + return new Path(homeDir); + } + + @Override + public InputStream open(final Path f, final int bufferSize) throws IOException { + return open(f); + } + + @Override + public InputStream open(final Path f) throws IOException { + final File file = pathToFile(f); + return new LocalDataInputStream(file); + } + + @Override + public boolean exists(Path f) throws IOException { + final File path = pathToFile(f); + return path.exists(); + } + + @Override + public FileStatus[] listStatus(final Path f) throws IOException { + final File localf = pathToFile(f); + FileStatus[] results; + + if (!localf.exists()) { + return null; + } + if (localf.isFile()) { + return new FileStatus[] {new LocalFileStatus(localf, this)}; + } + + final String[] names = localf.list(); + if (names == null) { + return null; + } + results = new FileStatus[names.length]; + for (int i = 0; i < names.length; i++) { + results[i] = getFileStatus(new Path(f, names[i])); + } + + return results; + } + + @Override + public boolean delete(final Path f, final boolean recursive) throws IOException { + final File file = pathToFile(f); + if (file.isFile()) { + return file.delete(); + } else if ((!recursive) && file.isDirectory()) { + File[] containedFiles = file.listFiles(); + if (containedFiles == null) { + throw new IOException( + "Directory " + file.toString() + " does not exist or an I/O error occurred"); + } else if (containedFiles.length != 0) { + throw new IOException("Directory " + file.toString() + " is not empty"); + } + } + + return delete(file); + } + + /** + * Deletes the given file or directory. + * + * @param f the file to be deleted + * @return true if all files were deleted successfully, false + * otherwise + * @throws IOException thrown if an error occurred while deleting the files/directories + */ + private boolean delete(final File f) throws IOException { + if (f.isDirectory()) { + final File[] files = f.listFiles(); + if (files != null) { + for (File file : files) { + final boolean del = delete(file); + if (!del) { + return false; + } + } + } + } else { + return f.delete(); + } + + // Now directory is empty + return f.delete(); + } + + /** + * Recursively creates the directory specified by the provided path. 
+ * + * @return trueif the directories either already existed or have been created + * successfully, false otherwise + * @throws IOException thrown if an error occurred while creating the directory/directories + */ + @Override + public boolean mkdirs(final Path f) throws IOException { + assert f != null; + return mkdirsInternal(pathToFile(f)); + } + + private boolean mkdirsInternal(File file) throws IOException { + if (file.isDirectory()) { + return true; + } else if (file.exists() && !file.isDirectory()) { + // Important: The 'exists()' check above must come before the 'isDirectory()' check to + // be safe when multiple parallel instances try to create the directory + + // exists and is not a directory -> is a regular file + throw new FileAlreadyExistsException(file.getAbsolutePath()); + } else { + File parent = file.getParentFile(); + return (parent == null || mkdirsInternal(parent)) && (file.mkdir() || file.isDirectory()); + } + } + + @Override + public OutputStream create(final Path filePath, final WriteMode overwrite) throws IOException { + // checkNotNull(filePath, "filePath"); + + if (exists(filePath) && overwrite == WriteMode.NO_OVERWRITE) { + throw new FileAlreadyExistsException("File already exists: " + filePath); + } + + final Path parent = filePath.getParent(); + if (parent != null && !mkdirs(parent)) { + throw new IOException("Mkdirs failed to create " + parent); + } + + final File file = pathToFile(filePath); + return new LocalDataOutputStream(file); + } + + @Override + public boolean rename(final Path src, final Path dst) throws IOException { + final File srcFile = pathToFile(src); + final File dstFile = pathToFile(dst); + + final File dstParent = dstFile.getParentFile(); + + // Files.move fails if the destination directory doesn't exist + // noinspection ResultOfMethodCallIgnored -- we don't care if the directory existed or was + // created + dstParent.mkdirs(); + + try { + Files.move(srcFile.toPath(), dstFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + return true; + } catch (NoSuchFileException | AccessDeniedException | DirectoryNotEmptyException + | SecurityException ex) { + // catch the errors that are regular "move failed" exceptions and return false + return false; + } + } + + @Override + public boolean isDistributedFS() { + return false; + } + + // ------------------------------------------------------------------------ + + /** + * Converts the given Path to a File for this file system. If the path is empty, we will return + * new File(".") instead of new File(""), since the latter returns + * false for isDirectory judgement (See issue + * https://issues.apache.org/jira/browse/FLINK-18612). + */ + public File pathToFile(Path path) { + String localPath = path.getPath(); + // checkState(localPath != null, "Cannot convert a null path to File"); + + if (localPath.length() == 0) { + return new File("."); + } + + return new File(localPath); + } + + // ------------------------------------------------------------------------ + + /** + * Gets the URI that represents the local file system. That URI is {@code "file:/"} on Windows + * platforms and {@code "file:///"} on other UNIX family platforms. + * + * @return The URI that represents the local file system. + */ + public static URI getLocalFsURI() { + return LOCAL_URI; + } + + /** + * Gets the shared instance of this file system. + * + * @return The shared instance of this file system. 
+ */ + public static LocalFileSystem getSharedInstance() { + return INSTANCE; + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/core/fs/Path.java b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/Path.java new file mode 100644 index 000000000..1d06ae4be --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/core/fs/Path.java @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file is based on source code from the Hadoop Project (http://hadoop.apache.org/), licensed + * by the Apache Software Foundation (ASF) under the Apache License, Version 2.0. See the NOTICE + * file distributed with this work for additional information regarding copyright ownership. */ + +package org.apache.flink.core.fs; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.regex.Pattern; + +/** + * Names a file or directory in a {@link FileSystem}. Path strings use slash as the directory + * separator. A path string is absolute if it begins with a slash. + * + *

<p>Trailing slashes are removed from the path. + * + * <p>

    Note: Path will no longer implement {@link IOReadableWritable} in future versions. Please use + * {@code serializeToDataOutputView} and {@code deserializeFromDataInputView} instead. + */ +public class Path implements Serializable { + private static final long serialVersionUID = 1L; + + /** The directory separator, a slash. */ + public static final String SEPARATOR = "/"; + + /** The directory separator, a slash (character). */ + public static final char SEPARATOR_CHAR = '/'; + + /** Character denoting the current directory. */ + public static final String CUR_DIR = "."; + + /** A pre-compiled regex/state-machine to match the windows drive pattern. */ + private static final Pattern WINDOWS_ROOT_DIR_REGEX = Pattern.compile("/\\p{Alpha}+:/"); + + /** The internal representation of the path, a hierarchical URI. */ + private URI uri; + + /** Constructs a new (empty) path object (used to reconstruct path object after RPC call). */ + public Path() {} + + /** + * Constructs a path object from a given URI. + * + * @param uri the URI to construct the path object from + */ + public Path(URI uri) { + this.uri = uri; + } + + /** + * Resolve a child path against a parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(String parent, String child) { + this(new Path(parent), new Path(child)); + } + + /** + * Resolve a child path against a parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(Path parent, String child) { + this(parent, new Path(child)); + } + + /** + * Resolve a child path against a parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(String parent, Path child) { + this(new Path(parent), child); + } + + /** + * Resolve a child path against a parent path. + * + * @param parent the parent path + * @param child the child path + */ + public Path(Path parent, Path child) { + // Add a slash to parent's path so resolution is compatible with URI's + URI parentUri = parent.uri; + final String parentPath = parentUri.getPath(); + if (!(parentPath.equals("/") || parentPath.equals(""))) { + try { + parentUri = new URI( + parentUri.getScheme(), parentUri.getAuthority(), parentUri.getPath() + "/", null, null); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + if (child.uri.getPath().startsWith(Path.SEPARATOR)) { + child = new Path( + child.uri.getScheme(), child.uri.getAuthority(), child.uri.getPath().substring(1)); + } + + final URI resolved = parentUri.resolve(child.uri); + initialize(resolved.getScheme(), resolved.getAuthority(), resolved.getPath()); + } + + /** + * Checks if the provided path string is either null or has zero length and throws a {@link + * IllegalArgumentException} if any of the two conditions apply. + * + * @param path the path string to be checked + * @return The checked path. + */ + private String checkPathArg(String path) { + // disallow construction of a Path from an empty string + if (path == null) { + throw new IllegalArgumentException("Can not create a Path from a null string"); + } + if (path.length() == 0) { + throw new IllegalArgumentException("Can not create a Path from an empty string"); + } + return path; + } + + /** + * Construct a path from a String. Path strings are URIs, but with unescaped elements and some + * additional normalization. 
+ * + * @param pathString the string to construct a path from + */ + public Path(String pathString) { + pathString = checkPathArg(pathString); + + // We can't use 'new URI(String)' directly, since it assumes things are + // escaped, which we don't require of Paths. + + // add a slash in front of paths with Windows drive letters + if (hasWindowsDrive(pathString, false)) { + pathString = "/" + pathString; + } + + // parse uri components + String scheme = null; + String authority = null; + + int start = 0; + + // parse uri scheme, if any + final int colon = pathString.indexOf(':'); + final int slash = pathString.indexOf('/'); + if ((colon != -1) && ((slash == -1) || (colon < slash))) { // has a + // scheme + scheme = pathString.substring(0, colon); + start = colon + 1; + } + + // parse uri authority, if any + if (pathString.startsWith("//", start) && (pathString.length() - start > 2)) { // has authority + final int nextSlash = pathString.indexOf('/', start + 2); + final int authEnd = nextSlash > 0 ? nextSlash : pathString.length(); + authority = pathString.substring(start + 2, authEnd); + start = authEnd; + } + + // uri path is the rest of the string -- query & fragment not supported + final String path = pathString.substring(start, pathString.length()); + + initialize(scheme, authority, path); + } + + /** + * Construct a Path from a scheme, an authority and a path string. + * + * @param scheme the scheme string + * @param authority the authority string + * @param path the path string + */ + public Path(String scheme, String authority, String path) { + path = checkPathArg(path); + initialize(scheme, authority, path); + } + + /** + * Initializes a path object given the scheme, authority and path string. + * + * @param scheme the scheme string. + * @param authority the authority string. + * @param path the path string. + */ + private void initialize(String scheme, String authority, String path) { + try { + this.uri = new URI(scheme, authority, normalizePath(path), null, null).normalize(); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Normalizes a path string. + * + * @param path the path string to normalize + * @return the normalized path string + */ + private String normalizePath(String path) { + // remove consecutive slashes & backslashes + path = path.replace("\\", "/"); + path = path.replaceAll("/+", "/"); + + // remove tailing separator + if (path.endsWith(SEPARATOR) && !path.equals(SEPARATOR) && // UNIX root path + !WINDOWS_ROOT_DIR_REGEX.matcher(path).matches()) { // Windows root path) + + // remove tailing slash + path = path.substring(0, path.length() - SEPARATOR.length()); + } + + return path; + } + + /** + * Converts the path object to a {@link URI}. + * + * @return the {@link URI} object converted from the path object + */ + public URI toUri() { + return uri; + } + + /** + * Returns the FileSystem that owns this Path. + * + * @return the FileSystem that owns this Path + * @throws IOException thrown if the file system could not be retrieved + */ + public FileSystem getFileSystem() throws IOException { + return FileSystem.get(this.toUri()); + } + + /** + * Checks if the directory of this path is absolute. + * + * @return true if the directory of this path is absolute, false + * otherwise + */ + public boolean isAbsolute() { + final int start = hasWindowsDrive(uri.getPath(), true) ? 
3 : 0; + return uri.getPath().startsWith(SEPARATOR, start); + } + + /** + * Returns the final component of this path, i.e., everything that follows the last separator. + * + * @return the final component of the path + */ + public String getName() { + final String path = uri.getPath(); + final int slash = path.lastIndexOf(SEPARATOR); + return path.substring(slash + 1); + } + + /** + * Return full path. + * + * @return full path + */ + public String getPath() { + return uri.getPath(); + } + + /** + * Returns the parent of a path, i.e., everything that precedes the last separator or null + * if at root. + * + * @return the parent of a path or null if at root. + */ + public Path getParent() { + final String path = uri.getPath(); + final int lastSlash = path.lastIndexOf('/'); + final int start = hasWindowsDrive(path, true) ? 3 : 0; + if ((path.length() == start) || // empty path + (lastSlash == start && path.length() == start + 1)) { // at root + return null; + } + String parent; + if (lastSlash == -1) { + parent = CUR_DIR; + } else { + final int end = hasWindowsDrive(path, true) ? 3 : 0; + parent = path.substring(0, lastSlash == end ? end + 1 : lastSlash); + } + return new Path(uri.getScheme(), uri.getAuthority(), parent); + } + + /** + * Adds a suffix to the final name in the path. + * + * @param suffix The suffix to be added + * @return the new path including the suffix + */ + public Path suffix(String suffix) { + return new Path(getParent(), getName() + suffix); + } + + @Override + public String toString() { + // we can't use uri.toString(), which escapes everything, because we want + // illegal characters unescaped in the string, for glob processing, etc. + final StringBuilder buffer = new StringBuilder(); + if (uri.getScheme() != null) { + buffer.append(uri.getScheme()); + buffer.append(":"); + } + if (uri.getAuthority() != null) { + buffer.append("//"); + buffer.append(uri.getAuthority()); + } + if (uri.getPath() != null) { + String path = uri.getPath(); + if (path.indexOf('/') == 0 && hasWindowsDrive(path, true) && // has windows drive + uri.getScheme() == null && // but no scheme + uri.getAuthority() == null) { // or authority + path = path.substring(1); // remove slash before drive + } + buffer.append(path); + } + return buffer.toString(); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Path)) { + return false; + } + Path that = (Path) o; + return this.uri.equals(that.uri); + } + + @Override + public int hashCode() { + return uri.hashCode(); + } + + public int compareTo(Object o) { + Path that = (Path) o; + return this.uri.compareTo(that.uri); + } + + /** + * Returns the number of elements in this path. + * + * @return the number of elements in this path + */ + public int depth() { + String path = uri.getPath(); + int depth = 0; + int slash = path.length() == 1 && path.charAt(0) == '/' ? -1 : 0; + while (slash != -1) { + depth++; + slash = path.indexOf(SEPARATOR, slash + 1); + } + return depth; + } + + /** + * Returns a qualified path object. 
+ * + * @param fs the FileSystem that should be used to obtain the current working directory + * @return the qualified path object + */ + public Path makeQualified(FileSystem fs) { + Path path = this; + if (!isAbsolute()) { + path = new Path(fs.getWorkingDirectory(), this); + } + + final URI pathUri = path.toUri(); + final URI fsUri = fs.getUri(); + + String scheme = pathUri.getScheme(); + String authority = pathUri.getAuthority(); + + if (scheme != null && (authority != null || fsUri.getAuthority() == null)) { + return path; + } + + if (scheme == null) { + scheme = fsUri.getScheme(); + } + + if (authority == null) { + authority = fsUri.getAuthority(); + if (authority == null) { + authority = ""; + } + } + + return new Path(scheme + ":" + + "//" + authority + pathUri.getPath()); + } + + // ------------------------------------------------------------------------ + // Utilities + // ------------------------------------------------------------------------ + + /** + * Checks if the provided path string contains a windows drive letter. + * + * @return True, if the path string contains a windows drive letter, false otherwise. + */ + public boolean hasWindowsDrive() { + return hasWindowsDrive(uri.getPath(), true); + } + + /** + * Checks if the provided path string contains a windows drive letter. + * + * @param path the path to check + * @param slashed true to indicate the first character of the string is a slash, false otherwise + * @return true if the path string contains a windows drive letter, false otherwise + */ + private boolean hasWindowsDrive(String path, boolean slashed) { + final int start = slashed ? 1 : 0; + return path.length() >= start + 2 && (!slashed || path.charAt(0) == '/') + && path.charAt(start + 1) == ':' + && ((path.charAt(start) >= 'A' && path.charAt(start) <= 'Z') + || (path.charAt(start) >= 'a' && path.charAt(start) <= 'z')); + } + + // ------------------------------------------------------------------------ + // Utilities + // ------------------------------------------------------------------------ + + /** + * Creates a path for the given local file. + * + *
<p>
    This method is useful to make sure the path creation for local files works seamlessly + * across different operating systems. Especially Windows has slightly different rules for + * slashes between schema and a local file path, making it sometimes tricky to produce + * cross-platform URIs for local files. + * + * @param file The file that the path should represent. + * @return A path representing the local file URI of the given file. + */ + public static Path fromLocalFile(File file) { + return new Path(file.toURI()); + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream.java b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream.java new file mode 100644 index 000000000..b38a518bc --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferReadableFSDataInputStream.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.state.forst.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import org.apache.flink.core.fs.LocalDataInputStream; +import org.apache.flink.core.fs.Path; + +/** + * ByteBufferReadableFSDataInputStream. + */ +public class ByteBufferReadableFSDataInputStream extends InputStream { + private final LocalDataInputStream localDataInputStream; + private final Path path; + private final long totalFileSize; + + public ByteBufferReadableFSDataInputStream( + Path path, InputStream inputStream, long totalFileSize) { + if (!(inputStream instanceof LocalDataInputStream)) { + throw new UnsupportedOperationException("Unsupported input stream type"); + } + this.localDataInputStream = (LocalDataInputStream) inputStream; + this.path = path; + this.totalFileSize = totalFileSize; + } + + public void seek(long desired) throws IOException { + localDataInputStream.seek(desired); + } + + public long getPos() throws IOException { + return localDataInputStream.getPos(); + } + + @Override + public int read() throws IOException { + return localDataInputStream.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return localDataInputStream.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return localDataInputStream.read(b, off, len); + } + + /** + * Return the total number of bytes read into the buffer. 
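+   *
+   * <p>A minimal calling sketch (the {@code stream} variable and buffer size are illustrative only):
+   * <pre>{@code
+   * ByteBuffer buffer = ByteBuffer.allocate(4096);
+   * int bytesRead = stream.readFully(buffer); // fills the buffer from the current position
+   * buffer.flip();                            // make the bytes just read available for consumption
+   * }</pre>
+   *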
+ * REQUIRES: External synchronization + */ + public int readFully(ByteBuffer bb) throws IOException { + return readFullyFromFSDataInputStream(localDataInputStream, bb); + } + + private int readFullyFromFSDataInputStream(LocalDataInputStream fsdis, ByteBuffer bb) + throws IOException { + byte[] tmp = new byte[bb.remaining()]; + int n = 0; + long pos = fsdis.getPos(); + while (n < tmp.length) { + int read = fsdis.read(tmp, n, tmp.length - n); + if (read == -1) { + break; + } + n += read; + } + if (n > 0) { + bb.put(tmp, 0, n); + } + return n; + } + + /** + * Return the total number of bytes read into the buffer. + * Safe for concurrent use by multiple threads. + */ + public int readFully(long position, ByteBuffer bb) throws IOException { + localDataInputStream.seek(position); + return readFullyFromFSDataInputStream(localDataInputStream, bb); + } + + @Override + public long skip(long n) throws IOException { + seek(getPos() + n); + return getPos(); + } + + @Override + public int available() throws IOException { + return localDataInputStream.available(); + } + + @Override + public void close() throws IOException { + localDataInputStream.close(); + } + + @Override + public synchronized void mark(int readlimit) { + localDataInputStream.mark(readlimit); + } + + @Override + public synchronized void reset() throws IOException { + localDataInputStream.reset(); + } + + @Override + public boolean markSupported() { + return localDataInputStream.markSupported(); + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream.java b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream.java new file mode 100644 index 000000000..9c59fda3b --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ByteBufferWritableFSDataOutputStream.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.state.forst.fs; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import org.apache.flink.core.fs.LocalDataOutputStream; +import org.apache.flink.core.fs.Path; + +/** + * ByteBufferWritableFSDataOutputStream. 
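+ *
+ * <p>A minimal usage sketch; {@code fileSystem} and {@code path} stand in for an existing
+ * {@code ForStFlinkFileSystem} instance and a target {@code Path}:
+ * <pre>{@code
+ * try (ByteBufferWritableFSDataOutputStream out = fileSystem.create(path)) {
+ *   out.write(ByteBuffer.wrap("hello".getBytes()));
+ *   out.sync();
+ * }
+ * }</pre>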
+ */ +public class ByteBufferWritableFSDataOutputStream extends OutputStream { + private final Path path; + private final LocalDataOutputStream localDataOutputStream; + + public ByteBufferWritableFSDataOutputStream(Path path, OutputStream fsdos) { + if (!(fsdos instanceof LocalDataOutputStream)) { + throw new UnsupportedOperationException("Unsupported output stream type"); + } + this.path = path; + this.localDataOutputStream = (LocalDataOutputStream) fsdos; + } + + public long getPos() throws IOException { + return localDataOutputStream.getPos(); + } + + @Override + public void write(int b) throws IOException { + localDataOutputStream.write(b); + } + + public void write(byte[] b) throws IOException { + localDataOutputStream.write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + localDataOutputStream.write(b, off, len); + } + + public void write(ByteBuffer bb) throws IOException { + if (bb.hasArray()) { + write(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()); + } else { + byte[] tmp = new byte[bb.remaining()]; + bb.get(tmp); + write(tmp, 0, tmp.length); + } + } + + @Override + public void flush() throws IOException { + localDataOutputStream.flush(); + } + + public void sync() throws IOException { + localDataOutputStream.sync(); + } + + @Override + public void close() throws IOException { + localDataOutputStream.close(); + } +} diff --git a/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ForStFlinkFileSystem.java b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ForStFlinkFileSystem.java new file mode 100644 index 000000000..afb32d754 --- /dev/null +++ b/java/flinktestmock/src/main/java/org/apache/flink/state/forst/fs/ForStFlinkFileSystem.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.state.forst.fs; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import org.apache.flink.core.fs.FileStatus; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; + +/** + * RemoteRocksdbFlinkFileSystem, used to expose flink fileSystem interface to frocksdb. 
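+ *
+ * <p>A minimal read sketch (the URI and file name are placeholders):
+ * <pre>{@code
+ * ForStFlinkFileSystem fs =
+ *     (ForStFlinkFileSystem) ForStFlinkFileSystem.get(URI.create("file:///tmp/forst/"));
+ * try (ByteBufferReadableFSDataInputStream in = fs.open(new Path("file:///tmp/forst/test-file"))) {
+ *   ByteBuffer buffer = ByteBuffer.allocate(64);
+ *   in.readFully(buffer);
+ * }
+ * }</pre>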
+ */ +public class ForStFlinkFileSystem extends FileSystem { + private final FileSystem flinkFS; + + public ForStFlinkFileSystem(FileSystem flinkFS) { + this.flinkFS = flinkFS; + } + + public static FileSystem get(URI uri) throws IOException { + return new ForStFlinkFileSystem(FileSystem.get(uri)); + } + + @Override + public Path getWorkingDirectory() { + return flinkFS.getWorkingDirectory(); + } + + @Override + public Path getHomeDirectory() { + return flinkFS.getHomeDirectory(); + } + + @Override + public URI getUri() { + return flinkFS.getUri(); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return flinkFS.getFileStatus(f); + } + + @Override + public ByteBufferReadableFSDataInputStream open(Path f, int bufferSize) throws IOException { + InputStream original = flinkFS.open(f, bufferSize); + long fileSize = flinkFS.getFileStatus(f).getLen(); + return new ByteBufferReadableFSDataInputStream(f, original, fileSize); + } + + @Override + public ByteBufferReadableFSDataInputStream open(Path f) throws IOException { + InputStream original = flinkFS.open(f); + long fileSize = flinkFS.getFileStatus(f).getLen(); + return new ByteBufferReadableFSDataInputStream(f, original, fileSize); + } + + @Override + public FileStatus[] listStatus(Path f) throws IOException { + return flinkFS.listStatus(f); + } + + @Override + public boolean exists(final Path f) throws IOException { + return flinkFS.exists(f); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + return flinkFS.delete(f, recursive); + } + + @Override + public boolean mkdirs(Path f) throws IOException { + return flinkFS.mkdirs(f); + } + + public ByteBufferWritableFSDataOutputStream create(Path f) throws IOException { + return create(f, WriteMode.OVERWRITE); + } + + @Override + public ByteBufferWritableFSDataOutputStream create(Path f, WriteMode overwriteMode) + throws IOException { + OutputStream original = flinkFS.create(f, overwriteMode); + return new ByteBufferWritableFSDataOutputStream(f, original); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + // The rename is not atomic for RocksDB. Some FileSystems e.g. HDFS, OSS does not allow a + // renaming if the target already exists. So, we delete the target before attempting the + // rename. + if (flinkFS.exists(dst)) { + boolean deleted = flinkFS.delete(dst, false); + if (!deleted) { + throw new IOException("Fail to delete dst path: " + dst); + } + } + return flinkFS.rename(src, dst); + } + + @Override + public boolean isDistributedFS() { + return flinkFS.isDistributedFS(); + } +} diff --git a/java/rocksjni/env_flink_test_suite.cc b/java/rocksjni/env_flink_test_suite.cc new file mode 100644 index 000000000..5e66ca746 --- /dev/null +++ b/java/rocksjni/env_flink_test_suite.cc @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "env/flink/env_flink_test_suite.h" + +#include + +#include "include/org_rocksdb_EnvFlinkTestSuite.h" +#include "java/rocksjni/portal.h" + +/* + * Class: org_rocksdb_EnvFlinkTestSuite + * Method: buildNativeObject + * Signature: (Ljava/lang/String;)J + */ +jlong Java_org_rocksdb_EnvFlinkTestSuite_buildNativeObject(JNIEnv* env, jobject, + jstring basePath) { + jboolean has_exception = JNI_FALSE; + auto path = + ROCKSDB_NAMESPACE::JniUtil::copyStdString(env, basePath, &has_exception); + if (has_exception == JNI_TRUE) { + ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( + env, "Could not copy jstring to std::string"); + return 0; + } + auto env_flink_test_suites = new ROCKSDB_NAMESPACE::EnvFlinkTestSuites(path); + return reinterpret_cast(env_flink_test_suites); +} + +/* + * Class: org_rocksdb_EnvFlinkTestSuite + * Method: runAllTestSuites + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_rocksdb_EnvFlinkTestSuite_runAllTestSuites( + JNIEnv* jniEnv, jobject, jlong objectHandle) { + auto env_flink_test_suites = + reinterpret_cast(objectHandle); + env_flink_test_suites->runAllTestSuites(); + if (jniEnv->ExceptionCheck()) { + jthrowable throwable = jniEnv->ExceptionOccurred(); + jniEnv->ExceptionDescribe(); + jniEnv->ExceptionClear(); + jniEnv->Throw(throwable); + } +} + +/* + * Class: org_rocksdb_EnvFlinkTestSuite + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_rocksdb_EnvFlinkTestSuite_disposeInternal( + JNIEnv*, jobject, jlong objectHandle) { + auto test_suites = + reinterpret_cast(objectHandle); + delete test_suites; +} \ No newline at end of file diff --git a/java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java b/java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java new file mode 100644 index 000000000..92e503509 --- /dev/null +++ b/java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.rocksdb; + +/** + * The test suite used for flink-env interfaces testing. You could define and implement test + * procedures in the "env/flink/env_flink_test_suite.h" and "env/flink/env_flink_test_suite.cc", and + * these tests will be executed by EnvFlinkTestSuite#runAllTestSuites. 
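+ *
+ * <p>A minimal usage sketch (the base path is a placeholder for any writable directory URI):
+ * <pre>{@code
+ * try (EnvFlinkTestSuite suite = new EnvFlinkTestSuite("file:///tmp/forst-env-test/")) {
+ *   suite.runAllTestSuites();
+ * }
+ * }</pre>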
+ */ +public class EnvFlinkTestSuite implements AutoCloseable { + private final String basePath; + + private final long nativeObjectHandle; + + public EnvFlinkTestSuite(String basePath) { + this.basePath = basePath; + this.nativeObjectHandle = buildNativeObject(basePath); + } + + private native long buildNativeObject(String basePath); + + private native void runAllTestSuites(long nativeObjectHandle); + + private native void disposeInternal(long nativeObjectHandle); + + public void runAllTestSuites() { + runAllTestSuites(nativeObjectHandle); + } + + @Override + public void close() throws Exception { + disposeInternal(nativeObjectHandle); + } +} \ No newline at end of file diff --git a/java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java b/java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java new file mode 100644 index 000000000..5c7166557 --- /dev/null +++ b/java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.rocksdb.flink; + +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.rocksdb.EnvFlinkTestSuite; +import org.rocksdb.RocksNativeLibraryResource; + +/** + * Unit test for env/flink/env_flink.cc. 
+ */ +public class FlinkEnvTest { + @ClassRule + public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE = + new RocksNativeLibraryResource(); + + @Rule public TemporaryFolder parentFolder = new TemporaryFolder(); + + @Test + public void runEnvFlinkTestSuites() throws Exception { + String basePath = parentFolder.newFolder().toURI().toString(); + try (EnvFlinkTestSuite testSuite = new EnvFlinkTestSuite(basePath)) { + testSuite.runAllTestSuites(); + } + } +} \ No newline at end of file diff --git a/src.mk b/src.mk index c58fc70fe..e168fcd3e 100644 --- a/src.mk +++ b/src.mk @@ -116,6 +116,7 @@ LIB_SOURCES = \ env/flink/env_flink.cc \ env/flink/jvm_util.cc \ env/flink/jni_helper.cc \ + env/flink/env_flink_test_suite.cc \ file/delete_scheduler.cc \ file/file_prefetch_buffer.cc \ file/file_util.cc \ @@ -671,6 +672,7 @@ JNI_NATIVE_SOURCES = \ java/rocksjni/export_import_files_metadatajni.cc \ java/rocksjni/env.cc \ java/rocksjni/env_flink.cc \ + java/rocksjni/env_flink_test_suite.cc \ java/rocksjni/env_options.cc \ java/rocksjni/event_listener.cc \ java/rocksjni/event_listener_jnicallback.cc \ From ca371b1f9defdd6c1b77b9906db26462becb56e3 Mon Sep 17 00:00:00 2001 From: Hangxiang Yu Date: Mon, 1 Apr 2024 16:27:27 +0800 Subject: [PATCH 376/386] [env] Add test cases in flink-env test suite (cherry picked from commit 729cf5c764c5c40a3990912c2860f9319f8c354a) --- env/flink/env_flink.cc | 19 +++-- env/flink/env_flink_test_suite.cc | 127 +++++++++++++++++++++++++++--- env/flink/env_flink_test_suite.h | 7 +- 3 files changed, 137 insertions(+), 16 deletions(-) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index b963fe508..26deba9e7 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -66,7 +66,7 @@ class FlinkWritableFile : public FSWritableFile { jobject fsDataOutputStream = jniEnv->CallObjectMethod( file_system_instance_, fileSystemCreateMethod.javaMethod, pathInstance); jniEnv->DeleteLocalRef(pathInstance); - if (fsDataOutputStream == nullptr) { + if (fsDataOutputStream == nullptr || jniEnv->ExceptionCheck()) { return CheckThenError( std::string( "CallObjectMethod Exception when Init FlinkWritableFile, ") @@ -193,7 +193,7 @@ class FlinkReadableFile : virtual public FSSequentialFile, jobject fsDataInputStream = jniEnv->CallObjectMethod( file_system_instance_, openMethod.javaMethod, pathInstance); jniEnv->DeleteLocalRef(pathInstance); - if (fsDataInputStream == nullptr) { + if (fsDataInputStream == nullptr || jniEnv->ExceptionCheck()) { return CheckThenError( std::string( "CallObjectMethod Exception when Init FlinkReadableFile, ") @@ -355,7 +355,7 @@ Status FlinkFileSystem::Init() { jobject fileSystemInstance = jniEnv->CallStaticObjectMethod( fileSystemClass.javaClass, fileSystemGetMethod.javaMethod, uriInstance); jniEnv->DeleteLocalRef(uriInstance); - if (fileSystemInstance == nullptr) { + if (fileSystemInstance == nullptr || jniEnv->ExceptionCheck()) { return CheckThenError( std::string( "CallStaticObjectMethod Exception when Init FlinkFileSystem, ") @@ -504,7 +504,7 @@ IOStatus FlinkFileSystem::GetChildren(const std::string& file_name, auto fileStatusArray = (jobjectArray)jniEnv->CallObjectMethod( file_system_instance_, listStatusMethod.javaMethod, pathInstance); jniEnv->DeleteLocalRef(pathInstance); - if (fileStatusArray == nullptr) { + if (fileStatusArray == nullptr || jniEnv->ExceptionCheck()) { return CheckThenError( std::string("Exception when CallObjectMethod in GetChildren, ") .append(listStatusMethod.ToString()) @@ -516,7 +516,7 @@ IOStatus 
FlinkFileSystem::GetChildren(const std::string& file_name, jsize fileStatusArrayLen = jniEnv->GetArrayLength(fileStatusArray); for (jsize i = 0; i < fileStatusArrayLen; i++) { jobject fileStatusObj = jniEnv->GetObjectArrayElement(fileStatusArray, i); - if (fileStatusObj == nullptr) { + if (fileStatusObj == nullptr || jniEnv->ExceptionCheck()) { jniEnv->DeleteLocalRef(fileStatusArray); return CheckThenError( "Exception when GetObjectArrayElement in GetChildren"); @@ -527,7 +527,7 @@ IOStatus FlinkFileSystem::GetChildren(const std::string& file_name, jobject subPath = jniEnv->CallObjectMethod(fileStatusObj, getPathMethod.javaMethod); jniEnv->DeleteLocalRef(fileStatusObj); - if (subPath == nullptr) { + if (subPath == nullptr || jniEnv->ExceptionCheck()) { jniEnv->DeleteLocalRef(fileStatusArray); return CheckThenError( std::string("Exception when CallObjectMethod in GetChildren, ") @@ -539,6 +539,13 @@ IOStatus FlinkFileSystem::GetChildren(const std::string& file_name, auto subPathStr = (jstring)jniEnv->CallObjectMethod( subPath, pathToStringMethod.javaMethod); jniEnv->DeleteLocalRef(subPath); + if (subPathStr == nullptr || jniEnv->ExceptionCheck()) { + jniEnv->DeleteLocalRef(fileStatusArray); + return CheckThenError( + std::string("Exception when CallObjectMethod in GetChildren, ") + .append(pathToStringMethod.ToString())); + } + const char* str = jniEnv->GetStringUTFChars(subPathStr, nullptr); result->emplace_back(str); jniEnv->ReleaseStringUTFChars(subPathStr, str); diff --git a/env/flink/env_flink_test_suite.cc b/env/flink/env_flink_test_suite.cc index 2b1a312ab..4db7f6968 100644 --- a/env/flink/env_flink_test_suite.cc +++ b/env/flink/env_flink_test_suite.cc @@ -18,6 +18,7 @@ #include "env/flink/env_flink_test_suite.h" +#include #include #include @@ -28,6 +29,10 @@ std::abort(); \ } +#define ASSERT_FALSE(condition) ASSERT_TRUE(!(condition)) + +#define LOG(message) (std::cout << (message) << std::endl) + namespace ROCKSDB_NAMESPACE { EnvFlinkTestSuites::EnvFlinkTestSuites(const std::string& basePath) @@ -35,7 +40,15 @@ EnvFlinkTestSuites::EnvFlinkTestSuites(const std::string& basePath) void EnvFlinkTestSuites::runAllTestSuites() { setUp(); - testFileExist(); + LOG("Stage 1: setUp OK"); + testDirOperation(); + LOG("Stage 2: testDirOperation OK"); + testFileOperation(); + LOG("Stage 3: testFileOperation OK"); + testGetChildren(); + LOG("Stage 4: testGetChildren OK"); + testFileReadAndWrite(); + LOG("Stage 5: testFileReadAndWrite OK"); } void EnvFlinkTestSuites::setUp() { @@ -45,11 +58,110 @@ void EnvFlinkTestSuites::setUp() { } } -void EnvFlinkTestSuites::testFileExist() { - std::string fileName("test-file"); - Status result = flink_env_->FileExists(fileName); - ASSERT_TRUE(result.IsNotFound()); +void EnvFlinkTestSuites::testDirOperation() { + const std::string dir_name = "test-dir"; + ASSERT_TRUE(flink_env_->FileExists(dir_name).IsNotFound()); + ASSERT_TRUE(flink_env_->CreateDir(dir_name).ok()); + ASSERT_TRUE(flink_env_->CreateDirIfMissing(dir_name).ok()); + ASSERT_FALSE(flink_env_->CreateDir(dir_name).ok()); + + bool is_dir; + ASSERT_TRUE(flink_env_->IsDirectory(dir_name, &is_dir).ok() && is_dir); + ASSERT_TRUE(flink_env_->FileExists(dir_name).ok()); + ASSERT_TRUE(flink_env_->DeleteDir(dir_name).ok()); + ASSERT_TRUE(flink_env_->FileExists(dir_name).IsNotFound()); +} + +void EnvFlinkTestSuites::testFileOperation() { + const std::string file_name = "test-file"; + const std::string not_exist_file_name = "not-exist-file"; + + // test file exists + 
ASSERT_TRUE(flink_env_->FileExists(file_name).IsNotFound()); + generateFile(file_name); + ASSERT_TRUE(flink_env_->FileExists(file_name).ok()); + + // test file status + uint64_t file_size, file_mtime; + ASSERT_TRUE(flink_env_->GetFileSize(file_name, &file_size).ok()); + ASSERT_FALSE(flink_env_->GetFileSize(not_exist_file_name, &file_size).ok()); + ASSERT_TRUE(file_size > 0); + ASSERT_TRUE(flink_env_->GetFileModificationTime(file_name, &file_mtime).ok()); + ASSERT_FALSE( + flink_env_->GetFileModificationTime(not_exist_file_name, &file_mtime) + .ok()); + ASSERT_TRUE(file_mtime > 0); + + // test renaming file + const std::string file_name_2 = "test-file-2"; + flink_env_->RenameFile(file_name, file_name_2); + ASSERT_TRUE(flink_env_->FileExists(file_name).IsNotFound()); + ASSERT_TRUE(flink_env_->FileExists(file_name_2).ok()); + ASSERT_TRUE(flink_env_->DeleteFile(file_name_2).ok()); + ASSERT_TRUE(flink_env_->FileExists(file_name_2).IsNotFound()); +} + +void EnvFlinkTestSuites::testGetChildren() { + const std::string dir_name = "test-dir"; + const std::string sub_dir_name = dir_name + "/test-sub-dir"; + const std::string file_name_1 = dir_name + "/test-file-1"; + const std::string file_name_2 = dir_name + "/test-file-2"; + ASSERT_TRUE(flink_env_->CreateDirIfMissing(dir_name).ok()); + ASSERT_TRUE(flink_env_->CreateDirIfMissing(sub_dir_name).ok()); + generateFile(file_name_1); + generateFile(file_name_2); + std::vector result, + expected{base_path_ + sub_dir_name, base_path_ + file_name_1, + base_path_ + file_name_2}; + ASSERT_TRUE(flink_env_->GetChildren(dir_name, &result).ok()); + ASSERT_TRUE(result.size() == 3); + std::sort(result.begin(), result.end()); + std::sort(expected.begin(), expected.end()); + ASSERT_TRUE(expected == result); +} + +void EnvFlinkTestSuites::testFileReadAndWrite() { + const std::string file_name = "test-file"; + const std::string content1 = "Hello World", content2 = ", Hello ForSt", + content = content1 + content2; + + std::unique_ptr write_result; + ASSERT_TRUE( + flink_env_->NewWritableFile(file_name, &write_result, EnvOptions()).ok()); + write_result->Append(content1); + write_result->Append(content2); + write_result->Sync(); + write_result->Flush(); + write_result->Close(); + + std::unique_ptr sequential_result; + ASSERT_TRUE( + flink_env_->NewSequentialFile(file_name, &sequential_result, EnvOptions()) + .ok()); + + Slice sequential_data; + char* sequential_scratch = new char[content2.size()]; + sequential_result->Skip(content1.size()); + sequential_result->Read(content2.size(), &sequential_data, + sequential_scratch); + ASSERT_TRUE(sequential_data.data() == content2); + delete[] sequential_scratch; + + std::unique_ptr random_access_result; + ASSERT_TRUE( + flink_env_ + ->NewRandomAccessFile(file_name, &random_access_result, EnvOptions()) + .ok()); + Slice random_access_data; + char* random_access_scratch = new char[content2.size()]; + random_access_result->Read(content1.size(), content.size() - content1.size(), + &random_access_data, (char*)random_access_scratch); + ASSERT_TRUE(random_access_data.data() == content2); + delete[] random_access_scratch; +} + +void EnvFlinkTestSuites::generateFile(const std::string& fileName) { // Generate a file manually const std::string prefix = "file:"; std::string writeFileName = base_path_ + fileName; @@ -57,10 +169,7 @@ void EnvFlinkTestSuites::testFileExist() { writeFileName = writeFileName.substr(prefix.size()); } std::ofstream writeFile(writeFileName); - writeFile << "testFileExist"; + writeFile << "Hello World"; 
writeFile.close(); - - result = flink_env_->FileExists(fileName); - ASSERT_TRUE(result.ok()); } } // namespace ROCKSDB_NAMESPACE \ No newline at end of file diff --git a/env/flink/env_flink_test_suite.h b/env/flink/env_flink_test_suite.h index 3826060d5..c7512b031 100644 --- a/env/flink/env_flink_test_suite.h +++ b/env/flink/env_flink_test_suite.h @@ -29,6 +29,11 @@ class EnvFlinkTestSuites { std::unique_ptr flink_env_; const std::string base_path_; void setUp(); - void testFileExist(); + void testDirOperation(); + void testFileOperation(); + void testGetChildren(); + void testFileReadAndWrite(); + + void generateFile(const std::string& fileName); }; } // namespace ROCKSDB_NAMESPACE \ No newline at end of file From abe27da9bfdbdd2d3c4375cdbdd8be4d15950ee8 Mon Sep 17 00:00:00 2001 From: Hangxiang Yu Date: Mon, 1 Apr 2024 18:54:46 +0800 Subject: [PATCH 377/386] [build] Fix warning about unused parameters (cherry picked from commit 9c23507040c0efed8324ffc4dfbf0763d3884ae4) --- env/flink/env_flink.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/env/flink/env_flink.cc b/env/flink/env_flink.cc index 26deba9e7..eae1773cf 100644 --- a/env/flink/env_flink.cc +++ b/env/flink/env_flink.cc @@ -379,7 +379,7 @@ IOStatus FlinkFileSystem::NewSequentialFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) { result->reset(); - IOStatus status = FileExists(fname, IOOptions(), dbg); + IOStatus status = FileExists(fname, options.io_options, dbg); if (!status.ok()) { return status; } @@ -400,7 +400,7 @@ IOStatus FlinkFileSystem::NewRandomAccessFile( const std::string& fname, const FileOptions& options, std::unique_ptr* result, IODebugContext* dbg) { result->reset(); - IOStatus status = FileExists(fname, IOOptions(), dbg); + IOStatus status = FileExists(fname, options.io_options, dbg); if (!status.ok()) { return status; } @@ -623,8 +623,8 @@ IOStatus FlinkFileSystem::CreateDir(const std::string& file_name, } IOStatus FlinkFileSystem::CreateDirIfMissing(const std::string& file_name, - const IOOptions& options, - IODebugContext* dbg) { + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) { JNIEnv* jniEnv = getJNIEnv(); std::string filePath = ConstructPath(file_name); From ae7d8214f22560d0ea9665e653d13b501b5a1f8c Mon Sep 17 00:00:00 2001 From: Hangxiang Yu Date: Sat, 6 Apr 2024 12:32:05 +0800 Subject: [PATCH 378/386] [build] Support releasing forst (cherry picked from commit 5d70ad0574987a530453cdd619ddd8aa6c476c45) --- CMakeLists.txt | 1 + FORST-RELEASE.md | 248 ++++++++++++++++++ Makefile | 27 +- java/deploysettings.xml | 18 +- java/pom.xml.template | 39 +-- ...ish-frocksdbjni.sh => publish-forstjni.sh} | 4 +- 6 files changed, 282 insertions(+), 55 deletions(-) create mode 100644 FORST-RELEASE.md rename java/{publish-frocksdbjni.sh => publish-forstjni.sh} (93%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f93b43e4..15f2d133d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1168,6 +1168,7 @@ endif() if(WITH_JNI OR JNI) message(STATUS "JNI library is enabled") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/java) + find_package(JNI) include_directories(${JNI_INCLUDE_DIRS}) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") include_directories(${JNI_INCLUDE_DIRS}/linux) diff --git a/FORST-RELEASE.md b/FORST-RELEASE.md new file mode 100644 index 000000000..f9f48fb20 --- /dev/null +++ b/FORST-RELEASE.md @@ -0,0 +1,248 @@ +# ForSt Release Process + +## Summary + +ForSt releases are a fat jar file that contain the following binaries: 
+* .so files for linux32 (glibc and musl-libc) +* .so files for linux64 (glibc and musl-libc) +* .so files for linux [aarch64](https://en.wikipedia.org/wiki/AArch64) (glibc and musl-libc) +* .so files for linux [ppc64le](https://en.wikipedia.org/wiki/Ppc64le) (glibc and musl-libc) +* .jnilib file for Mac OSX +* .dll for Windows x64 + +To build the binaries for a ForSt release, building on native architectures is advised. Building the binaries for ppc64le and aarch64 *can* be done using QEMU, but you may run into emulation bugs and the build times will be dramatically slower (up to x20). + +We recommend building the binaries on environments with at least 4 cores, 16GB RAM and 40GB of storage. The following environments are recommended for use in the build process: +* Windows x64 +* Linux aarch64 +* Linux ppc64le +* Mac OSX + +## Build for Windows + +For the Windows binary build, we recommend using a base [AWS Windows EC2 instance](https://aws.amazon.com/windows/products/ec2/) with 4 cores, 16GB RAM, 40GB storage for the build. + +Firstly, install [chocolatey](https://chocolatey.org/install). Once installed, the following required components can be installed using Powershell: + + choco install git.install jdk8 maven visualstudio2017community visualstudio2017-workload-nativedesktop + +Open the "Developer Command Prompt for VS 2017" and run the following commands: + + git clone git@github.com:ververica/ForSt.git + cd ForSt + java\crossbuild\build-win.bat + +The resulting native binary will be built and available at `build\java\Release\rocksdbjni-shared.dll`. You can also find it under project folder with name `librocksdbjni-win64.dll`. +The result windows jar is `build\java\rocksdbjni_classes.jar`. + +There is also a how-to in CMakeLists.txt. + +**Once finished, extract the `librocksdbjni-win64.dll` from the build environment. You will need this .dll in the final crossbuild.** + +## Build for aarch64 + +For the Linux aarch64 binary build, we recommend using a base [AWS Ubuntu Server 20.04 LTS EC2](https://aws.amazon.com/windows/products/ec2/) with a 4 core Arm processor, 16GB RAM, 40GB storage for the build. You can also attempt to build with QEMU on a non-aarch64 processor, but you may run into emulation bugs and very long build times. + +### Building in aarch64 environment + +First, install the required packages such as Java 8 and make: + + sudo apt-get update + sudo apt-get install build-essential openjdk-8-jdk + +then, install and setup [Docker](https://docs.docker.com/engine/install/ubuntu/): + + sudo apt-get install apt-transport-https ca-certificates curl gnupg lsb-release + + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + echo "deb [arch=arm64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get install docker-ce docker-ce-cli containerd.io + + sudo groupadd docker + sudo usermod -aG docker $USER + newgrp docker + +Then, clone the ForSt repo: + + git clone https://github.com/ververica/ForSt.git + cd ForSt + +First, build the glibc binary: + + make jclean clean rocksdbjavastaticdockerarm64v8 + +**Once finished, extract the `java/target/librocksdbjni-linux-aarch64.so` from the build environment. 
You will need this .so in the final crossbuild.** + +Next, build the musl-libc binary: + + make jclean clean rocksdbjavastaticdockerarm64v8musl + +**Once finished, extract the `java/target/librocksdbjni-linux-aarch64-musl.so` from the build environment. You will need this .so in the final crossbuild.** + +### Building via QEMU + +You can use QEMU on, for example, an `x86_64` system to build the aarch64 binaries. To set this up on an Ubuntu environment: + + sudo apt-get install qemu binfmt-support qemu-user-static + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + +To verify that you can now run aarch64 docker images: + + docker run --rm -t arm64v8/ubuntu uname -m + > aarch64 + +You can now attempt to build the aarch64 binaries as in the previous section. + +## Build in PPC64LE + +For the ppc64le binaries, we recommend building on a PowerPC machine if possible, as it can be tricky to spin up a ppc64le cloud environment. However, if a PowerPC machine is not available, [Travis-CI](https://www.travis-ci.com/) offers ppc64le build environments that work perfectly for building these binaries. If neither a machine or Travis are an option, you can use QEMU but the build may take a very long time and be prone to emulation errors. + +### Building in ppc64le environment + +As with the aarch64 environment, the ppc64le environment will require Java 8, Docker and build-essentials installed. Once installed, you can build the 2 binaries: + + make jclean clean rocksdbjavastaticdockerppc64le + +**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le.so` from the build environment. You will need this .so in the final crossbuild.** + + make jclean clean rocksdbjavastaticdockerppc64lemusl + +**Once finished, extract the `java/target/librocksdbjni-linux-ppc64le-musl.so` from the build environment. You will need this .so in the final crossbuild.** + +### Building via Travis + +Travis-CI supports ppc64le build environments, and this can be a convenient way of building in the absence of a PowerPC machine. Assuming that you have an S3 bucket called **my-forst-release-artifacts**, the following Travis configuration will build the release artifacts and push them to the S3 bucket: + +``` +dist: xenial +language: cpp +os: + - linux +arch: + - ppc64le + +services: + - docker +addons: + artifacts: + paths: + - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le-musl.so + - $TRAVIS_BUILD_DIR/java/target/librocksdbjni-linux-ppc64le.so + +env: + global: + - ARTIFACTS_BUCKET=my-forst-release-artifacts + jobs: + - CMD=rocksdbjavastaticdockerppc64le + - CMD=rocksdbjavastaticdockerppc64lemusl + +install: + - sudo apt-get install -y openjdk-8-jdk || exit $? + - export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH + - export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture) + - echo "JAVA_HOME=${JAVA_HOME}" + - which java && java -version + - which javac && javac -version + +script: + - make jclean clean $CMD +``` + +**Make sure to set the `ARTIFACTS_KEY` and `ARTIFACTS_SECRET` environment variables in the Travis Job with valid AWS credentials to access the S3 bucket you defined.** + +**Make sure to avoid signatureV4-only S3 regions to store the uploaded artifacts (due to unresolved https://github.com/travis-ci/artifacts/issues/57). 
You can just choose the S3 bucket of `us-east-1` region for 100% compatibility.** + +**Once finished, the`librocksdbjni-linux-ppce64le.so` and `librocksdbjni-linux-ppce64le-musl.so` binaries will be in the S3 bucket. You will need these .so binaries in the final crossbuild.** + + +### Building via QEMU + +You can use QEMU on, for example, an `x86_64` system to build the ppc64le binaries. To set this up on an Ubuntu environment: + + sudo apt-get install qemu binfmt-support qemu-user-static + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + +To verify that you can now run ppc64le docker images: + + docker run --rm -t ppc64le/ubuntu uname -m + > ppc64le + +You can now attempt to build the ppc64le binaries as in the previous section. + +## Final crossbuild in Mac OSX + +Documentation for the final crossbuild for Mac OSX and Linux is described in [java/RELEASE.md](java/RELEASE.md) as has information on dependencies that should be installed. As above, this tends to be Java 8, build-essentials and Docker. + +Before you run this step, you should have 5 binaries from the previous build steps: + +1. `librocksdbjni-win64.dll` from the Windows build step. +2. `librocksdbjni-linux-aarch64.so` from the aarch64 build step. +3. `librocksdbjni-linux-aarch64-musl.so` from the aarch64 build step. +4. `librocksdbjni-linux-ppc64le.so` from the ppc64le build step. +5. `librocksdbjni-linux-ppc64le-musl.so` from the ppc64le build step. + +To start the crossbuild within a Mac OSX environment: + + make jclean clean + mkdir -p java/target + cp /librocksdbjni-win64.dll java/target/librocksdbjni-win64.dll + cp /librocksdbjni-linux-ppc64le.so java/target/librocksdbjni-linux-ppc64le.so + cp /librocksdbjni-linux-ppc64le-musl.so java/target/librocksdbjni-linux-ppc64le-musl.so + cp /librocksdbjni-linux-aarch64.so java/target/librocksdbjni-linux-aarch64.so + cp /librocksdbjni-linux-aarch64-musl.so java/target/librocksdbjni-linux-aarch64-musl.so + FORST_VERSION=0.1.0-SNAPSHOT PORTABLE=1 ROCKSDB_DISABLE_JEMALLOC=true DEBUG_LEVEL=0 make forstjavastaticreleasedocker + +*Note, we disable jemalloc on mac due to https://github.com/facebook/rocksdb/issues/5787*. + +Once finished, there should be a directory at `java/target/forst-release` with the ForSt jar, javadoc jar, sources jar and pom in it. You can inspect the jar file and ensure that contains the binaries, history file, etc: + +``` +$ jar tf forstjni-$(FORST_VERSION).jar +META-INF/ +META-INF/MANIFEST.MF +HISTORY-JAVA.md +HISTORY.md +librocksdbjni-linux-aarch64-musl.so +librocksdbjni-linux-aarch64.so +librocksdbjni-linux-ppc64le-musl.so +librocksdbjni-linux-ppc64le.so +librocksdbjni-linux32-musl.so +librocksdbjni-linux32.so +librocksdbjni-linux64-musl.so +librocksdbjni-linux64.so +librocksdbjni-osx.jnilib +librocksdbjni-win64.dl +... +``` + +*Note that it contains linux32/64.so binaries as well as librocksdbjni-osx.jnilib*. + +## Push to Maven Central + +For this step, you will need the following: + +- The OSX Crossbuild artifacts built in `java/target/forst-release` as above. +- A Sonatype account with access to the staging repository. If you do not have permission, open a ticket with Sonatype, [such as this one](https://issues.sonatype.org/browse/OSSRH-72185). 
+- A GPG key to sign the release, with your public key available for verification (for example, by uploading it to https://keys.openpgp.org/) + +To upload the release to the Sonatype staging repository: +```bash +VERSION= \ +USER= \ +PASSWORD= \ +KEYNAME= \ +PASSPHRASE= \ +java/publish-forstjni.sh +``` + +Go to the staging repositories on Sonatype: + +https://oss.sonatype.org/#stagingRepositories + +Select the open staging repository and click on "Close". + +The staging repository will look something like `https://oss.sonatype.org/content/repositories/xxxx-1020`. You can use this staged release to test the artifacts and ensure they are correct. + +Once you have verified the artifacts are correct, press the "Release" button. **WARNING: this can not be undone**. Within 24-48 hours, the artifact will be available on Maven Central for use. \ No newline at end of file diff --git a/Makefile b/Makefile index e35a9feb7..93fae2739 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ #----------------------------------------------- -FROCKSDB_VERSION ?= 1.0 +FORST_VERSION ?= 0.1.0 BASH_EXISTS := $(shell which bash) SHELL := $(shell which bash) @@ -2356,14 +2356,14 @@ rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 -frocksdbjavastaticreleasedocker: rocksdbjavastaticreleasedocker +forstjavastaticreleasedocker: rocksdbjavastaticreleasedocker # update apache license mkdir -p java/target/META-INF - cp LICENSE.Apache java/target/META-INF/LICENSE + cp LICENSE java/target/META-INF/LICENSE cd java/target;jar -uf $(ROCKSDB_JAR_ALL) META-INF/LICENSE # jars to be released - $(eval JAR_PREF=rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)) + $(eval JAR_PREF=forstjni-$(FORST_VERSION)) $(eval JAR_DOCS=$(JAR_PREF)-javadoc.jar) $(eval JAR_SOURCES=$(JAR_PREF)-sources.jar) @@ -2371,21 +2371,22 @@ frocksdbjavastaticreleasedocker: rocksdbjavastaticreleasedocker cd java/target;jar -uf $(JAR_DOCS) META-INF/LICENSE cd java/target;jar -uf $(JAR_SOURCES) META-INF/LICENSE - # prepare frocksdb release - cd java/target;mkdir -p frocksdb-release + # prepare forst release + cd java/target;mkdir -p forst-release - $(eval FROCKSDB_JAVA_VERSION=$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-ververica-$(FROCKSDB_VERSION)) - $(eval FJAR_PREF=frocksdbjni-$(FROCKSDB_JAVA_VERSION)) + $(eval FORST_JAVA_VERSION=$(FORST_VERSION)) + $(eval FJAR_PREF=forstjni-$(FORST_JAVA_VERSION)) $(eval FJAR=$(FJAR_PREF).jar) $(eval FJAR_DOCS=$(FJAR_PREF)-javadoc.jar) $(eval FJAR_SOURCES=$(FJAR_PREF)-sources.jar) - cd java/target;cp $(ROCKSDB_JAR_ALL) frocksdb-release/$(FJAR) - cd java/target;cp $(JAR_DOCS) frocksdb-release/$(FJAR_DOCS) - cd java/target;cp $(JAR_SOURCES) frocksdb-release/$(FJAR_SOURCES) + cd java/target;cp $(ROCKSDB_JAR_ALL) forst-release/$(FJAR) + cd java/target;cp $(JAR_DOCS) forst-release/$(FJAR_DOCS) + cd java/target;cp $(JAR_SOURCES) forst-release/$(FJAR_SOURCES) openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 - cd java;cat pom.xml.template | sed 's/\$${FROCKSDB_JAVA_VERSION}/$(FROCKSDB_JAVA_VERSION)/' > pom.xml - cd java;cp pom.xml target/frocksdb-release/$(FJAR_PREF).pom + cd java;cat pom.xml.template | sed 's/\$${FORST_JAVA_VERSION}/$(FORST_JAVA_VERSION)/' > pom.xml + cd java;cp pom.xml 
target/forst-release/$(FJAR_PREF).pom + rocksdbjavastaticdockerx86: mkdir -p java/target diff --git a/java/deploysettings.xml b/java/deploysettings.xml index 7b73248e0..acd06d518 100644 --- a/java/deploysettings.xml +++ b/java/deploysettings.xml @@ -1,12 +1,12 @@ - - - sonatype-nexus-staging - ${sonatype_user} - ${sonatype_pw} - - + + + sonatype-nexus-staging + ${sonatype_user} + ${sonatype_pw} + + \ No newline at end of file diff --git a/java/pom.xml.template b/java/pom.xml.template index bd882ec3a..52fabfc3e 100644 --- a/java/pom.xml.template +++ b/java/pom.xml.template @@ -3,11 +3,11 @@ 4.0.0 com.ververica - frocksdbjni - ${FROCKSDB_JAVA_VERSION} + forstjni + ${FORST_JAVA_VERSION} - RocksDB JNI - RocksDB fat jar with modifications specific for Apache Flink that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files + ForSt JNI + ForSt fat jar that contains .so files for linux32 and linux64 (glibc and musl-libc), jnilib files for Mac OSX, and a .dll for Windows x64. https://rocksdb.org @@ -22,37 +22,16 @@ - scm:git:https://github.com/ververica/frocksdb.git - scm:git:https://github.com/ververica/frocksdb.git - scm:git:https://github.com/ververica/frocksdb.git + scm:git:https://github.com/ververica/ForSt.git + scm:git:https://github.com/ververica/ForSt.git + scm:git:https://github.com/ververica/ForSt.git - Facebook - https://www.facebook.com + Ververica + https://www.ververica.com - - - Facebook - help@facebook.com - America/New_York - - architect - - - - - - - rocksdb - Google Groups - rocksdb-subscribe@googlegroups.com - rocksdb-unsubscribe@googlegroups.com - rocksdb@googlegroups.com - https://groups.google.com/forum/#!forum/rocksdb - - - 1.8 1.8 diff --git a/java/publish-frocksdbjni.sh b/java/publish-forstjni.sh similarity index 93% rename from java/publish-frocksdbjni.sh rename to java/publish-forstjni.sh index 2a6bd2865..6518206fa 100644 --- a/java/publish-frocksdbjni.sh +++ b/java/publish-forstjni.sh @@ -20,7 +20,7 @@ # fail on errors set -e -PREFIX=java/target/frocksdb-release/frocksdbjni-${VERSION} +PREFIX=java/target/forst-release/forstjni-${VERSION} function deploy() { FILE=$1 @@ -37,8 +37,6 @@ function deploy() { -Dgpg.passphrase="${PASSPHRASE}" } -PREFIX=java/target/frocksdb-release/frocksdbjni-${VERSION} - deploy ${PREFIX}-sources.jar sources deploy ${PREFIX}-javadoc.jar javadoc deploy ${PREFIX}.jar From ab5912faf684b6048056c700283a6d6f6fb57abb Mon Sep 17 00:00:00 2001 From: fredia Date: Thu, 26 Sep 2024 15:34:30 +0800 Subject: [PATCH 379/386] [FLINK-35928][build] rename namespace/jni to forst --- Makefile | 14 +- db/db_basic_test.cc | 2 +- db/db_secondary_test.cc | 2 +- include/rocksdb/rocksdb_namespace.h | 2 +- java/CMakeLists.txt | 1198 ++++---- java/Makefile | 414 +-- .../org/rocksdb/benchmark/DbBenchmark.java | 8 +- .../backup_engine_options.cc | 106 +- .../{rocksjni => forstjni}/backupenginejni.cc | 50 +- java/{rocksjni => forstjni}/cache.cc | 10 +- .../cassandra_compactionfilterjni.cc | 8 +- .../cassandra_value_operator.cc | 14 +- java/{rocksjni => forstjni}/checkpoint.cc | 22 +- java/{rocksjni => forstjni}/clock_cache.cc | 12 +- .../columnfamilyhandle.cc | 20 +- .../compact_range_options.cc | 128 +- .../compaction_filter.cc | 6 +- .../compaction_filter_factory.cc | 14 +- .../compaction_filter_factory_jnicallback.cc | 4 +- .../compaction_filter_factory_jnicallback.h | 2 +- .../compaction_job_info.cc | 62 +- .../compaction_job_stats.cc | 114 +- .../compaction_options.cc | 38 +- .../compaction_options_fifo.cc | 28 +- 
.../compaction_options_universal.cc | 70 +- java/{rocksjni => forstjni}/comparator.cc | 22 +- .../comparatorjnicallback.cc | 4 +- .../comparatorjnicallback.h | 2 +- .../compression_options.cc | 76 +- .../concurrent_task_limiter.cc | 30 +- java/{rocksjni => forstjni}/config_options.cc | 34 +- .../cplusplus_to_java_convert.h | 0 java/{rocksjni => forstjni}/env.cc | 62 +- java/{rocksjni => forstjni}/env_flink.cc | 12 +- .../env_flink_test_suite.cc | 16 +- java/{rocksjni => forstjni}/env_options.cc | 108 +- java/{rocksjni => forstjni}/event_listener.cc | 16 +- .../event_listener_jnicallback.cc | 4 +- .../event_listener_jnicallback.h | 2 +- .../export_import_files_metadatajni.cc | 10 +- java/{rocksjni => forstjni}/filter.cc | 16 +- .../flink_compactionfilterjni.cc | 20 +- .../hyper_clock_cache.cc | 12 +- .../import_column_family_options.cc | 20 +- .../ingest_external_file_options.cc | 64 +- java/{rocksjni => forstjni}/iterator.cc | 84 +- .../jni_perf_context.cc | 394 +-- java/{rocksjni => forstjni}/jnicallback.cc | 4 +- java/{rocksjni => forstjni}/jnicallback.h | 0 java/{rocksjni => forstjni}/kv_helper.h | 2 +- .../loggerjnicallback.cc | 28 +- .../loggerjnicallback.h | 2 +- java/{rocksjni => forstjni}/lru_cache.cc | 16 +- java/{rocksjni => forstjni}/memory_util.cc | 8 +- java/{rocksjni => forstjni}/memtablejni.cc | 28 +- java/{rocksjni => forstjni}/merge_operator.cc | 26 +- .../native_comparator_wrapper_test.cc | 8 +- .../optimistic_transaction_db.cc | 42 +- .../optimistic_transaction_options.cc | 24 +- java/{rocksjni => forstjni}/options.cc | 2726 ++++++++--------- java/{rocksjni => forstjni}/options_util.cc | 20 +- .../persistent_cache.cc | 12 +- java/{rocksjni => forstjni}/portal.h | 20 +- java/{rocksjni => forstjni}/ratelimiterjni.cc | 38 +- .../remove_emptyvalue_compactionfilterjni.cc | 8 +- java/{rocksjni => forstjni}/restorejni.cc | 14 +- .../rocks_callback_object.cc | 6 +- .../rocksdb_exception_test.cc | 28 +- java/{rocksjni => forstjni}/rocksjni.cc | 470 +-- java/{rocksjni => forstjni}/slice.cc | 102 +- java/{rocksjni => forstjni}/snapshot.cc | 8 +- .../sst_file_manager.cc | 54 +- .../sst_file_reader_iterator.cc | 84 +- .../sst_file_readerjni.cc | 30 +- .../sst_file_writerjni.cc | 58 +- .../{rocksjni => forstjni}/sst_partitioner.cc | 14 +- java/{rocksjni => forstjni}/statistics.cc | 66 +- java/{rocksjni => forstjni}/statisticsjni.cc | 2 +- java/{rocksjni => forstjni}/statisticsjni.h | 0 java/{rocksjni => forstjni}/table.cc | 14 +- java/{rocksjni => forstjni}/table_filter.cc | 10 +- .../table_filter_jnicallback.cc | 4 +- .../table_filter_jnicallback.h | 2 +- .../testable_event_listener.cc | 6 +- java/{rocksjni => forstjni}/thread_status.cc | 30 +- java/{rocksjni => forstjni}/trace_writer.cc | 10 +- .../trace_writer_jnicallback.cc | 4 +- .../trace_writer_jnicallback.h | 2 +- java/{rocksjni => forstjni}/transaction.cc | 322 +- java/{rocksjni => forstjni}/transaction_db.cc | 58 +- .../transaction_db_options.cc | 54 +- .../{rocksjni => forstjni}/transaction_log.cc | 24 +- .../transaction_notifier.cc | 14 +- .../transaction_notifier_jnicallback.cc | 6 +- .../transaction_notifier_jnicallback.h | 2 +- .../transaction_options.cc | 60 +- java/{rocksjni => forstjni}/ttl.cc | 26 +- java/{rocksjni => forstjni}/wal_filter.cc | 10 +- .../wal_filter_jnicallback.cc | 6 +- .../wal_filter_jnicallback.h | 2 +- java/{rocksjni => forstjni}/write_batch.cc | 158 +- .../write_batch_test.cc | 26 +- .../write_batch_with_index.cc | 192 +- .../write_buffer_manager.cc | 12 +- .../writebatchhandlerjnicallback.cc | 4 
+- .../writebatchhandlerjnicallback.h | 2 +- .../org_forstdb_AbstractCompactionFilter.h | 21 + ..._forstdb_AbstractCompactionFilterFactory.h | 29 + java/include/org_forstdb_AbstractComparator.h | 29 + .../org_forstdb_AbstractEventListener.h | 29 + java/include/org_forstdb_AbstractSlice.h | 69 + .../include/org_forstdb_AbstractTableFilter.h | 21 + .../include/org_forstdb_AbstractTraceWriter.h | 21 + .../org_forstdb_AbstractTransactionNotifier.h | 29 + java/include/org_forstdb_AbstractWalFilter.h | 21 + java/include/org_forstdb_BackupEngine.h | 101 + .../include/org_forstdb_BackupEngineOptions.h | 213 ++ .../org_forstdb_BlockBasedTableConfig.h | 21 + java/include/org_forstdb_BloomFilter.h | 23 + java/include/org_forstdb_Cache.h | 29 + .../org_forstdb_CassandraCompactionFilter.h | 21 + .../org_forstdb_CassandraValueMergeOperator.h | 29 + java/include/org_forstdb_Checkpoint.h | 45 + java/include/org_forstdb_ClockCache.h | 29 + java/include/org_forstdb_ColumnFamilyHandle.h | 45 + .../include/org_forstdb_ColumnFamilyOptions.h | 1141 +++++++ .../include/org_forstdb_CompactRangeOptions.h | 181 ++ java/include/org_forstdb_CompactionJobInfo.h | 125 + java/include/org_forstdb_CompactionJobStats.h | 229 ++ java/include/org_forstdb_CompactionOptions.h | 77 + .../org_forstdb_CompactionOptionsFIFO.h | 61 + .../org_forstdb_CompactionOptionsUniversal.h | 141 + java/include/org_forstdb_ComparatorOptions.h | 77 + java/include/org_forstdb_CompressionOptions.h | 125 + .../org_forstdb_ConcurrentTaskLimiterImpl.h | 61 + java/include/org_forstdb_ConfigOptions.h | 69 + java/include/org_forstdb_DBOptions.h | 1343 ++++++++ java/include/org_forstdb_DirectSlice.h | 77 + java/include/org_forstdb_Env.h | 77 + java/include/org_forstdb_EnvFlinkTestSuite.h | 37 + java/include/org_forstdb_EnvOptions.h | 221 ++ .../org_forstdb_ExportImportFilesMetaData.h | 21 + java/include/org_forstdb_Filter.h | 21 + .../org_forstdb_FlinkCompactionFilter.h | 45 + java/include/org_forstdb_FlinkEnv.h | 29 + java/include/org_forstdb_FlushOptions.h | 61 + ...org_forstdb_HashLinkedListMemTableConfig.h | 31 + .../org_forstdb_HashSkipListMemTableConfig.h | 27 + java/include/org_forstdb_HyperClockCache.h | 29 + .../org_forstdb_ImportColumnFamilyOptions.h | 45 + .../org_forstdb_IngestExternalFileOptions.h | 133 + java/include/org_forstdb_LRUCache.h | 29 + java/include/org_forstdb_LiveFileMetaData.h | 21 + java/include/org_forstdb_Logger.h | 57 + java/include/org_forstdb_MemoryUtil.h | 21 + .../org_forstdb_NativeComparatorWrapper.h | 21 + ...rapperTest_NativeStringComparatorWrapper.h | 21 + .../org_forstdb_OptimisticTransactionDB.h | 87 + ...org_forstdb_OptimisticTransactionOptions.h | 53 + java/include/org_forstdb_Options.h | 2405 +++++++++++++++ java/include/org_forstdb_OptionsUtil.h | 45 + java/include/org_forstdb_PerfContext.h | 805 +++++ java/include/org_forstdb_PersistentCache.h | 29 + java/include/org_forstdb_PlainTableConfig.h | 35 + java/include/org_forstdb_RateLimiter.h | 83 + java/include/org_forstdb_ReadOptions.h | 389 +++ ...forstdb_RemoveEmptyValueCompactionFilter.h | 21 + java/include/org_forstdb_RestoreOptions.h | 29 + .../include/org_forstdb_RocksCallbackObject.h | 21 + java/include/org_forstdb_RocksDB.h | 935 ++++++ .../org_forstdb_RocksDBExceptionTest.h | 61 + java/include/org_forstdb_RocksEnv.h | 21 + java/include/org_forstdb_RocksIterator.h | 173 ++ java/include/org_forstdb_RocksMemEnv.h | 29 + .../org_forstdb_SkipListMemTableConfig.h | 23 + java/include/org_forstdb_Slice.h | 61 + java/include/org_forstdb_Snapshot.h | 21 + 
java/include/org_forstdb_SstFileManager.h | 117 + java/include/org_forstdb_SstFileReader.h | 61 + .../org_forstdb_SstFileReaderIterator.h | 173 ++ java/include/org_forstdb_SstFileWriter.h | 117 + ...forstdb_SstPartitionerFixedPrefixFactory.h | 29 + java/include/org_forstdb_Statistics.h | 117 + .../org_forstdb_StringAppendOperator.h | 37 + java/include/org_forstdb_ThreadStatus.h | 69 + java/include/org_forstdb_TimedEnv.h | 29 + java/include/org_forstdb_Transaction.h | 613 ++++ java/include/org_forstdb_TransactionDB.h | 119 + .../org_forstdb_TransactionDBOptions.h | 109 + .../org_forstdb_TransactionLogIterator.h | 53 + java/include/org_forstdb_TransactionOptions.h | 125 + java/include/org_forstdb_TtlDB.h | 55 + java/include/org_forstdb_UInt64AddOperator.h | 29 + .../org_forstdb_VectorMemTableConfig.h | 23 + java/include/org_forstdb_WBWIRocksIterator.h | 133 + java/include/org_forstdb_WriteBatch.h | 301 ++ java/include/org_forstdb_WriteBatchTest.h | 21 + ...org_forstdb_WriteBatchTestInternalHelper.h | 37 + .../include/org_forstdb_WriteBatchWithIndex.h | 261 ++ java/include/org_forstdb_WriteBatch_Handler.h | 21 + java/include/org_forstdb_WriteBufferManager.h | 29 + java/include/org_forstdb_WriteOptions.h | 133 + .../org_forstdb_test_TestableEventListener.h | 21 + java/jmh/pom.xml | 4 +- .../org/rocksdb/jmh/ComparatorBenchmarks.java | 12 +- .../java/org/rocksdb/jmh/GetBenchmarks.java | 8 +- .../org/rocksdb/jmh/MultiGetBenchmarks.java | 10 +- .../java/org/rocksdb/jmh/PutBenchmarks.java | 8 +- .../main/java/org/rocksdb/util/FileUtils.java | 2 +- .../main/java/org/rocksdb/util/KVUtils.java | 2 +- .../java/OptimisticTransactionSample.java | 2 +- .../main/java/RocksDBColumnFamilySample.java | 2 +- java/samples/src/main/java/RocksDBSample.java | 4 +- .../src/main/java/TransactionSample.java | 2 +- java/spotbugs-exclude.xml | 68 +- .../AbstractCompactionFilter.java | 2 +- .../AbstractCompactionFilterFactory.java | 2 +- .../AbstractComparator.java | 2 +- .../AbstractComparatorJniBridge.java | 4 +- .../AbstractEventListener.java | 4 +- .../AbstractImmutableNativeReference.java | 2 +- .../AbstractMutableOptions.java | 2 +- .../AbstractNativeReference.java | 2 +- .../AbstractRocksIterator.java | 4 +- .../{rocksdb => forstdb}/AbstractSlice.java | 14 +- .../AbstractTableFilter.java | 2 +- .../AbstractTraceWriter.java | 2 +- .../AbstractTransactionNotifier.java | 2 +- .../AbstractWalFilter.java | 2 +- .../AbstractWriteBatch.java | 2 +- .../org/{rocksdb => forstdb}/AccessHint.java | 2 +- .../AdvancedColumnFamilyOptionsInterface.java | 8 +- ...edMutableColumnFamilyOptionsInterface.java | 2 +- .../BackgroundErrorReason.java | 2 +- .../{rocksdb => forstdb}/BackupEngine.java | 2 +- .../BackupEngineOptions.java | 6 +- .../org/{rocksdb => forstdb}/BackupInfo.java | 6 +- .../BlockBasedTableConfig.java | 20 +- .../org/{rocksdb => forstdb}/BloomFilter.java | 2 +- .../BuiltinComparator.java | 2 +- .../ByteBufferGetStatus.java | 2 +- .../java/org/{rocksdb => forstdb}/Cache.java | 2 +- .../CassandraCompactionFilter.java | 2 +- .../CassandraValueMergeOperator.java | 2 +- .../org/{rocksdb => forstdb}/Checkpoint.java | 2 +- .../{rocksdb => forstdb}/ChecksumType.java | 2 +- .../org/{rocksdb => forstdb}/ClockCache.java | 2 +- .../ColumnFamilyDescriptor.java | 2 +- .../ColumnFamilyHandle.java | 2 +- .../ColumnFamilyMetaData.java | 2 +- .../ColumnFamilyOptions.java | 8 +- .../ColumnFamilyOptionsInterface.java | 4 +- .../CompactRangeOptions.java | 2 +- .../CompactionJobInfo.java | 2 +- .../CompactionJobStats.java | 2 +- 
.../CompactionOptions.java | 2 +- .../CompactionOptionsFIFO.java | 2 +- .../CompactionOptionsUniversal.java | 2 +- .../CompactionPriority.java | 4 +- .../CompactionReason.java | 2 +- .../CompactionStopStyle.java | 4 +- .../{rocksdb => forstdb}/CompactionStyle.java | 2 +- .../ComparatorOptions.java | 2 +- .../{rocksdb => forstdb}/ComparatorType.java | 2 +- .../CompressionOptions.java | 2 +- .../{rocksdb => forstdb}/CompressionType.java | 2 +- .../ConcurrentTaskLimiter.java | 2 +- .../ConcurrentTaskLimiterImpl.java | 2 +- .../{rocksdb => forstdb}/ConfigOptions.java | 2 +- .../org/{rocksdb => forstdb}/DBOptions.java | 8 +- .../DBOptionsInterface.java | 14 +- .../DataBlockIndexType.java | 2 +- .../java/org/{rocksdb => forstdb}/DbPath.java | 2 +- .../org/{rocksdb => forstdb}/DirectSlice.java | 4 +- .../{rocksdb => forstdb}/EncodingType.java | 2 +- .../java/org/{rocksdb => forstdb}/Env.java | 4 +- .../EnvFlinkTestSuite.java | 2 +- .../org/{rocksdb => forstdb}/EnvOptions.java | 2 +- .../{rocksdb => forstdb}/EventListener.java | 2 +- .../{rocksdb => forstdb}/Experimental.java | 2 +- .../ExportImportFilesMetaData.java | 2 +- .../ExternalFileIngestionInfo.java | 2 +- .../FileOperationInfo.java | 2 +- .../java/org/{rocksdb => forstdb}/Filter.java | 2 +- .../FilterPolicyType.java | 2 +- .../FlinkCompactionFilter.java | 2 +- .../org/{rocksdb => forstdb}/FlinkEnv.java | 2 +- .../{rocksdb => forstdb}/FlushJobInfo.java | 2 +- .../{rocksdb => forstdb}/FlushOptions.java | 4 +- .../org/{rocksdb => forstdb}/FlushReason.java | 2 +- .../org/{rocksdb => forstdb}/GetStatus.java | 2 +- .../HashLinkedListMemTableConfig.java | 2 +- .../HashSkipListMemTableConfig.java | 2 +- .../{rocksdb => forstdb}/HistogramData.java | 2 +- .../{rocksdb => forstdb}/HistogramType.java | 4 +- .../java/org/{rocksdb => forstdb}/Holder.java | 2 +- .../{rocksdb => forstdb}/HyperClockCache.java | 2 +- .../ImportColumnFamilyOptions.java | 2 +- .../IndexShorteningMode.java | 2 +- .../org/{rocksdb => forstdb}/IndexType.java | 2 +- .../{rocksdb => forstdb}/InfoLogLevel.java | 4 +- .../IngestExternalFileOptions.java | 2 +- .../org/{rocksdb => forstdb}/KeyMayExist.java | 2 +- .../org/{rocksdb => forstdb}/LRUCache.java | 2 +- .../{rocksdb => forstdb}/LevelMetaData.java | 2 +- .../LiveFileMetaData.java | 2 +- .../org/{rocksdb => forstdb}/LogFile.java | 2 +- .../java/org/{rocksdb => forstdb}/Logger.java | 22 +- .../{rocksdb => forstdb}/MemTableConfig.java | 2 +- .../{rocksdb => forstdb}/MemTableInfo.java | 2 +- .../{rocksdb => forstdb}/MemoryUsageType.java | 2 +- .../org/{rocksdb => forstdb}/MemoryUtil.java | 2 +- .../{rocksdb => forstdb}/MergeOperator.java | 2 +- .../MutableColumnFamilyOptions.java | 2 +- .../MutableColumnFamilyOptionsInterface.java | 2 +- .../MutableDBOptions.java | 2 +- .../MutableDBOptionsInterface.java | 2 +- .../MutableOptionKey.java | 2 +- .../MutableOptionValue.java | 4 +- .../NativeComparatorWrapper.java | 2 +- .../NativeLibraryLoader.java | 6 +- .../{rocksdb => forstdb}/OperationStage.java | 2 +- .../{rocksdb => forstdb}/OperationType.java | 2 +- .../OptimisticTransactionDB.java | 6 +- .../OptimisticTransactionOptions.java | 2 +- .../{rocksdb => forstdb}/OptionString.java | 2 +- .../org/{rocksdb => forstdb}/Options.java | 8 +- .../org/{rocksdb => forstdb}/OptionsUtil.java | 16 +- .../org/{rocksdb => forstdb}/PerfContext.java | 2 +- .../org/{rocksdb => forstdb}/PerfLevel.java | 2 +- .../{rocksdb => forstdb}/PersistentCache.java | 2 +- .../PlainTableConfig.java | 4 +- .../PrepopulateBlobCache.java | 2 +- .../org/{rocksdb => 
forstdb}/Priority.java | 4 +- .../java/org/{rocksdb => forstdb}/Range.java | 2 +- .../org/{rocksdb => forstdb}/RateLimiter.java | 2 +- .../{rocksdb => forstdb}/RateLimiterMode.java | 2 +- .../org/{rocksdb => forstdb}/ReadOptions.java | 2 +- .../org/{rocksdb => forstdb}/ReadTier.java | 4 +- .../RemoveEmptyValueCompactionFilter.java | 2 +- .../{rocksdb => forstdb}/RestoreOptions.java | 2 +- .../ReusedSynchronisationType.java | 4 +- .../RocksCallbackObject.java | 2 +- .../org/{rocksdb => forstdb}/RocksDB.java | 138 +- .../RocksDBException.java | 2 +- .../org/{rocksdb => forstdb}/RocksEnv.java | 2 +- .../{rocksdb => forstdb}/RocksIterator.java | 6 +- .../RocksIteratorInterface.java | 4 +- .../org/{rocksdb => forstdb}/RocksMemEnv.java | 2 +- .../RocksMutableObject.java | 2 +- .../org/{rocksdb => forstdb}/RocksObject.java | 2 +- .../org/{rocksdb => forstdb}/SanityLevel.java | 2 +- .../SizeApproximationFlag.java | 2 +- .../SkipListMemTableConfig.java | 2 +- .../java/org/{rocksdb => forstdb}/Slice.java | 6 +- .../org/{rocksdb => forstdb}/Snapshot.java | 2 +- .../{rocksdb => forstdb}/SstFileManager.java | 2 +- .../{rocksdb => forstdb}/SstFileMetaData.java | 2 +- .../{rocksdb => forstdb}/SstFileReader.java | 2 +- .../SstFileReaderIterator.java | 2 +- .../{rocksdb => forstdb}/SstFileWriter.java | 6 +- .../SstPartitionerFactory.java | 2 +- .../SstPartitionerFixedPrefixFactory.java | 2 +- .../org/{rocksdb => forstdb}/StateType.java | 2 +- .../org/{rocksdb => forstdb}/Statistics.java | 2 +- .../StatisticsCollector.java | 2 +- .../StatisticsCollectorCallback.java | 2 +- .../StatsCollectorInput.java | 2 +- .../org/{rocksdb => forstdb}/StatsLevel.java | 4 +- .../java/org/{rocksdb => forstdb}/Status.java | 2 +- .../StringAppendOperator.java | 2 +- .../TableFileCreationBriefInfo.java | 2 +- .../TableFileCreationInfo.java | 2 +- .../TableFileCreationReason.java | 2 +- .../TableFileDeletionInfo.java | 2 +- .../org/{rocksdb => forstdb}/TableFilter.java | 2 +- .../TableFormatConfig.java | 2 +- .../{rocksdb => forstdb}/TableProperties.java | 2 +- .../{rocksdb => forstdb}/ThreadStatus.java | 2 +- .../org/{rocksdb => forstdb}/ThreadType.java | 2 +- .../org/{rocksdb => forstdb}/TickerType.java | 8 +- .../org/{rocksdb => forstdb}/TimedEnv.java | 2 +- .../{rocksdb => forstdb}/TraceOptions.java | 2 +- .../org/{rocksdb => forstdb}/TraceWriter.java | 2 +- .../org/{rocksdb => forstdb}/Transaction.java | 68 +- .../{rocksdb => forstdb}/TransactionDB.java | 10 +- .../TransactionDBOptions.java | 2 +- .../TransactionLogIterator.java | 12 +- .../TransactionOptions.java | 2 +- .../{rocksdb => forstdb}/TransactionalDB.java | 2 +- .../TransactionalOptions.java | 2 +- .../java/org/{rocksdb => forstdb}/TtlDB.java | 10 +- .../TxnDBWritePolicy.java | 2 +- .../UInt64AddOperator.java | 2 +- .../VectorMemTableConfig.java | 2 +- .../{rocksdb => forstdb}/WALRecoveryMode.java | 2 +- .../WBWIRocksIterator.java | 10 +- .../org/{rocksdb => forstdb}/WalFileType.java | 2 +- .../org/{rocksdb => forstdb}/WalFilter.java | 2 +- .../WalProcessingOption.java | 2 +- .../org/{rocksdb => forstdb}/WriteBatch.java | 2 +- .../WriteBatchInterface.java | 6 +- .../WriteBatchWithIndex.java | 30 +- .../WriteBufferManager.java | 2 +- .../{rocksdb => forstdb}/WriteOptions.java | 2 +- .../WriteStallCondition.java | 2 +- .../{rocksdb => forstdb}/WriteStallInfo.java | 2 +- .../{rocksdb => forstdb}/util/BufferUtil.java | 2 +- .../{rocksdb => forstdb}/util/ByteUtil.java | 2 +- .../util/BytewiseComparator.java | 8 +- .../util/Environment.java | 2 +- 
.../util/IntComparator.java | 6 +- .../util/ReverseBytewiseComparator.java | 10 +- .../{rocksdb => forstdb}/util/SizeUnit.java | 2 +- .../AbstractTransactionTest.java | 2 +- .../BackupEngineOptionsTest.java | 2 +- .../BackupEngineTest.java | 2 +- .../{rocksdb => forstdb}/BlobOptionsTest.java | 2 +- .../BlockBasedTableConfigTest.java | 2 +- .../BuiltinComparatorTest.java | 2 +- .../ByteBufferUnsupportedOperationTest.java | 4 +- .../BytewiseComparatorRegressionTest.java | 4 +- .../{rocksdb => forstdb}/CheckPointTest.java | 2 +- .../{rocksdb => forstdb}/ClockCacheTest.java | 2 +- .../ColumnFamilyOptionsTest.java | 4 +- .../ColumnFamilyTest.java | 2 +- .../CompactRangeOptionsTest.java | 4 +- .../CompactionFilterFactoryTest.java | 4 +- .../CompactionJobInfoTest.java | 2 +- .../CompactionJobStatsTest.java | 2 +- .../CompactionOptionsFIFOTest.java | 2 +- .../CompactionOptionsTest.java | 2 +- .../CompactionOptionsUniversalTest.java | 2 +- .../CompactionPriorityTest.java | 2 +- .../CompactionStopStyleTest.java | 2 +- .../ComparatorOptionsTest.java | 2 +- .../CompressionOptionsTest.java | 2 +- .../CompressionTypesTest.java | 2 +- .../ConcurrentTaskLimiterTest.java | 2 +- .../{rocksdb => forstdb}/DBOptionsTest.java | 2 +- .../{rocksdb => forstdb}/DefaultEnvTest.java | 2 +- .../{rocksdb => forstdb}/DirectSliceTest.java | 2 +- .../{rocksdb => forstdb}/EnvOptionsTest.java | 2 +- .../EventListenerTest.java | 6 +- .../org/{rocksdb => forstdb}/FilterTest.java | 2 +- .../FlinkCompactionFilterTest.java | 6 +- .../FlushOptionsTest.java | 2 +- .../org/{rocksdb => forstdb}/FlushTest.java | 2 +- .../HyperClockCacheTest.java | 2 +- .../ImportColumnFamilyTest.java | 4 +- .../InfoLogLevelTest.java | 4 +- .../IngestExternalFileOptionsTest.java | 2 +- .../{rocksdb => forstdb}/KeyExistsTest.java | 2 +- .../{rocksdb => forstdb}/KeyMayExistTest.java | 2 +- .../{rocksdb => forstdb}/LRUCacheTest.java | 2 +- .../org/{rocksdb => forstdb}/LoggerTest.java | 2 +- .../{rocksdb => forstdb}/MemTableTest.java | 2 +- .../{rocksdb => forstdb}/MemoryUtilTest.java | 2 +- .../MergeCFVariantsTest.java | 6 +- .../org/{rocksdb => forstdb}/MergeTest.java | 2 +- .../MergeVariantsTest.java | 6 +- .../MixedOptionsTest.java | 2 +- .../MultiColumnRegressionTest.java | 2 +- .../MultiGetManyKeysTest.java | 2 +- .../{rocksdb => forstdb}/MultiGetTest.java | 4 +- .../MutableColumnFamilyOptionsTest.java | 4 +- .../MutableDBOptionsTest.java | 4 +- .../MutableOptionsGetSetTest.java | 2 +- .../NativeComparatorWrapperTest.java | 2 +- .../NativeLibraryLoaderTest.java | 4 +- .../OptimisticTransactionDBTest.java | 2 +- .../OptimisticTransactionOptionsTest.java | 4 +- .../OptimisticTransactionTest.java | 2 +- .../org/{rocksdb => forstdb}/OptionsTest.java | 4 +- .../{rocksdb => forstdb}/OptionsUtilTest.java | 2 +- .../{rocksdb => forstdb}/PerfContextTest.java | 2 +- .../{rocksdb => forstdb}/PerfLevelTest.java | 4 +- .../PlainTableConfigTest.java | 2 +- .../PlatformRandomHelper.java | 2 +- .../PutCFVariantsTest.java | 6 +- .../PutMultiplePartsTest.java | 2 +- .../{rocksdb => forstdb}/PutVariantsTest.java | 6 +- .../{rocksdb => forstdb}/RateLimiterTest.java | 4 +- .../{rocksdb => forstdb}/ReadOnlyTest.java | 2 +- .../{rocksdb => forstdb}/ReadOptionsTest.java | 2 +- .../RocksDBExceptionTest.java | 6 +- .../org/{rocksdb => forstdb}/RocksDBTest.java | 2 +- .../RocksIteratorTest.java | 2 +- .../{rocksdb => forstdb}/RocksMemEnvTest.java | 2 +- .../RocksNativeLibraryResource.java | 2 +- .../{rocksdb => forstdb}/SecondaryDBTest.java | 2 +- .../org/{rocksdb => 
forstdb}/SliceTest.java | 2 +- .../{rocksdb => forstdb}/SnapshotTest.java | 2 +- .../SstFileManagerTest.java | 2 +- .../SstFileReaderTest.java | 4 +- .../SstFileWriterTest.java | 4 +- .../SstPartitionerTest.java | 2 +- .../StatisticsCollectorTest.java | 2 +- .../{rocksdb => forstdb}/StatisticsTest.java | 2 +- .../StatsCallbackMock.java | 2 +- .../{rocksdb => forstdb}/TableFilterTest.java | 2 +- .../{rocksdb => forstdb}/TimedEnvTest.java | 2 +- .../TransactionDBOptionsTest.java | 2 +- .../TransactionDBTest.java | 2 +- .../TransactionLogIteratorTest.java | 2 +- .../TransactionOptionsTest.java | 2 +- .../{rocksdb => forstdb}/TransactionTest.java | 2 +- .../org/{rocksdb => forstdb}/TtlDBTest.java | 2 +- .../java/org/{rocksdb => forstdb}/Types.java | 2 +- .../VerifyChecksumsTest.java | 2 +- .../WALRecoveryModeTest.java | 2 +- .../{rocksdb => forstdb}/WalFilterTest.java | 6 +- .../WriteBatchHandlerTest.java | 8 +- .../{rocksdb => forstdb}/WriteBatchTest.java | 20 +- .../WriteBatchThreadedTest.java | 2 +- .../WriteBatchWithIndexTest.java | 4 +- .../WriteOptionsTest.java | 2 +- .../flink/FlinkEnvTest.java | 6 +- ...moveEmptyValueCompactionFilterFactory.java | 8 +- .../test/RocksJunitRunner.java | 6 +- .../test/TestableEventListener.java | 4 +- .../util/ByteBufferAllocator.java | 2 +- .../util/BytewiseComparatorIntTest.java | 4 +- .../util/BytewiseComparatorTest.java | 6 +- .../util/CapturingWriteBatchHandler.java | 6 +- .../util/DirectByteBufferAllocator.java | 2 +- .../util/EnvironmentTest.java | 2 +- .../util/HeapByteBufferAllocator.java | 2 +- .../util/IntComparatorTest.java | 4 +- .../util/JNIComparatorTest.java | 4 +- .../ReverseBytewiseComparatorIntTest.java | 4 +- .../util/SizeUnitTest.java | 2 +- .../{rocksdb => forstdb}/util/TestUtil.java | 8 +- .../util/WriteBatchGetter.java | 6 +- logging/auto_roll_logger.cc | 2 +- src.mk | 170 +- 536 files changed, 18581 insertions(+), 4944 deletions(-) rename java/{rocksjni => forstjni}/backup_engine_options.cc (77%) rename java/{rocksjni => forstjni}/backupenginejni.cc (86%) rename java/{rocksjni => forstjni}/cache.cc (78%) rename java/{rocksjni => forstjni}/cassandra_compactionfilterjni.cc (78%) rename java/{rocksjni => forstjni}/cassandra_value_operator.cc (77%) rename java/{rocksjni => forstjni}/checkpoint.cc (85%) rename java/{rocksjni => forstjni}/clock_cache.cc (81%) rename java/{rocksjni => forstjni}/columnfamilyhandle.cc (80%) rename java/{rocksjni => forstjni}/compact_range_options.cc (70%) rename java/{rocksjni => forstjni}/compaction_filter.cc (83%) rename java/{rocksjni => forstjni}/compaction_filter_factory.cc (71%) rename java/{rocksjni => forstjni}/compaction_filter_factory_jnicallback.cc (96%) rename java/{rocksjni => forstjni}/compaction_filter_factory_jnicallback.h (97%) rename java/{rocksjni => forstjni}/compaction_job_info.cc (79%) rename java/{rocksjni => forstjni}/compaction_job_stats.cc (74%) rename java/{rocksjni => forstjni}/compaction_options.cc (74%) rename java/{rocksjni => forstjni}/compaction_options_fifo.cc (73%) rename java/{rocksjni => forstjni}/compaction_options_universal.cc (71%) rename java/{rocksjni => forstjni}/comparator.cc (71%) rename java/{rocksjni => forstjni}/comparatorjnicallback.cc (99%) rename java/{rocksjni => forstjni}/comparatorjnicallback.h (99%) rename java/{rocksjni => forstjni}/compression_options.cc (73%) rename java/{rocksjni => forstjni}/concurrent_task_limiter.cc (75%) rename java/{rocksjni => forstjni}/config_options.cc (76%) rename java/{rocksjni => forstjni}/cplusplus_to_java_convert.h 
(100%) rename java/{rocksjni => forstjni}/env.cc (78%) rename java/{rocksjni => forstjni}/env_flink.cc (87%) rename java/{rocksjni => forstjni}/env_flink_test_suite.cc (84%) rename java/{rocksjni => forstjni}/env_options.cc (72%) rename java/{rocksjni => forstjni}/event_listener.cc (74%) rename java/{rocksjni => forstjni}/event_listener_jnicallback.cc (99%) rename java/{rocksjni => forstjni}/event_listener_jnicallback.h (99%) rename java/{rocksjni => forstjni}/export_import_files_metadatajni.cc (67%) rename java/{rocksjni => forstjni}/filter.cc (76%) rename java/{rocksjni => forstjni}/flink_compactionfilterjni.cc (94%) rename java/{rocksjni => forstjni}/hyper_clock_cache.cc (78%) rename java/{rocksjni => forstjni}/import_column_family_options.cc (71%) rename java/{rocksjni => forstjni}/ingest_external_file_options.cc (73%) rename java/{rocksjni => forstjni}/iterator.cc (82%) rename java/{rocksjni => forstjni}/jni_perf_context.cc (75%) rename java/{rocksjni => forstjni}/jnicallback.cc (96%) rename java/{rocksjni => forstjni}/jnicallback.h (100%) rename java/{rocksjni => forstjni}/kv_helper.h (99%) rename java/{rocksjni => forstjni}/loggerjnicallback.cc (92%) rename java/{rocksjni => forstjni}/loggerjnicallback.h (97%) rename java/{rocksjni => forstjni}/lru_cache.cc (78%) rename java/{rocksjni => forstjni}/memory_util.cc (95%) rename java/{rocksjni => forstjni}/memtablejni.cc (79%) rename java/{rocksjni => forstjni}/merge_operator.cc (80%) rename java/{rocksjni => forstjni}/native_comparator_wrapper_test.cc (83%) rename java/{rocksjni => forstjni}/optimistic_transaction_db.cc (88%) rename java/{rocksjni => forstjni}/optimistic_transaction_options.cc (72%) rename java/{rocksjni => forstjni}/options.cc (76%) rename java/{rocksjni => forstjni}/options_util.cc (93%) rename java/{rocksjni => forstjni}/persistent_cache.cc (85%) rename java/{rocksjni => forstjni}/portal.h (99%) rename java/{rocksjni => forstjni}/ratelimiterjni.cc (79%) rename java/{rocksjni => forstjni}/remove_emptyvalue_compactionfilterjni.cc (75%) rename java/{rocksjni => forstjni}/restorejni.cc (76%) rename java/{rocksjni => forstjni}/rocks_callback_object.cc (87%) rename java/{rocksjni => forstjni}/rocksdb_exception_test.cc (72%) rename java/{rocksjni => forstjni}/rocksjni.cc (92%) rename java/{rocksjni => forstjni}/slice.cc (79%) rename java/{rocksjni => forstjni}/snapshot.cc (81%) rename java/{rocksjni => forstjni}/sst_file_manager.cc (84%) rename java/{rocksjni => forstjni}/sst_file_reader_iterator.cc (82%) rename java/{rocksjni => forstjni}/sst_file_readerjni.cc (82%) rename java/{rocksjni => forstjni}/sst_file_writerjni.cc (86%) rename java/{rocksjni => forstjni}/sst_partitioner.cc (74%) rename java/{rocksjni => forstjni}/statistics.cc (81%) rename java/{rocksjni => forstjni}/statisticsjni.cc (96%) rename java/{rocksjni => forstjni}/statisticsjni.h (100%) rename java/{rocksjni => forstjni}/table.cc (94%) rename java/{rocksjni => forstjni}/table_filter.cc (72%) rename java/{rocksjni => forstjni}/table_filter_jnicallback.cc (96%) rename java/{rocksjni => forstjni}/table_filter_jnicallback.h (96%) rename java/{rocksjni => forstjni}/testable_event_listener.cc (98%) rename java/{rocksjni => forstjni}/thread_status.cc (83%) rename java/{rocksjni => forstjni}/trace_writer.cc (72%) rename java/{rocksjni => forstjni}/trace_writer_jnicallback.cc (97%) rename java/{rocksjni => forstjni}/trace_writer_jnicallback.h (96%) rename java/{rocksjni => forstjni}/transaction.cc (88%) rename java/{rocksjni => forstjni}/transaction_db.cc (91%) 
rename java/{rocksjni => forstjni}/transaction_db_options.cc (75%) rename java/{rocksjni => forstjni}/transaction_log.cc (77%) rename java/{rocksjni => forstjni}/transaction_notifier.cc (76%) rename java/{rocksjni => forstjni}/transaction_notifier_jnicallback.cc (90%) rename java/{rocksjni => forstjni}/transaction_notifier_jnicallback.h (97%) rename java/{rocksjni => forstjni}/transaction_options.cc (75%) rename java/{rocksjni => forstjni}/ttl.cc (91%) rename java/{rocksjni => forstjni}/wal_filter.cc (71%) rename java/{rocksjni => forstjni}/wal_filter_jnicallback.cc (97%) rename java/{rocksjni => forstjni}/wal_filter_jnicallback.h (97%) rename java/{rocksjni => forstjni}/write_batch.cc (83%) rename java/{rocksjni => forstjni}/write_batch_test.cc (90%) rename java/{rocksjni => forstjni}/write_batch_with_index.cc (84%) rename java/{rocksjni => forstjni}/write_buffer_manager.cc (81%) rename java/{rocksjni => forstjni}/writebatchhandlerjnicallback.cc (99%) rename java/{rocksjni => forstjni}/writebatchhandlerjnicallback.h (99%) create mode 100644 java/include/org_forstdb_AbstractCompactionFilter.h create mode 100644 java/include/org_forstdb_AbstractCompactionFilterFactory.h create mode 100644 java/include/org_forstdb_AbstractComparator.h create mode 100644 java/include/org_forstdb_AbstractEventListener.h create mode 100644 java/include/org_forstdb_AbstractSlice.h create mode 100644 java/include/org_forstdb_AbstractTableFilter.h create mode 100644 java/include/org_forstdb_AbstractTraceWriter.h create mode 100644 java/include/org_forstdb_AbstractTransactionNotifier.h create mode 100644 java/include/org_forstdb_AbstractWalFilter.h create mode 100644 java/include/org_forstdb_BackupEngine.h create mode 100644 java/include/org_forstdb_BackupEngineOptions.h create mode 100644 java/include/org_forstdb_BlockBasedTableConfig.h create mode 100644 java/include/org_forstdb_BloomFilter.h create mode 100644 java/include/org_forstdb_Cache.h create mode 100644 java/include/org_forstdb_CassandraCompactionFilter.h create mode 100644 java/include/org_forstdb_CassandraValueMergeOperator.h create mode 100644 java/include/org_forstdb_Checkpoint.h create mode 100644 java/include/org_forstdb_ClockCache.h create mode 100644 java/include/org_forstdb_ColumnFamilyHandle.h create mode 100644 java/include/org_forstdb_ColumnFamilyOptions.h create mode 100644 java/include/org_forstdb_CompactRangeOptions.h create mode 100644 java/include/org_forstdb_CompactionJobInfo.h create mode 100644 java/include/org_forstdb_CompactionJobStats.h create mode 100644 java/include/org_forstdb_CompactionOptions.h create mode 100644 java/include/org_forstdb_CompactionOptionsFIFO.h create mode 100644 java/include/org_forstdb_CompactionOptionsUniversal.h create mode 100644 java/include/org_forstdb_ComparatorOptions.h create mode 100644 java/include/org_forstdb_CompressionOptions.h create mode 100644 java/include/org_forstdb_ConcurrentTaskLimiterImpl.h create mode 100644 java/include/org_forstdb_ConfigOptions.h create mode 100644 java/include/org_forstdb_DBOptions.h create mode 100644 java/include/org_forstdb_DirectSlice.h create mode 100644 java/include/org_forstdb_Env.h create mode 100644 java/include/org_forstdb_EnvFlinkTestSuite.h create mode 100644 java/include/org_forstdb_EnvOptions.h create mode 100644 java/include/org_forstdb_ExportImportFilesMetaData.h create mode 100644 java/include/org_forstdb_Filter.h create mode 100644 java/include/org_forstdb_FlinkCompactionFilter.h create mode 100644 java/include/org_forstdb_FlinkEnv.h create mode 
100644 java/include/org_forstdb_FlushOptions.h create mode 100644 java/include/org_forstdb_HashLinkedListMemTableConfig.h create mode 100644 java/include/org_forstdb_HashSkipListMemTableConfig.h create mode 100644 java/include/org_forstdb_HyperClockCache.h create mode 100644 java/include/org_forstdb_ImportColumnFamilyOptions.h create mode 100644 java/include/org_forstdb_IngestExternalFileOptions.h create mode 100644 java/include/org_forstdb_LRUCache.h create mode 100644 java/include/org_forstdb_LiveFileMetaData.h create mode 100644 java/include/org_forstdb_Logger.h create mode 100644 java/include/org_forstdb_MemoryUtil.h create mode 100644 java/include/org_forstdb_NativeComparatorWrapper.h create mode 100644 java/include/org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h create mode 100644 java/include/org_forstdb_OptimisticTransactionDB.h create mode 100644 java/include/org_forstdb_OptimisticTransactionOptions.h create mode 100644 java/include/org_forstdb_Options.h create mode 100644 java/include/org_forstdb_OptionsUtil.h create mode 100644 java/include/org_forstdb_PerfContext.h create mode 100644 java/include/org_forstdb_PersistentCache.h create mode 100644 java/include/org_forstdb_PlainTableConfig.h create mode 100644 java/include/org_forstdb_RateLimiter.h create mode 100644 java/include/org_forstdb_ReadOptions.h create mode 100644 java/include/org_forstdb_RemoveEmptyValueCompactionFilter.h create mode 100644 java/include/org_forstdb_RestoreOptions.h create mode 100644 java/include/org_forstdb_RocksCallbackObject.h create mode 100644 java/include/org_forstdb_RocksDB.h create mode 100644 java/include/org_forstdb_RocksDBExceptionTest.h create mode 100644 java/include/org_forstdb_RocksEnv.h create mode 100644 java/include/org_forstdb_RocksIterator.h create mode 100644 java/include/org_forstdb_RocksMemEnv.h create mode 100644 java/include/org_forstdb_SkipListMemTableConfig.h create mode 100644 java/include/org_forstdb_Slice.h create mode 100644 java/include/org_forstdb_Snapshot.h create mode 100644 java/include/org_forstdb_SstFileManager.h create mode 100644 java/include/org_forstdb_SstFileReader.h create mode 100644 java/include/org_forstdb_SstFileReaderIterator.h create mode 100644 java/include/org_forstdb_SstFileWriter.h create mode 100644 java/include/org_forstdb_SstPartitionerFixedPrefixFactory.h create mode 100644 java/include/org_forstdb_Statistics.h create mode 100644 java/include/org_forstdb_StringAppendOperator.h create mode 100644 java/include/org_forstdb_ThreadStatus.h create mode 100644 java/include/org_forstdb_TimedEnv.h create mode 100644 java/include/org_forstdb_Transaction.h create mode 100644 java/include/org_forstdb_TransactionDB.h create mode 100644 java/include/org_forstdb_TransactionDBOptions.h create mode 100644 java/include/org_forstdb_TransactionLogIterator.h create mode 100644 java/include/org_forstdb_TransactionOptions.h create mode 100644 java/include/org_forstdb_TtlDB.h create mode 100644 java/include/org_forstdb_UInt64AddOperator.h create mode 100644 java/include/org_forstdb_VectorMemTableConfig.h create mode 100644 java/include/org_forstdb_WBWIRocksIterator.h create mode 100644 java/include/org_forstdb_WriteBatch.h create mode 100644 java/include/org_forstdb_WriteBatchTest.h create mode 100644 java/include/org_forstdb_WriteBatchTestInternalHelper.h create mode 100644 java/include/org_forstdb_WriteBatchWithIndex.h create mode 100644 java/include/org_forstdb_WriteBatch_Handler.h create mode 100644 
java/include/org_forstdb_WriteBufferManager.h create mode 100644 java/include/org_forstdb_WriteOptions.h create mode 100644 java/include/org_forstdb_test_TestableEventListener.h rename java/src/main/java/org/{rocksdb => forstdb}/AbstractCompactionFilter.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractCompactionFilterFactory.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractComparator.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractComparatorJniBridge.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractEventListener.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractImmutableNativeReference.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractMutableOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractNativeReference.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractRocksIterator.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractSlice.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractTableFilter.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractTraceWriter.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractTransactionNotifier.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractWalFilter.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AbstractWriteBatch.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/AccessHint.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AdvancedColumnFamilyOptionsInterface.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/AdvancedMutableColumnFamilyOptionsInterface.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/BackgroundErrorReason.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/BackupEngine.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/BackupEngineOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/BackupInfo.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/BlockBasedTableConfig.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/BloomFilter.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/BuiltinComparator.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/ByteBufferGetStatus.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Cache.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/CassandraCompactionFilter.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/CassandraValueMergeOperator.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/Checkpoint.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ChecksumType.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/ClockCache.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ColumnFamilyDescriptor.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ColumnFamilyHandle.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ColumnFamilyMetaData.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/ColumnFamilyOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ColumnFamilyOptionsInterface.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactRangeOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionJobInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionJobStats.java (99%) rename java/src/main/java/org/{rocksdb => 
forstdb}/CompactionOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionOptionsFIFO.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionOptionsUniversal.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionPriority.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionReason.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionStopStyle.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/CompactionStyle.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ComparatorOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ComparatorType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/CompressionOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/CompressionType.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ConcurrentTaskLimiter.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/ConcurrentTaskLimiterImpl.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/ConfigOptions.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/DBOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/DBOptionsInterface.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/DataBlockIndexType.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/DbPath.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/DirectSlice.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/EncodingType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Env.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/EnvFlinkTestSuite.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/EnvOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/EventListener.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/Experimental.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/ExportImportFilesMetaData.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/ExternalFileIngestionInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/FileOperationInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/Filter.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/FilterPolicyType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/FlinkCompactionFilter.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/FlinkEnv.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/FlushJobInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/FlushOptions.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/FlushReason.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/GetStatus.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/HashLinkedListMemTableConfig.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/HashSkipListMemTableConfig.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/HistogramData.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/HistogramType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Holder.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/HyperClockCache.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/ImportColumnFamilyOptions.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/IndexShorteningMode.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/IndexType.java (98%) rename java/src/main/java/org/{rocksdb => 
forstdb}/InfoLogLevel.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/IngestExternalFileOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/KeyMayExist.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/LRUCache.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/LevelMetaData.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/LiveFileMetaData.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/LogFile.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Logger.java (84%) rename java/src/main/java/org/{rocksdb => forstdb}/MemTableConfig.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/MemTableInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MemoryUsageType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/MemoryUtil.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MergeOperator.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableColumnFamilyOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableColumnFamilyOptionsInterface.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableDBOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableDBOptionsInterface.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableOptionKey.java (92%) rename java/src/main/java/org/{rocksdb => forstdb}/MutableOptionValue.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/NativeComparatorWrapper.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/NativeLibraryLoader.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/OperationStage.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/OperationType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/OptimisticTransactionDB.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/OptimisticTransactionOptions.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/OptionString.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/Options.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/OptionsUtil.java (90%) rename java/src/main/java/org/{rocksdb => forstdb}/PerfContext.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/PerfLevel.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/PersistentCache.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/PlainTableConfig.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/PrepopulateBlobCache.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/Priority.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/Range.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/RateLimiter.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/RateLimiterMode.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/ReadOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ReadTier.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/RemoveEmptyValueCompactionFilter.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/RestoreOptions.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/ReusedSynchronisationType.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksCallbackObject.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksDB.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksDBException.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksEnv.java (98%) 
rename java/src/main/java/org/{rocksdb => forstdb}/RocksIterator.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksIteratorInterface.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksMemEnv.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksMutableObject.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/RocksObject.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/SanityLevel.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/SizeApproximationFlag.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/SkipListMemTableConfig.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Slice.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/Snapshot.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/SstFileManager.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/SstFileMetaData.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/SstFileReader.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/SstFileReaderIterator.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/SstFileWriter.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/SstPartitionerFactory.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/SstPartitionerFixedPrefixFactory.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/StateType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/Statistics.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/StatisticsCollector.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/StatisticsCollectorCallback.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/StatsCollectorInput.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/StatsLevel.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/Status.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/StringAppendOperator.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFileCreationBriefInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFileCreationInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFileCreationReason.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFileDeletionInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFilter.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TableFormatConfig.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TableProperties.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ThreadStatus.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/ThreadType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/TickerType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/TimedEnv.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TraceOptions.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TraceWriter.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/Transaction.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionDB.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionDBOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionLogIterator.java (89%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionalDB.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/TransactionalOptions.java (97%) rename 
java/src/main/java/org/{rocksdb => forstdb}/TtlDB.java (97%) rename java/src/main/java/org/{rocksdb => forstdb}/TxnDBWritePolicy.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/UInt64AddOperator.java (96%) rename java/src/main/java/org/{rocksdb => forstdb}/VectorMemTableConfig.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WALRecoveryMode.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/WBWIRocksIterator.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/WalFileType.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WalFilter.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/WalProcessingOption.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteBatch.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteBatchInterface.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteBatchWithIndex.java (94%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteBufferManager.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteOptions.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteStallCondition.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/WriteStallInfo.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/util/BufferUtil.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/util/ByteUtil.java (98%) rename java/src/main/java/org/{rocksdb => forstdb}/util/BytewiseComparator.java (95%) rename java/src/main/java/org/{rocksdb => forstdb}/util/Environment.java (99%) rename java/src/main/java/org/{rocksdb => forstdb}/util/IntComparator.java (94%) rename java/src/main/java/org/{rocksdb => forstdb}/util/ReverseBytewiseComparator.java (93%) rename java/src/main/java/org/{rocksdb => forstdb}/util/SizeUnit.java (95%) rename java/src/test/java/org/{rocksdb => forstdb}/AbstractTransactionTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/BackupEngineOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/BackupEngineTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/BlobOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/BlockBasedTableConfigTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/BuiltinComparatorTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/ByteBufferUnsupportedOperationTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/BytewiseComparatorRegressionTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/CheckPointTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/ClockCacheTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/ColumnFamilyOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/ColumnFamilyTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactRangeOptionsTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionFilterFactoryTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionJobInfoTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionJobStatsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionOptionsFIFOTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionOptionsTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionOptionsUniversalTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/CompactionPriorityTest.java (97%) rename 
java/src/test/java/org/{rocksdb => forstdb}/CompactionStopStyleTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/ComparatorOptionsTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/CompressionOptionsTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/CompressionTypesTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/ConcurrentTaskLimiterTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/DBOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/DefaultEnvTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/DirectSliceTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/EnvOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/EventListenerTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/FilterTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/FlinkCompactionFilterTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/FlushOptionsTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/FlushTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/HyperClockCacheTest.java (95%) rename java/src/test/java/org/{rocksdb => forstdb}/ImportColumnFamilyTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/InfoLogLevelTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/IngestExternalFileOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/KeyExistsTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/KeyMayExistTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/LRUCacheTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/LoggerTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MemTableTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MemoryUtilTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MergeCFVariantsTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/MergeTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MergeVariantsTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/MixedOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MultiColumnRegressionTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MultiGetManyKeysTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MultiGetTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MutableColumnFamilyOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/MutableDBOptionsTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/MutableOptionsGetSetTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/NativeComparatorWrapperTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/NativeLibraryLoaderTest.java (95%) rename java/src/test/java/org/{rocksdb => forstdb}/OptimisticTransactionDBTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/OptimisticTransactionOptionsTest.java (94%) rename java/src/test/java/org/{rocksdb => forstdb}/OptimisticTransactionTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/OptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/OptionsUtilTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/PerfContextTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/PerfLevelTest.java (97%) rename java/src/test/java/org/{rocksdb => 
forstdb}/PlainTableConfigTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/PlatformRandomHelper.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/PutCFVariantsTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/PutMultiplePartsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/PutVariantsTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/RateLimiterTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/ReadOnlyTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/ReadOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/RocksDBExceptionTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/RocksDBTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/RocksIteratorTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/RocksMemEnvTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/RocksNativeLibraryResource.java (95%) rename java/src/test/java/org/{rocksdb => forstdb}/SecondaryDBTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SliceTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SnapshotTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SstFileManagerTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SstFileReaderTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SstFileWriterTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/SstPartitionerTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/StatisticsCollectorTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/StatisticsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/StatsCallbackMock.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/TableFilterTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/TimedEnvTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/TransactionDBOptionsTest.java (98%) rename java/src/test/java/org/{rocksdb => forstdb}/TransactionDBTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/TransactionLogIteratorTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/TransactionOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/TransactionTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/TtlDBTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/Types.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/VerifyChecksumsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/WALRecoveryModeTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/WalFilterTest.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/WriteBatchHandlerTest.java (91%) rename java/src/test/java/org/{rocksdb => forstdb}/WriteBatchTest.java (96%) rename java/src/test/java/org/{rocksdb => forstdb}/WriteBatchThreadedTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/WriteBatchWithIndexTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/WriteOptionsTest.java (99%) rename java/src/test/java/org/{rocksdb => forstdb}/flink/FlinkEnvTest.java (92%) rename java/src/test/java/org/{rocksdb => forstdb}/test/RemoveEmptyValueCompactionFilterFactory.java (77%) rename java/src/test/java/org/{rocksdb => forstdb}/test/RocksJunitRunner.java (97%) rename java/src/test/java/org/{rocksdb => forstdb}/test/TestableEventListener.java (90%) rename java/src/test/java/org/{rocksdb 
=> forstdb}/util/ByteBufferAllocator.java (94%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/BytewiseComparatorIntTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/BytewiseComparatorTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/CapturingWriteBatchHandler.java (98%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/DirectByteBufferAllocator.java (95%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/EnvironmentTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/HeapByteBufferAllocator.java (95%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/IntComparatorTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/JNIComparatorTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/ReverseBytewiseComparatorIntTest.java (99%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/SizeUnitTest.java (97%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/TestUtil.java (93%)
 rename java/src/test/java/org/{rocksdb => forstdb}/util/WriteBatchGetter.java (97%)

diff --git a/Makefile b/Makefile
index 93fae2739..bd636c840 100644
--- a/Makefile
+++ b/Makefile
@@ -2282,14 +2282,14 @@ endif
 rocksdbjavastaticosx: rocksdbjavastaticosx_archs
 	cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md
 	cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib
-	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
+	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class
 	openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1

 rocksdbjavastaticosx_ub: rocksdbjavastaticosx_archs
 	cd java/target; lipo -create -output librocksdbjni-osx.jnilib librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib
 	cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md
 	cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) librocksdbjni-osx.jnilib
-	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
+	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class
 	openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1

 rocksdbjavastaticosx_archs:
@@ -2327,7 +2327,7 @@ rocksdbjavastatic_javalib:
 rocksdbjava_jar:
 	cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md
 	cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB)
-	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
+	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class
 	openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1

 rocksdbjava_javadocs_jar:
@@ -2346,14 +2346,14 @@ rocksdbjavastaticrelease: rocksdbjavastaticosx rocksdbjava_javadocs_jar rocksdbj
 	cd java/crossbuild && (vagrant destroy -f || true) && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 && vagrant up linux64-musl && vagrant halt linux64-musl
 	cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md
 	cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib
-	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class
+	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class
 	openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1

 rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl rocksdbjava_javadocs_jar rocksdbjava_sources_jar
 	cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md
 	jar -uf java/target/$(ROCKSDB_JAR_ALL) HISTORY*.md
 	cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib librocksdbjni-win64.dll
-	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/rocksdb/*.class org/rocksdb/util/*.class
+	cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class
 	openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1

 forstjavastaticreleasedocker: rocksdbjavastaticreleasedocker
@@ -2467,10 +2467,10 @@ ifeq ($(JAVA_HOME),)
 endif
 	$(AM_V_GEN)cd java; $(MAKE) javalib;
 	$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
-	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. -I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
+	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. -I./java/forstjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
 	$(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md
 	$(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB)
-	$(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/rocksdb/*.class org/rocksdb/util/*.class
+	$(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class
 	$(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1

 jclean:
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 0c8ae6033..0b0383ae0 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -4479,7 +4479,7 @@ TEST_F(DBBasicTest, FailOpenIfLoggerCreationFail) {
   SyncPoint::GetInstance()->DisableProcessing();
   SyncPoint::GetInstance()->ClearAllCallBacks();
   SyncPoint::GetInstance()->SetCallBack(
-      "rocksdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) {
+      "forstdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) {
        auto* s = reinterpret_cast<Status*>(arg);
        assert(s);
        *s = Status::IOError("Injected");
diff --git a/db/db_secondary_test.cc b/db/db_secondary_test.cc
index 987756906..8353790c3 100644
--- a/db/db_secondary_test.cc
+++ b/db/db_secondary_test.cc
@@ -130,7 +130,7 @@ TEST_F(DBSecondaryTest, FailOpenIfLoggerCreationFail) {
   SyncPoint::GetInstance()->DisableProcessing();
   SyncPoint::GetInstance()->ClearAllCallBacks();
   SyncPoint::GetInstance()->SetCallBack(
-      "rocksdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) {
+      "forstdb::CreateLoggerFromOptions:AfterGetPath", [&](void* arg) {
        auto* s = reinterpret_cast<Status*>(arg);
        assert(s);
        *s = Status::IOError("Injected");
diff --git a/include/rocksdb/rocksdb_namespace.h b/include/rocksdb/rocksdb_namespace.h
index a339ec2aa..856300003 100644
--- a/include/rocksdb/rocksdb_namespace.h
+++ b/include/rocksdb/rocksdb_namespace.h
@@ -12,5 +12,5 @@

 // Normal logic
 #ifndef ROCKSDB_NAMESPACE
-#define ROCKSDB_NAMESPACE rocksdb
+#define ROCKSDB_NAMESPACE forstdb
 #endif
diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt
index c31083b6f..4c9a8ff8a 100644
--- a/java/CMakeLists.txt
+++ b/java/CMakeLists.txt
@@ -14,507 +14,507 @@ endif()
 set(CMAKE_JAVA_COMPILE_FLAGS -source 8)

 set(JNI_NATIVE_SOURCES
- rocksjni/backup_engine_options.cc
- rocksjni/backupenginejni.cc
- rocksjni/cassandra_compactionfilterjni.cc
- rocksjni/cassandra_value_operator.cc
- rocksjni/checkpoint.cc
- rocksjni/clock_cache.cc
- rocksjni/cache.cc
- rocksjni/columnfamilyhandle.cc
- rocksjni/compaction_filter.cc
- rocksjni/compaction_filter_factory.cc
- rocksjni/compaction_filter_factory_jnicallback.cc
- rocksjni/compaction_job_info.cc
- rocksjni/compaction_job_stats.cc
- rocksjni/compaction_options.cc
- rocksjni/compaction_options_fifo.cc
- rocksjni/compaction_options_universal.cc
- rocksjni/compact_range_options.cc
- rocksjni/comparator.cc
- rocksjni/comparatorjnicallback.cc
- rocksjni/compression_options.cc
- rocksjni/concurrent_task_limiter.cc
- rocksjni/config_options.cc
- rocksjni/env.cc
- rocksjni/env_flink.cc
- rocksjni/env_flink_test_suite.cc
- rocksjni/env_options.cc
- rocksjni/event_listener.cc
- rocksjni/event_listener_jnicallback.cc
- rocksjni/export_import_files_metadatajni.cc
- rocksjni/flink_compactionfilterjni.cc
- rocksjni/filter.cc
- rocksjni/import_column_family_options.cc
- rocksjni/hyper_clock_cache.cc
- rocksjni/ingest_external_file_options.cc
- rocksjni/iterator.cc
- rocksjni/jnicallback.cc
- rocksjni/loggerjnicallback.cc
- rocksjni/lru_cache.cc
- rocksjni/memory_util.cc
- rocksjni/memtablejni.cc
- rocksjni/merge_operator.cc
- rocksjni/native_comparator_wrapper_test.cc
- rocksjni/optimistic_transaction_db.cc
- rocksjni/optimistic_transaction_options.cc
- rocksjni/options.cc
- rocksjni/options_util.cc
- rocksjni/persistent_cache.cc
- rocksjni/jni_perf_context.cc
- rocksjni/ratelimiterjni.cc
- rocksjni/remove_emptyvalue_compactionfilterjni.cc
- rocksjni/restorejni.cc
- rocksjni/rocks_callback_object.cc
- rocksjni/rocksdb_exception_test.cc
- rocksjni/rocksjni.cc
- rocksjni/slice.cc
- rocksjni/snapshot.cc
- rocksjni/sst_file_manager.cc
- rocksjni/sst_file_writerjni.cc
- rocksjni/sst_file_readerjni.cc
- rocksjni/sst_file_reader_iterator.cc
- rocksjni/sst_partitioner.cc
- rocksjni/statistics.cc
- rocksjni/statisticsjni.cc
- rocksjni/table.cc
- rocksjni/table_filter.cc
- rocksjni/table_filter_jnicallback.cc
- rocksjni/testable_event_listener.cc
- rocksjni/thread_status.cc
- rocksjni/trace_writer.cc
- rocksjni/trace_writer_jnicallback.cc
- rocksjni/transaction.cc
- rocksjni/transaction_db.cc
- rocksjni/transaction_db_options.cc
- rocksjni/transaction_log.cc
- rocksjni/transaction_notifier.cc
- rocksjni/transaction_notifier_jnicallback.cc
- rocksjni/transaction_options.cc
- rocksjni/ttl.cc
- rocksjni/wal_filter.cc
- rocksjni/wal_filter_jnicallback.cc
- rocksjni/write_batch.cc
- rocksjni/writebatchhandlerjnicallback.cc
- rocksjni/write_batch_test.cc
- rocksjni/write_batch_with_index.cc
- rocksjni/write_buffer_manager.cc
+ forstjni/backup_engine_options.cc
+ forstjni/backupenginejni.cc
+ forstjni/cassandra_compactionfilterjni.cc
+ forstjni/cassandra_value_operator.cc
+ forstjni/checkpoint.cc
+ forstjni/clock_cache.cc
+ forstjni/cache.cc
+ forstjni/columnfamilyhandle.cc
+ forstjni/compaction_filter.cc
+ forstjni/compaction_filter_factory.cc
+ forstjni/compaction_filter_factory_jnicallback.cc
+ forstjni/compaction_job_info.cc
+ forstjni/compaction_job_stats.cc
+ forstjni/compaction_options.cc
+ forstjni/compaction_options_fifo.cc
+ forstjni/compaction_options_universal.cc
+ forstjni/compact_range_options.cc
+ forstjni/comparator.cc
+ forstjni/comparatorjnicallback.cc
+ forstjni/compression_options.cc
+ forstjni/concurrent_task_limiter.cc
+ forstjni/config_options.cc
+ forstjni/env.cc
+ forstjni/env_flink.cc
+ forstjni/env_flink_test_suite.cc
+ forstjni/env_options.cc
+ forstjni/event_listener.cc
+ forstjni/event_listener_jnicallback.cc
+ forstjni/export_import_files_metadatajni.cc
+ forstjni/flink_compactionfilterjni.cc
+ forstjni/filter.cc
+ forstjni/import_column_family_options.cc
+ forstjni/hyper_clock_cache.cc
+ forstjni/ingest_external_file_options.cc
+ forstjni/iterator.cc
+ forstjni/jnicallback.cc
+ forstjni/loggerjnicallback.cc
+ forstjni/lru_cache.cc
+ forstjni/memory_util.cc
+ forstjni/memtablejni.cc
+ forstjni/merge_operator.cc
+ forstjni/native_comparator_wrapper_test.cc
+ forstjni/optimistic_transaction_db.cc
+ forstjni/optimistic_transaction_options.cc
+ forstjni/options.cc
+ forstjni/options_util.cc
+ forstjni/persistent_cache.cc
+ forstjni/jni_perf_context.cc
+ forstjni/ratelimiterjni.cc
+ forstjni/remove_emptyvalue_compactionfilterjni.cc
+ forstjni/restorejni.cc
+ forstjni/rocks_callback_object.cc
+ forstjni/rocksdb_exception_test.cc
+ forstjni/rocksjni.cc
+ forstjni/slice.cc
+ forstjni/snapshot.cc
+ forstjni/sst_file_manager.cc
+ forstjni/sst_file_writerjni.cc
+ forstjni/sst_file_readerjni.cc
+ forstjni/sst_file_reader_iterator.cc
+ forstjni/sst_partitioner.cc
+ forstjni/statistics.cc
+ forstjni/statisticsjni.cc
+ forstjni/table.cc
+ forstjni/table_filter.cc
+ forstjni/table_filter_jnicallback.cc
+ forstjni/testable_event_listener.cc
+ forstjni/thread_status.cc
+ forstjni/trace_writer.cc
+ forstjni/trace_writer_jnicallback.cc
+ forstjni/transaction.cc
+ forstjni/transaction_db.cc
+ forstjni/transaction_db_options.cc
+ forstjni/transaction_log.cc
+ forstjni/transaction_notifier.cc
+ forstjni/transaction_notifier_jnicallback.cc
+ forstjni/transaction_options.cc
+ forstjni/ttl.cc
+ forstjni/wal_filter.cc
+ forstjni/wal_filter_jnicallback.cc
+ forstjni/write_batch.cc
+ forstjni/writebatchhandlerjnicallback.cc
+ forstjni/write_batch_test.cc
+ forstjni/write_batch_with_index.cc
+ forstjni/write_buffer_manager.cc
 )

 set(JAVA_MAIN_CLASSES
- src/main/java/org/rocksdb/AbstractCompactionFilter.java
- src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java
- src/main/java/org/rocksdb/AbstractComparator.java
- src/main/java/org/rocksdb/AbstractEventListener.java
- src/main/java/org/rocksdb/AbstractImmutableNativeReference.java
- src/main/java/org/rocksdb/AbstractMutableOptions.java
- src/main/java/org/rocksdb/AbstractNativeReference.java
- src/main/java/org/rocksdb/AbstractRocksIterator.java
- src/main/java/org/rocksdb/AbstractSlice.java
- src/main/java/org/rocksdb/AbstractTableFilter.java
- src/main/java/org/rocksdb/AbstractTraceWriter.java
- src/main/java/org/rocksdb/AbstractTransactionNotifier.java
- src/main/java/org/rocksdb/AbstractWalFilter.java
- src/main/java/org/rocksdb/AbstractWriteBatch.java
- src/main/java/org/rocksdb/AccessHint.java
- src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java
- src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java
- src/main/java/org/rocksdb/BackgroundErrorReason.java
- src/main/java/org/rocksdb/BackupEngineOptions.java
- src/main/java/org/rocksdb/BackupEngine.java
- src/main/java/org/rocksdb/BackupInfo.java
- src/main/java/org/rocksdb/BlockBasedTableConfig.java
-
src/main/java/org/rocksdb/BloomFilter.java - src/main/java/org/rocksdb/BuiltinComparator.java - src/main/java/org/rocksdb/ByteBufferGetStatus.java - src/main/java/org/rocksdb/Cache.java - src/main/java/org/rocksdb/CassandraCompactionFilter.java - src/main/java/org/rocksdb/CassandraValueMergeOperator.java - src/main/java/org/rocksdb/Checkpoint.java - src/main/java/org/rocksdb/ChecksumType.java - src/main/java/org/rocksdb/ClockCache.java - src/main/java/org/rocksdb/ColumnFamilyDescriptor.java - src/main/java/org/rocksdb/ColumnFamilyHandle.java - src/main/java/org/rocksdb/ColumnFamilyMetaData.java - src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/ColumnFamilyOptions.java - src/main/java/org/rocksdb/CompactionJobInfo.java - src/main/java/org/rocksdb/CompactionJobStats.java - src/main/java/org/rocksdb/CompactionOptions.java - src/main/java/org/rocksdb/CompactionOptionsFIFO.java - src/main/java/org/rocksdb/CompactionOptionsUniversal.java - src/main/java/org/rocksdb/CompactionPriority.java - src/main/java/org/rocksdb/CompactionReason.java - src/main/java/org/rocksdb/CompactRangeOptions.java - src/main/java/org/rocksdb/CompactionStopStyle.java - src/main/java/org/rocksdb/CompactionStyle.java - src/main/java/org/rocksdb/ComparatorOptions.java - src/main/java/org/rocksdb/ComparatorType.java - src/main/java/org/rocksdb/CompressionOptions.java - src/main/java/org/rocksdb/CompressionType.java - src/main/java/org/rocksdb/ConfigOptions.java - src/main/java/org/rocksdb/DataBlockIndexType.java - src/main/java/org/rocksdb/DBOptionsInterface.java - src/main/java/org/rocksdb/DBOptions.java - src/main/java/org/rocksdb/DbPath.java - src/main/java/org/rocksdb/DirectSlice.java - src/main/java/org/rocksdb/EncodingType.java - src/main/java/org/rocksdb/Env.java - src/main/java/org/rocksdb/EnvFlinkTestSuite.java - src/main/java/org/rocksdb/EnvOptions.java - src/main/java/org/rocksdb/EventListener.java - src/main/java/org/rocksdb/Experimental.java - src/main/java/org/rocksdb/ExportImportFilesMetaData.java - src/main/java/org/rocksdb/ExternalFileIngestionInfo.java - src/main/java/org/rocksdb/Filter.java - src/main/java/org/rocksdb/FilterPolicyType.java - src/main/java/org/rocksdb/FileOperationInfo.java - src/main/java/org/rocksdb/FlinkCompactionFilter.java - src/main/java/org/rocksdb/FlinkEnv.java - src/main/java/org/rocksdb/FlushJobInfo.java - src/main/java/org/rocksdb/FlushReason.java - src/main/java/org/rocksdb/FlushOptions.java - src/main/java/org/rocksdb/GetStatus.java - src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java - src/main/java/org/rocksdb/HashSkipListMemTableConfig.java - src/main/java/org/rocksdb/HistogramData.java - src/main/java/org/rocksdb/HistogramType.java - src/main/java/org/rocksdb/Holder.java - src/main/java/org/rocksdb/ImportColumnFamilyOptions.java - src/main/java/org/rocksdb/HyperClockCache.java - src/main/java/org/rocksdb/IndexShorteningMode.java - src/main/java/org/rocksdb/IndexType.java - src/main/java/org/rocksdb/InfoLogLevel.java - src/main/java/org/rocksdb/IngestExternalFileOptions.java - src/main/java/org/rocksdb/LevelMetaData.java - src/main/java/org/rocksdb/ConcurrentTaskLimiter.java - src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java - src/main/java/org/rocksdb/KeyMayExist.java - src/main/java/org/rocksdb/LiveFileMetaData.java - src/main/java/org/rocksdb/LogFile.java - src/main/java/org/rocksdb/Logger.java - src/main/java/org/rocksdb/LRUCache.java - src/main/java/org/rocksdb/MemoryUsageType.java - 
src/main/java/org/rocksdb/MemoryUtil.java - src/main/java/org/rocksdb/MemTableConfig.java - src/main/java/org/rocksdb/MemTableInfo.java - src/main/java/org/rocksdb/MergeOperator.java - src/main/java/org/rocksdb/MutableColumnFamilyOptions.java - src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java - src/main/java/org/rocksdb/MutableDBOptions.java - src/main/java/org/rocksdb/MutableDBOptionsInterface.java - src/main/java/org/rocksdb/MutableOptionKey.java - src/main/java/org/rocksdb/MutableOptionValue.java - src/main/java/org/rocksdb/NativeComparatorWrapper.java - src/main/java/org/rocksdb/NativeLibraryLoader.java - src/main/java/org/rocksdb/OperationStage.java - src/main/java/org/rocksdb/OperationType.java - src/main/java/org/rocksdb/OptimisticTransactionDB.java - src/main/java/org/rocksdb/OptimisticTransactionOptions.java - src/main/java/org/rocksdb/Options.java - src/main/java/org/rocksdb/OptionString.java - src/main/java/org/rocksdb/OptionsUtil.java - src/main/java/org/rocksdb/PersistentCache.java - src/main/java/org/rocksdb/PerfContext.java - src/main/java/org/rocksdb/PerfLevel.java - src/main/java/org/rocksdb/PlainTableConfig.java - src/main/java/org/rocksdb/PrepopulateBlobCache.java - src/main/java/org/rocksdb/Priority.java - src/main/java/org/rocksdb/Range.java - src/main/java/org/rocksdb/RateLimiter.java - src/main/java/org/rocksdb/RateLimiterMode.java - src/main/java/org/rocksdb/ReadOptions.java - src/main/java/org/rocksdb/ReadTier.java - src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java - src/main/java/org/rocksdb/RestoreOptions.java - src/main/java/org/rocksdb/ReusedSynchronisationType.java - src/main/java/org/rocksdb/RocksCallbackObject.java - src/main/java/org/rocksdb/RocksDBException.java - src/main/java/org/rocksdb/RocksDB.java - src/main/java/org/rocksdb/RocksEnv.java - src/main/java/org/rocksdb/RocksIteratorInterface.java - src/main/java/org/rocksdb/RocksIterator.java - src/main/java/org/rocksdb/RocksMemEnv.java - src/main/java/org/rocksdb/RocksMutableObject.java - src/main/java/org/rocksdb/RocksObject.java - src/main/java/org/rocksdb/SanityLevel.java - src/main/java/org/rocksdb/SizeApproximationFlag.java - src/main/java/org/rocksdb/SkipListMemTableConfig.java - src/main/java/org/rocksdb/Slice.java - src/main/java/org/rocksdb/Snapshot.java - src/main/java/org/rocksdb/SstFileManager.java - src/main/java/org/rocksdb/SstFileMetaData.java - src/main/java/org/rocksdb/SstFileReader.java - src/main/java/org/rocksdb/SstFileReaderIterator.java - src/main/java/org/rocksdb/SstFileWriter.java - src/main/java/org/rocksdb/SstPartitionerFactory.java - src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java - src/main/java/org/rocksdb/StateType.java - src/main/java/org/rocksdb/StatisticsCollectorCallback.java - src/main/java/org/rocksdb/StatisticsCollector.java - src/main/java/org/rocksdb/Statistics.java - src/main/java/org/rocksdb/StatsCollectorInput.java - src/main/java/org/rocksdb/StatsLevel.java - src/main/java/org/rocksdb/Status.java - src/main/java/org/rocksdb/StringAppendOperator.java - src/main/java/org/rocksdb/TableFileCreationBriefInfo.java - src/main/java/org/rocksdb/TableFileCreationInfo.java - src/main/java/org/rocksdb/TableFileCreationReason.java - src/main/java/org/rocksdb/TableFileDeletionInfo.java - src/main/java/org/rocksdb/TableFilter.java - src/main/java/org/rocksdb/TableProperties.java - src/main/java/org/rocksdb/TableFormatConfig.java - src/main/java/org/rocksdb/ThreadType.java - src/main/java/org/rocksdb/ThreadStatus.java - 
src/main/java/org/rocksdb/TickerType.java - src/main/java/org/rocksdb/TimedEnv.java - src/main/java/org/rocksdb/TraceOptions.java - src/main/java/org/rocksdb/TraceWriter.java - src/main/java/org/rocksdb/TransactionalDB.java - src/main/java/org/rocksdb/TransactionalOptions.java - src/main/java/org/rocksdb/TransactionDB.java - src/main/java/org/rocksdb/TransactionDBOptions.java - src/main/java/org/rocksdb/Transaction.java - src/main/java/org/rocksdb/TransactionLogIterator.java - src/main/java/org/rocksdb/TransactionOptions.java - src/main/java/org/rocksdb/TtlDB.java - src/main/java/org/rocksdb/TxnDBWritePolicy.java - src/main/java/org/rocksdb/VectorMemTableConfig.java - src/main/java/org/rocksdb/WalFileType.java - src/main/java/org/rocksdb/WalFilter.java - src/main/java/org/rocksdb/WalProcessingOption.java - src/main/java/org/rocksdb/WALRecoveryMode.java - src/main/java/org/rocksdb/WBWIRocksIterator.java - src/main/java/org/rocksdb/WriteBatch.java - src/main/java/org/rocksdb/WriteBatchInterface.java - src/main/java/org/rocksdb/WriteBatchWithIndex.java - src/main/java/org/rocksdb/WriteOptions.java - src/main/java/org/rocksdb/WriteBufferManager.java - src/main/java/org/rocksdb/WriteStallCondition.java - src/main/java/org/rocksdb/WriteStallInfo.java - src/main/java/org/rocksdb/util/BufferUtil.java - src/main/java/org/rocksdb/util/ByteUtil.java - src/main/java/org/rocksdb/util/BytewiseComparator.java - src/main/java/org/rocksdb/util/Environment.java - src/main/java/org/rocksdb/util/IntComparator.java - src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java - src/main/java/org/rocksdb/util/SizeUnit.java - src/main/java/org/rocksdb/UInt64AddOperator.java - src/test/java/org/rocksdb/NativeComparatorWrapperTest.java - src/test/java/org/rocksdb/RocksDBExceptionTest.java - src/test/java/org/rocksdb/test/TestableEventListener.java - src/test/java/org/rocksdb/WriteBatchTest.java - src/test/java/org/rocksdb/RocksNativeLibraryResource.java - src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java - src/test/java/org/rocksdb/util/WriteBatchGetter.java + src/main/java/org/forstdb/AbstractCompactionFilter.java + src/main/java/org/forstdb/AbstractCompactionFilterFactory.java + src/main/java/org/forstdb/AbstractComparator.java + src/main/java/org/forstdb/AbstractEventListener.java + src/main/java/org/forstdb/AbstractImmutableNativeReference.java + src/main/java/org/forstdb/AbstractMutableOptions.java + src/main/java/org/forstdb/AbstractNativeReference.java + src/main/java/org/forstdb/AbstractRocksIterator.java + src/main/java/org/forstdb/AbstractSlice.java + src/main/java/org/forstdb/AbstractTableFilter.java + src/main/java/org/forstdb/AbstractTraceWriter.java + src/main/java/org/forstdb/AbstractTransactionNotifier.java + src/main/java/org/forstdb/AbstractWalFilter.java + src/main/java/org/forstdb/AbstractWriteBatch.java + src/main/java/org/forstdb/AccessHint.java + src/main/java/org/forstdb/AdvancedColumnFamilyOptionsInterface.java + src/main/java/org/forstdb/AdvancedMutableColumnFamilyOptionsInterface.java + src/main/java/org/forstdb/BackgroundErrorReason.java + src/main/java/org/forstdb/BackupEngineOptions.java + src/main/java/org/forstdb/BackupEngine.java + src/main/java/org/forstdb/BackupInfo.java + src/main/java/org/forstdb/BlockBasedTableConfig.java + src/main/java/org/forstdb/BloomFilter.java + src/main/java/org/forstdb/BuiltinComparator.java + src/main/java/org/forstdb/ByteBufferGetStatus.java + src/main/java/org/forstdb/Cache.java + 
src/main/java/org/forstdb/CassandraCompactionFilter.java + src/main/java/org/forstdb/CassandraValueMergeOperator.java + src/main/java/org/forstdb/Checkpoint.java + src/main/java/org/forstdb/ChecksumType.java + src/main/java/org/forstdb/ClockCache.java + src/main/java/org/forstdb/ColumnFamilyDescriptor.java + src/main/java/org/forstdb/ColumnFamilyHandle.java + src/main/java/org/forstdb/ColumnFamilyMetaData.java + src/main/java/org/forstdb/ColumnFamilyOptionsInterface.java + src/main/java/org/forstdb/ColumnFamilyOptions.java + src/main/java/org/forstdb/CompactionJobInfo.java + src/main/java/org/forstdb/CompactionJobStats.java + src/main/java/org/forstdb/CompactionOptions.java + src/main/java/org/forstdb/CompactionOptionsFIFO.java + src/main/java/org/forstdb/CompactionOptionsUniversal.java + src/main/java/org/forstdb/CompactionPriority.java + src/main/java/org/forstdb/CompactionReason.java + src/main/java/org/forstdb/CompactRangeOptions.java + src/main/java/org/forstdb/CompactionStopStyle.java + src/main/java/org/forstdb/CompactionStyle.java + src/main/java/org/forstdb/ComparatorOptions.java + src/main/java/org/forstdb/ComparatorType.java + src/main/java/org/forstdb/CompressionOptions.java + src/main/java/org/forstdb/CompressionType.java + src/main/java/org/forstdb/ConfigOptions.java + src/main/java/org/forstdb/DataBlockIndexType.java + src/main/java/org/forstdb/DBOptionsInterface.java + src/main/java/org/forstdb/DBOptions.java + src/main/java/org/forstdb/DbPath.java + src/main/java/org/forstdb/DirectSlice.java + src/main/java/org/forstdb/EncodingType.java + src/main/java/org/forstdb/Env.java + src/main/java/org/forstdb/EnvFlinkTestSuite.java + src/main/java/org/forstdb/EnvOptions.java + src/main/java/org/forstdb/EventListener.java + src/main/java/org/forstdb/Experimental.java + src/main/java/org/forstdb/ExportImportFilesMetaData.java + src/main/java/org/forstdb/ExternalFileIngestionInfo.java + src/main/java/org/forstdb/Filter.java + src/main/java/org/forstdb/FilterPolicyType.java + src/main/java/org/forstdb/FileOperationInfo.java + src/main/java/org/forstdb/FlinkCompactionFilter.java + src/main/java/org/forstdb/FlinkEnv.java + src/main/java/org/forstdb/FlushJobInfo.java + src/main/java/org/forstdb/FlushReason.java + src/main/java/org/forstdb/FlushOptions.java + src/main/java/org/forstdb/GetStatus.java + src/main/java/org/forstdb/HashLinkedListMemTableConfig.java + src/main/java/org/forstdb/HashSkipListMemTableConfig.java + src/main/java/org/forstdb/HistogramData.java + src/main/java/org/forstdb/HistogramType.java + src/main/java/org/forstdb/Holder.java + src/main/java/org/forstdb/ImportColumnFamilyOptions.java + src/main/java/org/forstdb/HyperClockCache.java + src/main/java/org/forstdb/IndexShorteningMode.java + src/main/java/org/forstdb/IndexType.java + src/main/java/org/forstdb/InfoLogLevel.java + src/main/java/org/forstdb/IngestExternalFileOptions.java + src/main/java/org/forstdb/LevelMetaData.java + src/main/java/org/forstdb/ConcurrentTaskLimiter.java + src/main/java/org/forstdb/ConcurrentTaskLimiterImpl.java + src/main/java/org/forstdb/KeyMayExist.java + src/main/java/org/forstdb/LiveFileMetaData.java + src/main/java/org/forstdb/LogFile.java + src/main/java/org/forstdb/Logger.java + src/main/java/org/forstdb/LRUCache.java + src/main/java/org/forstdb/MemoryUsageType.java + src/main/java/org/forstdb/MemoryUtil.java + src/main/java/org/forstdb/MemTableConfig.java + src/main/java/org/forstdb/MemTableInfo.java + src/main/java/org/forstdb/MergeOperator.java + 
src/main/java/org/forstdb/MutableColumnFamilyOptions.java + src/main/java/org/forstdb/MutableColumnFamilyOptionsInterface.java + src/main/java/org/forstdb/MutableDBOptions.java + src/main/java/org/forstdb/MutableDBOptionsInterface.java + src/main/java/org/forstdb/MutableOptionKey.java + src/main/java/org/forstdb/MutableOptionValue.java + src/main/java/org/forstdb/NativeComparatorWrapper.java + src/main/java/org/forstdb/NativeLibraryLoader.java + src/main/java/org/forstdb/OperationStage.java + src/main/java/org/forstdb/OperationType.java + src/main/java/org/forstdb/OptimisticTransactionDB.java + src/main/java/org/forstdb/OptimisticTransactionOptions.java + src/main/java/org/forstdb/Options.java + src/main/java/org/forstdb/OptionString.java + src/main/java/org/forstdb/OptionsUtil.java + src/main/java/org/forstdb/PersistentCache.java + src/main/java/org/forstdb/PerfContext.java + src/main/java/org/forstdb/PerfLevel.java + src/main/java/org/forstdb/PlainTableConfig.java + src/main/java/org/forstdb/PrepopulateBlobCache.java + src/main/java/org/forstdb/Priority.java + src/main/java/org/forstdb/Range.java + src/main/java/org/forstdb/RateLimiter.java + src/main/java/org/forstdb/RateLimiterMode.java + src/main/java/org/forstdb/ReadOptions.java + src/main/java/org/forstdb/ReadTier.java + src/main/java/org/forstdb/RemoveEmptyValueCompactionFilter.java + src/main/java/org/forstdb/RestoreOptions.java + src/main/java/org/forstdb/ReusedSynchronisationType.java + src/main/java/org/forstdb/RocksCallbackObject.java + src/main/java/org/forstdb/RocksDBException.java + src/main/java/org/forstdb/RocksDB.java + src/main/java/org/forstdb/RocksEnv.java + src/main/java/org/forstdb/RocksIteratorInterface.java + src/main/java/org/forstdb/RocksIterator.java + src/main/java/org/forstdb/RocksMemEnv.java + src/main/java/org/forstdb/RocksMutableObject.java + src/main/java/org/forstdb/RocksObject.java + src/main/java/org/forstdb/SanityLevel.java + src/main/java/org/forstdb/SizeApproximationFlag.java + src/main/java/org/forstdb/SkipListMemTableConfig.java + src/main/java/org/forstdb/Slice.java + src/main/java/org/forstdb/Snapshot.java + src/main/java/org/forstdb/SstFileManager.java + src/main/java/org/forstdb/SstFileMetaData.java + src/main/java/org/forstdb/SstFileReader.java + src/main/java/org/forstdb/SstFileReaderIterator.java + src/main/java/org/forstdb/SstFileWriter.java + src/main/java/org/forstdb/SstPartitionerFactory.java + src/main/java/org/forstdb/SstPartitionerFixedPrefixFactory.java + src/main/java/org/forstdb/StateType.java + src/main/java/org/forstdb/StatisticsCollectorCallback.java + src/main/java/org/forstdb/StatisticsCollector.java + src/main/java/org/forstdb/Statistics.java + src/main/java/org/forstdb/StatsCollectorInput.java + src/main/java/org/forstdb/StatsLevel.java + src/main/java/org/forstdb/Status.java + src/main/java/org/forstdb/StringAppendOperator.java + src/main/java/org/forstdb/TableFileCreationBriefInfo.java + src/main/java/org/forstdb/TableFileCreationInfo.java + src/main/java/org/forstdb/TableFileCreationReason.java + src/main/java/org/forstdb/TableFileDeletionInfo.java + src/main/java/org/forstdb/TableFilter.java + src/main/java/org/forstdb/TableProperties.java + src/main/java/org/forstdb/TableFormatConfig.java + src/main/java/org/forstdb/ThreadType.java + src/main/java/org/forstdb/ThreadStatus.java + src/main/java/org/forstdb/TickerType.java + src/main/java/org/forstdb/TimedEnv.java + src/main/java/org/forstdb/TraceOptions.java + src/main/java/org/forstdb/TraceWriter.java + 
src/main/java/org/forstdb/TransactionalDB.java + src/main/java/org/forstdb/TransactionalOptions.java + src/main/java/org/forstdb/TransactionDB.java + src/main/java/org/forstdb/TransactionDBOptions.java + src/main/java/org/forstdb/Transaction.java + src/main/java/org/forstdb/TransactionLogIterator.java + src/main/java/org/forstdb/TransactionOptions.java + src/main/java/org/forstdb/TtlDB.java + src/main/java/org/forstdb/TxnDBWritePolicy.java + src/main/java/org/forstdb/VectorMemTableConfig.java + src/main/java/org/forstdb/WalFileType.java + src/main/java/org/forstdb/WalFilter.java + src/main/java/org/forstdb/WalProcessingOption.java + src/main/java/org/forstdb/WALRecoveryMode.java + src/main/java/org/forstdb/WBWIRocksIterator.java + src/main/java/org/forstdb/WriteBatch.java + src/main/java/org/forstdb/WriteBatchInterface.java + src/main/java/org/forstdb/WriteBatchWithIndex.java + src/main/java/org/forstdb/WriteOptions.java + src/main/java/org/forstdb/WriteBufferManager.java + src/main/java/org/forstdb/WriteStallCondition.java + src/main/java/org/forstdb/WriteStallInfo.java + src/main/java/org/forstdb/util/BufferUtil.java + src/main/java/org/forstdb/util/ByteUtil.java + src/main/java/org/forstdb/util/BytewiseComparator.java + src/main/java/org/forstdb/util/Environment.java + src/main/java/org/forstdb/util/IntComparator.java + src/main/java/org/forstdb/util/ReverseBytewiseComparator.java + src/main/java/org/forstdb/util/SizeUnit.java + src/main/java/org/forstdb/UInt64AddOperator.java + src/test/java/org/forstdb/NativeComparatorWrapperTest.java + src/test/java/org/forstdb/RocksDBExceptionTest.java + src/test/java/org/forstdb/test/TestableEventListener.java + src/test/java/org/forstdb/WriteBatchTest.java + src/test/java/org/forstdb/RocksNativeLibraryResource.java + src/test/java/org/forstdb/util/CapturingWriteBatchHandler.java + src/test/java/org/forstdb/util/WriteBatchGetter.java ) set(JAVA_TEST_CLASSES - src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java - src/test/java/org/rocksdb/EventListenerTest.java - src/test/java/org/rocksdb/CompactionOptionsTest.java - src/test/java/org/rocksdb/PlatformRandomHelper.java - src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java - src/test/java/org/rocksdb/MutableDBOptionsTest.java - src/test/java/org/rocksdb/WriteOptionsTest.java - src/test/java/org/rocksdb/SstPartitionerTest.java - src/test/java/org/rocksdb/RocksMemEnvTest.java - src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java - src/test/java/org/rocksdb/ClockCacheTest.java - src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java - src/test/java/org/rocksdb/SnapshotTest.java - src/test/java/org/rocksdb/CompactionJobStatsTest.java - src/test/java/org/rocksdb/MemTableTest.java - src/test/java/org/rocksdb/CompactionFilterFactoryTest.java - src/test/java/org/rocksdb/DefaultEnvTest.java - src/test/java/org/rocksdb/DBOptionsTest.java - src/test/java/org/rocksdb/RocksIteratorTest.java - src/test/java/org/rocksdb/SliceTest.java - src/test/java/org/rocksdb/MultiGetTest.java - src/test/java/org/rocksdb/ComparatorOptionsTest.java - src/test/java/org/rocksdb/NativeLibraryLoaderTest.java - src/test/java/org/rocksdb/StatisticsTest.java - src/test/java/org/rocksdb/WALRecoveryModeTest.java - src/test/java/org/rocksdb/TransactionLogIteratorTest.java - src/test/java/org/rocksdb/ReadOptionsTest.java - src/test/java/org/rocksdb/SecondaryDBTest.java - src/test/java/org/rocksdb/KeyMayExistTest.java - src/test/java/org/rocksdb/BlobOptionsTest.java - 
src/test/java/org/rocksdb/InfoLogLevelTest.java - src/test/java/org/rocksdb/CompactionPriorityTest.java - src/test/java/org/rocksdb/FlushOptionsTest.java - src/test/java/org/rocksdb/VerifyChecksumsTest.java - src/test/java/org/rocksdb/MultiColumnRegressionTest.java - src/test/java/org/rocksdb/FlushTest.java - src/test/java/org/rocksdb/HyperClockCacheTest.java - src/test/java/org/rocksdb/PutMultiplePartsTest.java - src/test/java/org/rocksdb/StatisticsCollectorTest.java - src/test/java/org/rocksdb/LRUCacheTest.java - src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java - src/test/java/org/rocksdb/TransactionTest.java - src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java - src/test/java/org/rocksdb/BackupEngineOptionsTest.java - src/test/java/org/rocksdb/CheckPointTest.java - src/test/java/org/rocksdb/PlainTableConfigTest.java - src/test/java/org/rocksdb/TransactionDBOptionsTest.java - src/test/java/org/rocksdb/ReadOnlyTest.java - src/test/java/org/rocksdb/EnvOptionsTest.java - src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java - src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java - src/test/java/org/rocksdb/test/TestableEventListener.java - src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java - src/test/java/org/rocksdb/test/TestableEventListener.java - src/test/java/org/rocksdb/test/RocksJunitRunner.java - src/test/java/org/rocksdb/LoggerTest.java - src/test/java/org/rocksdb/FilterTest.java - src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java - src/test/java/org/rocksdb/util/IntComparatorTest.java - src/test/java/org/rocksdb/util/JNIComparatorTest.java - src/test/java/org/rocksdb/util/ByteBufferAllocator.java - src/test/java/org/rocksdb/util/SizeUnitTest.java - src/test/java/org/rocksdb/util/BytewiseComparatorTest.java - src/test/java/org/rocksdb/util/EnvironmentTest.java - src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java - src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java - src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java - src/test/java/org/rocksdb/util/TestUtil.java - src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java - src/test/java/org/rocksdb/Types.java - src/test/java/org/rocksdb/MixedOptionsTest.java - src/test/java/org/rocksdb/CompactRangeOptionsTest.java - src/test/java/org/rocksdb/SstFileWriterTest.java - src/test/java/org/rocksdb/WalFilterTest.java - src/test/java/org/rocksdb/AbstractTransactionTest.java - src/test/java/org/rocksdb/MergeTest.java - src/test/java/org/rocksdb/OptionsTest.java - src/test/java/org/rocksdb/WriteBatchThreadedTest.java - src/test/java/org/rocksdb/MultiGetManyKeysTest.java - src/test/java/org/rocksdb/TimedEnvTest.java - src/test/java/org/rocksdb/CompactionStopStyleTest.java - src/test/java/org/rocksdb/CompactionJobInfoTest.java - src/test/java/org/rocksdb/BlockBasedTableConfigTest.java - src/test/java/org/rocksdb/BuiltinComparatorTest.java - src/test/java/org/rocksdb/RateLimiterTest.java - src/test/java/org/rocksdb/TransactionOptionsTest.java - src/test/java/org/rocksdb/WriteBatchWithIndexTest.java - src/test/java/org/rocksdb/WriteBatchHandlerTest.java - src/test/java/org/rocksdb/OptimisticTransactionDBTest.java - src/test/java/org/rocksdb/OptionsUtilTest.java - src/test/java/org/rocksdb/OptimisticTransactionTest.java - src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java - src/test/java/org/rocksdb/CompressionOptionsTest.java - src/test/java/org/rocksdb/ColumnFamilyTest.java - 
src/test/java/org/rocksdb/SstFileReaderTest.java - src/test/java/org/rocksdb/TransactionDBTest.java - src/test/java/org/rocksdb/RocksDBTest.java - src/test/java/org/rocksdb/MutableOptionsGetSetTest.java - src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java - src/test/java/org/rocksdb/SstFileManagerTest.java - src/test/java/org/rocksdb/BackupEngineTest.java - src/test/java/org/rocksdb/DirectSliceTest.java - src/test/java/org/rocksdb/StatsCallbackMock.java - src/test/java/org/rocksdb/CompressionTypesTest.java - src/test/java/org/rocksdb/MemoryUtilTest.java - src/test/java/org/rocksdb/TableFilterTest.java - src/test/java/org/rocksdb/TtlDBTest.java + src/test/java/org/forstdb/ConcurrentTaskLimiterTest.java + src/test/java/org/forstdb/EventListenerTest.java + src/test/java/org/forstdb/CompactionOptionsTest.java + src/test/java/org/forstdb/PlatformRandomHelper.java + src/test/java/org/forstdb/IngestExternalFileOptionsTest.java + src/test/java/org/forstdb/MutableDBOptionsTest.java + src/test/java/org/forstdb/WriteOptionsTest.java + src/test/java/org/forstdb/SstPartitionerTest.java + src/test/java/org/forstdb/RocksMemEnvTest.java + src/test/java/org/forstdb/CompactionOptionsUniversalTest.java + src/test/java/org/forstdb/ClockCacheTest.java + src/test/java/org/forstdb/BytewiseComparatorRegressionTest.java + src/test/java/org/forstdb/SnapshotTest.java + src/test/java/org/forstdb/CompactionJobStatsTest.java + src/test/java/org/forstdb/MemTableTest.java + src/test/java/org/forstdb/CompactionFilterFactoryTest.java + src/test/java/org/forstdb/DefaultEnvTest.java + src/test/java/org/forstdb/DBOptionsTest.java + src/test/java/org/forstdb/RocksIteratorTest.java + src/test/java/org/forstdb/SliceTest.java + src/test/java/org/forstdb/MultiGetTest.java + src/test/java/org/forstdb/ComparatorOptionsTest.java + src/test/java/org/forstdb/NativeLibraryLoaderTest.java + src/test/java/org/forstdb/StatisticsTest.java + src/test/java/org/forstdb/WALRecoveryModeTest.java + src/test/java/org/forstdb/TransactionLogIteratorTest.java + src/test/java/org/forstdb/ReadOptionsTest.java + src/test/java/org/forstdb/SecondaryDBTest.java + src/test/java/org/forstdb/KeyMayExistTest.java + src/test/java/org/forstdb/BlobOptionsTest.java + src/test/java/org/forstdb/InfoLogLevelTest.java + src/test/java/org/forstdb/CompactionPriorityTest.java + src/test/java/org/forstdb/FlushOptionsTest.java + src/test/java/org/forstdb/VerifyChecksumsTest.java + src/test/java/org/forstdb/MultiColumnRegressionTest.java + src/test/java/org/forstdb/FlushTest.java + src/test/java/org/forstdb/HyperClockCacheTest.java + src/test/java/org/forstdb/PutMultiplePartsTest.java + src/test/java/org/forstdb/StatisticsCollectorTest.java + src/test/java/org/forstdb/LRUCacheTest.java + src/test/java/org/forstdb/ColumnFamilyOptionsTest.java + src/test/java/org/forstdb/TransactionTest.java + src/test/java/org/forstdb/CompactionOptionsFIFOTest.java + src/test/java/org/forstdb/BackupEngineOptionsTest.java + src/test/java/org/forstdb/CheckPointTest.java + src/test/java/org/forstdb/PlainTableConfigTest.java + src/test/java/org/forstdb/TransactionDBOptionsTest.java + src/test/java/org/forstdb/ReadOnlyTest.java + src/test/java/org/forstdb/EnvOptionsTest.java + src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java + src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java + src/test/java/org/forstdb/test/TestableEventListener.java + src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java + 
src/test/java/org/forstdb/test/TestableEventListener.java + src/test/java/org/forstdb/test/RocksJunitRunner.java + src/test/java/org/forstdb/LoggerTest.java + src/test/java/org/forstdb/FilterTest.java + src/test/java/org/forstdb/ByteBufferUnsupportedOperationTest.java + src/test/java/org/forstdb/util/IntComparatorTest.java + src/test/java/org/forstdb/util/JNIComparatorTest.java + src/test/java/org/forstdb/util/ByteBufferAllocator.java + src/test/java/org/forstdb/util/SizeUnitTest.java + src/test/java/org/forstdb/util/BytewiseComparatorTest.java + src/test/java/org/forstdb/util/EnvironmentTest.java + src/test/java/org/forstdb/util/BytewiseComparatorIntTest.java + src/test/java/org/forstdb/util/DirectByteBufferAllocator.java + src/test/java/org/forstdb/util/HeapByteBufferAllocator.java + src/test/java/org/forstdb/util/TestUtil.java + src/test/java/org/forstdb/util/ReverseBytewiseComparatorIntTest.java + src/test/java/org/forstdb/Types.java + src/test/java/org/forstdb/MixedOptionsTest.java + src/test/java/org/forstdb/CompactRangeOptionsTest.java + src/test/java/org/forstdb/SstFileWriterTest.java + src/test/java/org/forstdb/WalFilterTest.java + src/test/java/org/forstdb/AbstractTransactionTest.java + src/test/java/org/forstdb/MergeTest.java + src/test/java/org/forstdb/OptionsTest.java + src/test/java/org/forstdb/WriteBatchThreadedTest.java + src/test/java/org/forstdb/MultiGetManyKeysTest.java + src/test/java/org/forstdb/TimedEnvTest.java + src/test/java/org/forstdb/CompactionStopStyleTest.java + src/test/java/org/forstdb/CompactionJobInfoTest.java + src/test/java/org/forstdb/BlockBasedTableConfigTest.java + src/test/java/org/forstdb/BuiltinComparatorTest.java + src/test/java/org/forstdb/RateLimiterTest.java + src/test/java/org/forstdb/TransactionOptionsTest.java + src/test/java/org/forstdb/WriteBatchWithIndexTest.java + src/test/java/org/forstdb/WriteBatchHandlerTest.java + src/test/java/org/forstdb/OptimisticTransactionDBTest.java + src/test/java/org/forstdb/OptionsUtilTest.java + src/test/java/org/forstdb/OptimisticTransactionTest.java + src/test/java/org/forstdb/MutableColumnFamilyOptionsTest.java + src/test/java/org/forstdb/CompressionOptionsTest.java + src/test/java/org/forstdb/ColumnFamilyTest.java + src/test/java/org/forstdb/SstFileReaderTest.java + src/test/java/org/forstdb/TransactionDBTest.java + src/test/java/org/forstdb/RocksDBTest.java + src/test/java/org/forstdb/MutableOptionsGetSetTest.java + src/test/java/org/forstdb/OptimisticTransactionOptionsTest.java + src/test/java/org/forstdb/SstFileManagerTest.java + src/test/java/org/forstdb/BackupEngineTest.java + src/test/java/org/forstdb/DirectSliceTest.java + src/test/java/org/forstdb/StatsCallbackMock.java + src/test/java/org/forstdb/CompressionTypesTest.java + src/test/java/org/forstdb/MemoryUtilTest.java + src/test/java/org/forstdb/TableFilterTest.java + src/test/java/org/forstdb/TtlDBTest.java ) set(JAVA_TEST_RUNNING_CLASSES - org.rocksdb.ConcurrentTaskLimiterTest - org.rocksdb.EventListenerTest - org.rocksdb.CompactionOptionsTest - org.rocksdb.IngestExternalFileOptionsTest - org.rocksdb.MutableDBOptionsTest - org.rocksdb.WriteOptionsTest - org.rocksdb.SstPartitionerTest - org.rocksdb.RocksMemEnvTest - org.rocksdb.CompactionOptionsUniversalTest - org.rocksdb.ClockCacheTest - # org.rocksdb.BytewiseComparatorRegressionTest - org.rocksdb.SnapshotTest - org.rocksdb.CompactionJobStatsTest - org.rocksdb.MemTableTest - org.rocksdb.CompactionFilterFactoryTest - # org.rocksdb.DefaultEnvTest - org.rocksdb.DBOptionsTest - 
org.rocksdb.WriteBatchTest - org.rocksdb.RocksIteratorTest - org.rocksdb.SliceTest - org.rocksdb.MultiGetTest - org.rocksdb.ComparatorOptionsTest - # org.rocksdb.NativeLibraryLoaderTest - org.rocksdb.StatisticsTest - org.rocksdb.WALRecoveryModeTest - org.rocksdb.TransactionLogIteratorTest - org.rocksdb.ReadOptionsTest - org.rocksdb.SecondaryDBTest - org.rocksdb.KeyMayExistTest - org.rocksdb.BlobOptionsTest - org.rocksdb.InfoLogLevelTest - org.rocksdb.CompactionPriorityTest - org.rocksdb.FlushOptionsTest - org.rocksdb.VerifyChecksumsTest - org.rocksdb.MultiColumnRegressionTest - org.rocksdb.FlushTest - org.rocksdb.HyperClockCacheTest - org.rocksdb.PutMultiplePartsTest - org.rocksdb.StatisticsCollectorTest - org.rocksdb.LRUCacheTest - org.rocksdb.ColumnFamilyOptionsTest - org.rocksdb.TransactionTest - org.rocksdb.CompactionOptionsFIFOTest - org.rocksdb.BackupEngineOptionsTest - org.rocksdb.CheckPointTest - org.rocksdb.PlainTableConfigTest - # org.rocksdb.TransactionDBOptionsTest - org.rocksdb.ReadOnlyTest - org.rocksdb.EnvOptionsTest - org.rocksdb.LoggerTest - org.rocksdb.FilterTest - # org.rocksdb.ByteBufferUnsupportedOperationTest - # org.rocksdb.util.IntComparatorTest - # org.rocksdb.util.JNIComparatorTest - org.rocksdb.util.SizeUnitTest - # org.rocksdb.util.BytewiseComparatorTest - org.rocksdb.util.EnvironmentTest - # org.rocksdb.util.BytewiseComparatorIntTest - # org.rocksdb.util.ReverseBytewiseComparatorIntTest - org.rocksdb.MixedOptionsTest - org.rocksdb.CompactRangeOptionsTest - # org.rocksdb.SstFileWriterTest - org.rocksdb.WalFilterTest - # org.rocksdb.AbstractTransactionTest - org.rocksdb.MergeTest - org.rocksdb.OptionsTest - org.rocksdb.WriteBatchThreadedTest - org.rocksdb.MultiGetManyKeysTest - org.rocksdb.TimedEnvTest - org.rocksdb.CompactionStopStyleTest - org.rocksdb.CompactionJobInfoTest - org.rocksdb.BlockBasedTableConfigTest - org.rocksdb.BuiltinComparatorTest - org.rocksdb.RateLimiterTest - # org.rocksdb.TransactionOptionsTest - org.rocksdb.WriteBatchWithIndexTest - org.rocksdb.WriteBatchHandlerTest - org.rocksdb.OptimisticTransactionDBTest - org.rocksdb.OptionsUtilTest - org.rocksdb.OptimisticTransactionTest - org.rocksdb.MutableColumnFamilyOptionsTest - org.rocksdb.CompressionOptionsTest - org.rocksdb.ColumnFamilyTest - org.rocksdb.SstFileReaderTest - org.rocksdb.TransactionDBTest - org.rocksdb.RocksDBTest - org.rocksdb.MutableOptionsGetSetTest - # org.rocksdb.OptimisticTransactionOptionsTest - org.rocksdb.SstFileManagerTest - org.rocksdb.BackupEngineTest - org.rocksdb.DirectSliceTest - org.rocksdb.CompressionTypesTest - org.rocksdb.MemoryUtilTest - org.rocksdb.TableFilterTest - org.rocksdb.TtlDBTest + org.forstdb.ConcurrentTaskLimiterTest + org.forstdb.EventListenerTest + org.forstdb.CompactionOptionsTest + org.forstdb.IngestExternalFileOptionsTest + org.forstdb.MutableDBOptionsTest + org.forstdb.WriteOptionsTest + org.forstdb.SstPartitionerTest + org.forstdb.RocksMemEnvTest + org.forstdb.CompactionOptionsUniversalTest + org.forstdb.ClockCacheTest + # org.forstdb.BytewiseComparatorRegressionTest + org.forstdb.SnapshotTest + org.forstdb.CompactionJobStatsTest + org.forstdb.MemTableTest + org.forstdb.CompactionFilterFactoryTest + # org.forstdb.DefaultEnvTest + org.forstdb.DBOptionsTest + org.forstdb.WriteBatchTest + org.forstdb.RocksIteratorTest + org.forstdb.SliceTest + org.forstdb.MultiGetTest + org.forstdb.ComparatorOptionsTest + # org.forstdb.NativeLibraryLoaderTest + org.forstdb.StatisticsTest + org.forstdb.WALRecoveryModeTest + org.forstdb.TransactionLogIteratorTest 
+ org.forstdb.ReadOptionsTest + org.forstdb.SecondaryDBTest + org.forstdb.KeyMayExistTest + org.forstdb.BlobOptionsTest + org.forstdb.InfoLogLevelTest + org.forstdb.CompactionPriorityTest + org.forstdb.FlushOptionsTest + org.forstdb.VerifyChecksumsTest + org.forstdb.MultiColumnRegressionTest + org.forstdb.FlushTest + org.forstdb.HyperClockCacheTest + org.forstdb.PutMultiplePartsTest + org.forstdb.StatisticsCollectorTest + org.forstdb.LRUCacheTest + org.forstdb.ColumnFamilyOptionsTest + org.forstdb.TransactionTest + org.forstdb.CompactionOptionsFIFOTest + org.forstdb.BackupEngineOptionsTest + org.forstdb.CheckPointTest + org.forstdb.PlainTableConfigTest + # org.forstdb.TransactionDBOptionsTest + org.forstdb.ReadOnlyTest + org.forstdb.EnvOptionsTest + org.forstdb.LoggerTest + org.forstdb.FilterTest + # org.forstdb.ByteBufferUnsupportedOperationTest + # org.forstdb.util.IntComparatorTest + # org.forstdb.util.JNIComparatorTest + org.forstdb.util.SizeUnitTest + # org.forstdb.util.BytewiseComparatorTest + org.forstdb.util.EnvironmentTest + # org.forstdb.util.BytewiseComparatorIntTest + # org.forstdb.util.ReverseBytewiseComparatorIntTest + org.forstdb.MixedOptionsTest + org.forstdb.CompactRangeOptionsTest + # org.forstdb.SstFileWriterTest + org.forstdb.WalFilterTest + # org.forstdb.AbstractTransactionTest + org.forstdb.MergeTest + org.forstdb.OptionsTest + org.forstdb.WriteBatchThreadedTest + org.forstdb.MultiGetManyKeysTest + org.forstdb.TimedEnvTest + org.forstdb.CompactionStopStyleTest + org.forstdb.CompactionJobInfoTest + org.forstdb.BlockBasedTableConfigTest + org.forstdb.BuiltinComparatorTest + org.forstdb.RateLimiterTest + # org.forstdb.TransactionOptionsTest + org.forstdb.WriteBatchWithIndexTest + org.forstdb.WriteBatchHandlerTest + org.forstdb.OptimisticTransactionDBTest + org.forstdb.OptionsUtilTest + org.forstdb.OptimisticTransactionTest + org.forstdb.MutableColumnFamilyOptionsTest + org.forstdb.CompressionOptionsTest + org.forstdb.ColumnFamilyTest + org.forstdb.SstFileReaderTest + org.forstdb.TransactionDBTest + org.forstdb.RocksDBTest + org.forstdb.MutableOptionsGetSetTest + # org.forstdb.OptimisticTransactionOptionsTest + org.forstdb.SstFileManagerTest + org.forstdb.BackupEngineTest + org.forstdb.DirectSliceTest + org.forstdb.CompressionTypesTest + org.forstdb.MemoryUtilTest + org.forstdb.TableFilterTest + org.forstdb.TtlDBTest ) include(FindJava) @@ -653,111 +653,111 @@ if(${CMAKE_VERSION} VERSION_LESS "3.11.4") # Old CMake ONLY generate JNI headers, otherwise JNI is handled in add_jar step above message("Preparing JNI headers for old CMake (${CMAKE_VERSION})") set(NATIVE_JAVA_CLASSES - org.rocksdb.AbstractCompactionFilter - org.rocksdb.AbstractCompactionFilterFactory - org.rocksdb.AbstractComparator - org.rocksdb.AbstractEventListener - org.rocksdb.AbstractImmutableNativeReference - org.rocksdb.AbstractNativeReference - org.rocksdb.AbstractRocksIterator - org.rocksdb.AbstractSlice - org.rocksdb.AbstractTableFilter - org.rocksdb.AbstractTraceWriter - org.rocksdb.AbstractTransactionNotifier - org.rocksdb.AbstractWalFilter - org.rocksdb.BackupEngineOptions - org.rocksdb.BackupEngine - org.rocksdb.BlockBasedTableConfig - org.rocksdb.BloomFilter - org.rocksdb.CassandraCompactionFilter - org.rocksdb.CassandraValueMergeOperator - org.rocksdb.Checkpoint - org.rocksdb.ClockCache - org.rocksdb.Cache - org.rocksdb.ColumnFamilyHandle - org.rocksdb.ColumnFamilyOptions - org.rocksdb.CompactionJobInfo - org.rocksdb.CompactionJobStats - org.rocksdb.CompactionOptions - 
org.rocksdb.CompactionOptionsFIFO - org.rocksdb.CompactionOptionsUniversal - org.rocksdb.CompactRangeOptions - org.rocksdb.ComparatorOptions - org.rocksdb.CompressionOptions - org.rocksdb.ConcurrentTaskLimiterImpl - org.rocksdb.ConfigOptions - org.rocksdb.DBOptions - org.rocksdb.DirectSlice - org.rocksdb.Env - org.rocksdb.EnvFlinkTestSuite - org.rocksdb.EnvOptions - org.rocksdb.Filter - org.rocksdb.FlinkCompactionFilter - org.rocksdb.FlinkEnv - org.rocksdb.FlushOptions - org.rocksdb.HashLinkedListMemTableConfig - org.rocksdb.HashSkipListMemTableConfig - org.rocksdb.HyperClockCache - org.rocksdb.IngestExternalFileOptions - org.rocksdb.Logger - org.rocksdb.LRUCache - org.rocksdb.MemoryUtil - org.rocksdb.MemTableConfig - org.rocksdb.NativeComparatorWrapper - org.rocksdb.NativeLibraryLoader - org.rocksdb.OptimisticTransactionDB - org.rocksdb.OptimisticTransactionOptions - org.rocksdb.Options - org.rocksdb.OptionsUtil - org.rocksdb.PersistentCache - org.rocksdb.PlainTableConfig - org.rocksdb.RateLimiter - org.rocksdb.ReadOptions - org.rocksdb.RemoveEmptyValueCompactionFilter - org.rocksdb.RestoreOptions - org.rocksdb.RocksCallbackObject - org.rocksdb.RocksDB - org.rocksdb.RocksEnv - org.rocksdb.RocksIterator - org.rocksdb.RocksIteratorInterface - org.rocksdb.RocksMemEnv - org.rocksdb.RocksMutableObject - org.rocksdb.RocksObject - org.rocksdb.SkipListMemTableConfig - org.rocksdb.Slice - org.rocksdb.Snapshot - org.rocksdb.SstFileManager - org.rocksdb.SstFileWriter - org.rocksdb.SstFileReader - org.rocksdb.SstFileReaderIterator - org.rocksdb.SstPartitionerFactory - org.rocksdb.SstPartitionerFixedPrefixFactory - org.rocksdb.Statistics - org.rocksdb.StringAppendOperator - org.rocksdb.TableFormatConfig - org.rocksdb.ThreadStatus - org.rocksdb.TimedEnv - org.rocksdb.Transaction - org.rocksdb.TransactionDB - org.rocksdb.TransactionDBOptions - org.rocksdb.TransactionLogIterator - org.rocksdb.TransactionOptions - org.rocksdb.TtlDB - org.rocksdb.UInt64AddOperator - org.rocksdb.VectorMemTableConfig - org.rocksdb.WBWIRocksIterator - org.rocksdb.WriteBatch - org.rocksdb.WriteBatch.Handler - org.rocksdb.WriteBatchInterface - org.rocksdb.WriteBatchWithIndex - org.rocksdb.WriteOptions - org.rocksdb.NativeComparatorWrapperTest - org.rocksdb.RocksDBExceptionTest - org.rocksdb.SnapshotTest - org.rocksdb.WriteBatchTest - org.rocksdb.WriteBatchTestInternalHelper - org.rocksdb.WriteBufferManager - org.rocksdb.test.TestableEventListener + org.forstdb.AbstractCompactionFilter + org.forstdb.AbstractCompactionFilterFactory + org.forstdb.AbstractComparator + org.forstdb.AbstractEventListener + org.forstdb.AbstractImmutableNativeReference + org.forstdb.AbstractNativeReference + org.forstdb.AbstractRocksIterator + org.forstdb.AbstractSlice + org.forstdb.AbstractTableFilter + org.forstdb.AbstractTraceWriter + org.forstdb.AbstractTransactionNotifier + org.forstdb.AbstractWalFilter + org.forstdb.BackupEngineOptions + org.forstdb.BackupEngine + org.forstdb.BlockBasedTableConfig + org.forstdb.BloomFilter + org.forstdb.CassandraCompactionFilter + org.forstdb.CassandraValueMergeOperator + org.forstdb.Checkpoint + org.forstdb.ClockCache + org.forstdb.Cache + org.forstdb.ColumnFamilyHandle + org.forstdb.ColumnFamilyOptions + org.forstdb.CompactionJobInfo + org.forstdb.CompactionJobStats + org.forstdb.CompactionOptions + org.forstdb.CompactionOptionsFIFO + org.forstdb.CompactionOptionsUniversal + org.forstdb.CompactRangeOptions + org.forstdb.ComparatorOptions + org.forstdb.CompressionOptions + org.forstdb.ConcurrentTaskLimiterImpl + 
org.forstdb.ConfigOptions + org.forstdb.DBOptions + org.forstdb.DirectSlice + org.forstdb.Env + org.forstdb.EnvFlinkTestSuite + org.forstdb.EnvOptions + org.forstdb.Filter + org.forstdb.FlinkCompactionFilter + org.forstdb.FlinkEnv + org.forstdb.FlushOptions + org.forstdb.HashLinkedListMemTableConfig + org.forstdb.HashSkipListMemTableConfig + org.forstdb.HyperClockCache + org.forstdb.IngestExternalFileOptions + org.forstdb.Logger + org.forstdb.LRUCache + org.forstdb.MemoryUtil + org.forstdb.MemTableConfig + org.forstdb.NativeComparatorWrapper + org.forstdb.NativeLibraryLoader + org.forstdb.OptimisticTransactionDB + org.forstdb.OptimisticTransactionOptions + org.forstdb.Options + org.forstdb.OptionsUtil + org.forstdb.PersistentCache + org.forstdb.PlainTableConfig + org.forstdb.RateLimiter + org.forstdb.ReadOptions + org.forstdb.RemoveEmptyValueCompactionFilter + org.forstdb.RestoreOptions + org.forstdb.RocksCallbackObject + org.forstdb.RocksDB + org.forstdb.RocksEnv + org.forstdb.RocksIterator + org.forstdb.RocksIteratorInterface + org.forstdb.RocksMemEnv + org.forstdb.RocksMutableObject + org.forstdb.RocksObject + org.forstdb.SkipListMemTableConfig + org.forstdb.Slice + org.forstdb.Snapshot + org.forstdb.SstFileManager + org.forstdb.SstFileWriter + org.forstdb.SstFileReader + org.forstdb.SstFileReaderIterator + org.forstdb.SstPartitionerFactory + org.forstdb.SstPartitionerFixedPrefixFactory + org.forstdb.Statistics + org.forstdb.StringAppendOperator + org.forstdb.TableFormatConfig + org.forstdb.ThreadStatus + org.forstdb.TimedEnv + org.forstdb.Transaction + org.forstdb.TransactionDB + org.forstdb.TransactionDBOptions + org.forstdb.TransactionLogIterator + org.forstdb.TransactionOptions + org.forstdb.TtlDB + org.forstdb.UInt64AddOperator + org.forstdb.VectorMemTableConfig + org.forstdb.WBWIRocksIterator + org.forstdb.WriteBatch + org.forstdb.WriteBatch.Handler + org.forstdb.WriteBatchInterface + org.forstdb.WriteBatchWithIndex + org.forstdb.WriteOptions + org.forstdb.NativeComparatorWrapperTest + org.forstdb.RocksDBExceptionTest + org.forstdb.SnapshotTest + org.forstdb.WriteBatchTest + org.forstdb.WriteBatchTestInternalHelper + org.forstdb.WriteBufferManager + org.forstdb.test.TestableEventListener ) create_javah( @@ -802,12 +802,12 @@ foreach (CLAZZ ${JAVA_TEST_RUNNING_CLASSES}) if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") add_test( NAME jtest_${CLAZZ} - COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java/${CMAKE_BUILD_TYPE} -classpath ${JAVA_RUN_TESTCLASSPATH}$${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.rocksdb.test.RocksJunitRunner ${CLAZZ} + COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java/${CMAKE_BUILD_TYPE} -classpath ${JAVA_RUN_TESTCLASSPATH}$${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.forstdb.test.RocksJunitRunner ${CLAZZ} ) else() add_test( NAME jtest_${CLAZZ} - COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java -classpath ${JAVA_RUN_TESTCLASSPATH}:${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.rocksdb.test.RocksJunitRunner ${CLAZZ} + COMMAND ${Java_JAVA_EXECUTABLE} ${JVMARGS} -ea -Xcheck:jni -Djava.library.path=${PROJECT_BINARY_DIR}/java -classpath ${JAVA_RUN_TESTCLASSPATH}:${ROCKSDBJNI_CLASSES_TEST_JAR_FILE} org.forstdb.test.RocksJunitRunner ${CLAZZ} ) endif() endforeach(CLAZZ) \ No newline at end of file diff --git a/java/Makefile b/java/Makefile index aae28e0cd..7a6915cf0 100644 --- a/java/Makefile +++ b/java/Makefile @@ -1,103 
+1,103 @@ NATIVE_JAVA_CLASSES = \ - org.rocksdb.AbstractCompactionFilter\ - org.rocksdb.AbstractCompactionFilterFactory\ - org.rocksdb.AbstractComparator\ - org.rocksdb.AbstractEventListener\ - org.rocksdb.AbstractSlice\ - org.rocksdb.AbstractTableFilter\ - org.rocksdb.AbstractTraceWriter\ - org.rocksdb.AbstractTransactionNotifier\ - org.rocksdb.AbstractWalFilter\ - org.rocksdb.BackupEngine\ - org.rocksdb.BackupEngineOptions\ - org.rocksdb.BlockBasedTableConfig\ - org.rocksdb.BloomFilter\ - org.rocksdb.Checkpoint\ - org.rocksdb.ClockCache\ - org.rocksdb.Cache\ - org.rocksdb.CassandraCompactionFilter\ - org.rocksdb.CassandraValueMergeOperator\ - org.rocksdb.ColumnFamilyHandle\ - org.rocksdb.ColumnFamilyOptions\ - org.rocksdb.CompactionJobInfo\ - org.rocksdb.CompactionJobStats\ - org.rocksdb.CompactionOptions\ - org.rocksdb.CompactionOptionsFIFO\ - org.rocksdb.CompactionOptionsUniversal\ - org.rocksdb.CompactRangeOptions\ - org.rocksdb.ComparatorOptions\ - org.rocksdb.CompressionOptions\ - org.rocksdb.ConfigOptions\ - org.rocksdb.DBOptions\ - org.rocksdb.DirectSlice\ - org.rocksdb.Env\ - org.rocksdb.EnvOptions\ - org.rocksdb.FlinkCompactionFilter\ - org.rocksdb.FlushOptions\ - org.rocksdb.Filter\ - org.rocksdb.IngestExternalFileOptions\ - org.rocksdb.HashLinkedListMemTableConfig\ - org.rocksdb.HashSkipListMemTableConfig\ - org.rocksdb.ConcurrentTaskLimiter\ - org.rocksdb.ConcurrentTaskLimiterImpl\ - org.rocksdb.KeyMayExist\ - org.rocksdb.Logger\ - org.rocksdb.LRUCache\ - org.rocksdb.MemoryUsageType\ - org.rocksdb.MemoryUtil\ - org.rocksdb.MergeOperator\ - org.rocksdb.NativeComparatorWrapper\ - org.rocksdb.OptimisticTransactionDB\ - org.rocksdb.OptimisticTransactionOptions\ - org.rocksdb.Options\ - org.rocksdb.OptionsUtil\ - org.rocksdb.PersistentCache\ - org.rocksdb.PerfContext\ - org.rocksdb.PerfLevel\ - org.rocksdb.PlainTableConfig\ - org.rocksdb.RateLimiter\ - org.rocksdb.ReadOptions\ - org.rocksdb.RemoveEmptyValueCompactionFilter\ - org.rocksdb.RestoreOptions\ - org.rocksdb.RocksCallbackObject\ - org.rocksdb.RocksDB\ - org.rocksdb.RocksEnv\ - org.rocksdb.RocksIterator\ - org.rocksdb.RocksMemEnv\ - org.rocksdb.SkipListMemTableConfig\ - org.rocksdb.Slice\ - org.rocksdb.SstFileManager\ - org.rocksdb.SstFileWriter\ - org.rocksdb.SstFileReader\ - org.rocksdb.SstFileReaderIterator\ - org.rocksdb.SstPartitionerFactory\ - org.rocksdb.SstPartitionerFixedPrefixFactory\ - org.rocksdb.Statistics\ - org.rocksdb.ThreadStatus\ - org.rocksdb.TimedEnv\ - org.rocksdb.Transaction\ - org.rocksdb.TransactionDB\ - org.rocksdb.TransactionDBOptions\ - org.rocksdb.TransactionOptions\ - org.rocksdb.TransactionLogIterator\ - org.rocksdb.TtlDB\ - org.rocksdb.VectorMemTableConfig\ - org.rocksdb.Snapshot\ - org.rocksdb.StringAppendOperator\ - org.rocksdb.UInt64AddOperator\ - org.rocksdb.WriteBatch\ - org.rocksdb.WriteBatch.Handler\ - org.rocksdb.WriteOptions\ - org.rocksdb.WriteBatchWithIndex\ - org.rocksdb.WriteBufferManager\ - org.rocksdb.WBWIRocksIterator + org.forstdb.AbstractCompactionFilter\ + org.forstdb.AbstractCompactionFilterFactory\ + org.forstdb.AbstractComparator\ + org.forstdb.AbstractEventListener\ + org.forstdb.AbstractSlice\ + org.forstdb.AbstractTableFilter\ + org.forstdb.AbstractTraceWriter\ + org.forstdb.AbstractTransactionNotifier\ + org.forstdb.AbstractWalFilter\ + org.forstdb.BackupEngine\ + org.forstdb.BackupEngineOptions\ + org.forstdb.BlockBasedTableConfig\ + org.forstdb.BloomFilter\ + org.forstdb.Checkpoint\ + org.forstdb.ClockCache\ + org.forstdb.Cache\ + 
org.forstdb.CassandraCompactionFilter\ + org.forstdb.CassandraValueMergeOperator\ + org.forstdb.ColumnFamilyHandle\ + org.forstdb.ColumnFamilyOptions\ + org.forstdb.CompactionJobInfo\ + org.forstdb.CompactionJobStats\ + org.forstdb.CompactionOptions\ + org.forstdb.CompactionOptionsFIFO\ + org.forstdb.CompactionOptionsUniversal\ + org.forstdb.CompactRangeOptions\ + org.forstdb.ComparatorOptions\ + org.forstdb.CompressionOptions\ + org.forstdb.ConfigOptions\ + org.forstdb.DBOptions\ + org.forstdb.DirectSlice\ + org.forstdb.Env\ + org.forstdb.EnvOptions\ + org.forstdb.FlinkCompactionFilter\ + org.forstdb.FlushOptions\ + org.forstdb.Filter\ + org.forstdb.IngestExternalFileOptions\ + org.forstdb.HashLinkedListMemTableConfig\ + org.forstdb.HashSkipListMemTableConfig\ + org.forstdb.ConcurrentTaskLimiter\ + org.forstdb.ConcurrentTaskLimiterImpl\ + org.forstdb.KeyMayExist\ + org.forstdb.Logger\ + org.forstdb.LRUCache\ + org.forstdb.MemoryUsageType\ + org.forstdb.MemoryUtil\ + org.forstdb.MergeOperator\ + org.forstdb.NativeComparatorWrapper\ + org.forstdb.OptimisticTransactionDB\ + org.forstdb.OptimisticTransactionOptions\ + org.forstdb.Options\ + org.forstdb.OptionsUtil\ + org.forstdb.PersistentCache\ + org.forstdb.PerfContext\ + org.forstdb.PerfLevel\ + org.forstdb.PlainTableConfig\ + org.forstdb.RateLimiter\ + org.forstdb.ReadOptions\ + org.forstdb.RemoveEmptyValueCompactionFilter\ + org.forstdb.RestoreOptions\ + org.forstdb.RocksCallbackObject\ + org.forstdb.RocksDB\ + org.forstdb.RocksEnv\ + org.forstdb.RocksIterator\ + org.forstdb.RocksMemEnv\ + org.forstdb.SkipListMemTableConfig\ + org.forstdb.Slice\ + org.forstdb.SstFileManager\ + org.forstdb.SstFileWriter\ + org.forstdb.SstFileReader\ + org.forstdb.SstFileReaderIterator\ + org.forstdb.SstPartitionerFactory\ + org.forstdb.SstPartitionerFixedPrefixFactory\ + org.forstdb.Statistics\ + org.forstdb.ThreadStatus\ + org.forstdb.TimedEnv\ + org.forstdb.Transaction\ + org.forstdb.TransactionDB\ + org.forstdb.TransactionDBOptions\ + org.forstdb.TransactionOptions\ + org.forstdb.TransactionLogIterator\ + org.forstdb.TtlDB\ + org.forstdb.VectorMemTableConfig\ + org.forstdb.Snapshot\ + org.forstdb.StringAppendOperator\ + org.forstdb.UInt64AddOperator\ + org.forstdb.WriteBatch\ + org.forstdb.WriteBatch.Handler\ + org.forstdb.WriteOptions\ + org.forstdb.WriteBatchWithIndex\ + org.forstdb.WriteBufferManager\ + org.forstdb.WBWIRocksIterator NATIVE_JAVA_TEST_CLASSES = \ - org.rocksdb.RocksDBExceptionTest\ - org.rocksdb.test.TestableEventListener\ - org.rocksdb.NativeComparatorWrapperTest.NativeStringComparatorWrapper\ - org.rocksdb.WriteBatchTest\ - org.rocksdb.WriteBatchTestInternalHelper + org.forstdb.RocksDBExceptionTest\ + org.forstdb.test.TestableEventListener\ + org.forstdb.NativeComparatorWrapperTest.NativeStringComparatorWrapper\ + org.forstdb.WriteBatchTest\ + org.forstdb.WriteBatchTestInternalHelper ROCKSDB_MAJOR = $(shell grep -E "ROCKSDB_MAJOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) ROCKSDB_MINOR = $(shell grep -E "ROCKSDB_MINOR.[0-9]" ../include/rocksdb/version.h | cut -d ' ' -f 3) @@ -108,109 +108,109 @@ ARCH := $(shell getconf LONG_BIT) SHA256_CMD ?= sha256sum JAVA_TESTS = \ - org.rocksdb.BackupEngineOptionsTest\ - org.rocksdb.BackupEngineTest\ - org.rocksdb.BlobOptionsTest\ - org.rocksdb.BlockBasedTableConfigTest\ - org.rocksdb.BuiltinComparatorTest\ - org.rocksdb.ByteBufferUnsupportedOperationTest\ - org.rocksdb.BytewiseComparatorRegressionTest\ - org.rocksdb.util.BytewiseComparatorTest\ - 
org.rocksdb.util.BytewiseComparatorIntTest\ - org.rocksdb.CheckPointTest\ - org.rocksdb.ClockCacheTest\ - org.rocksdb.ColumnFamilyOptionsTest\ - org.rocksdb.ColumnFamilyTest\ - org.rocksdb.CompactionFilterFactoryTest\ - org.rocksdb.CompactionJobInfoTest\ - org.rocksdb.CompactionJobStatsTest\ - org.rocksdb.CompactionOptionsTest\ - org.rocksdb.CompactionOptionsFIFOTest\ - org.rocksdb.CompactionOptionsUniversalTest\ - org.rocksdb.CompactionPriorityTest\ - org.rocksdb.CompactionStopStyleTest\ - org.rocksdb.ComparatorOptionsTest\ - org.rocksdb.CompressionOptionsTest\ - org.rocksdb.CompressionTypesTest\ - org.rocksdb.DBOptionsTest\ - org.rocksdb.DirectSliceTest\ - org.rocksdb.util.EnvironmentTest\ - org.rocksdb.EnvOptionsTest\ - org.rocksdb.EventListenerTest\ - org.rocksdb.IngestExternalFileOptionsTest\ - org.rocksdb.util.IntComparatorTest\ - org.rocksdb.util.JNIComparatorTest\ - org.rocksdb.FilterTest\ - org.rocksdb.FlushTest\ - org.rocksdb.ImportColumnFamilyTest\ - org.rocksdb.InfoLogLevelTest\ - org.rocksdb.KeyExistsTest \ - org.rocksdb.KeyMayExistTest\ - org.rocksdb.ConcurrentTaskLimiterTest\ - org.rocksdb.LoggerTest\ - org.rocksdb.LRUCacheTest\ - org.rocksdb.MemoryUtilTest\ - org.rocksdb.MemTableTest\ - org.rocksdb.MergeCFVariantsTest\ - org.rocksdb.MergeTest\ - org.rocksdb.MergeVariantsTest\ - org.rocksdb.MultiColumnRegressionTest \ - org.rocksdb.MultiGetManyKeysTest\ - org.rocksdb.MultiGetTest\ - org.rocksdb.MixedOptionsTest\ - org.rocksdb.MutableColumnFamilyOptionsTest\ - org.rocksdb.MutableDBOptionsTest\ - org.rocksdb.MutableOptionsGetSetTest \ - org.rocksdb.NativeComparatorWrapperTest\ - org.rocksdb.NativeLibraryLoaderTest\ - org.rocksdb.OptimisticTransactionTest\ - org.rocksdb.OptimisticTransactionDBTest\ - org.rocksdb.OptimisticTransactionOptionsTest\ - org.rocksdb.OptionsUtilTest\ - org.rocksdb.OptionsTest\ - org.rocksdb.PerfLevelTest \ - org.rocksdb.PerfContextTest \ - org.rocksdb.PutCFVariantsTest\ - org.rocksdb.PutVariantsTest\ - org.rocksdb.PlainTableConfigTest\ - org.rocksdb.RateLimiterTest\ - org.rocksdb.ReadOnlyTest\ - org.rocksdb.ReadOptionsTest\ - org.rocksdb.util.ReverseBytewiseComparatorIntTest\ - org.rocksdb.RocksDBTest\ - org.rocksdb.RocksDBExceptionTest\ - org.rocksdb.DefaultEnvTest\ - org.rocksdb.RocksIteratorTest\ - org.rocksdb.RocksMemEnvTest\ - org.rocksdb.util.SizeUnitTest\ - org.rocksdb.SecondaryDBTest\ - org.rocksdb.SliceTest\ - org.rocksdb.SnapshotTest\ - org.rocksdb.SstFileManagerTest\ - org.rocksdb.SstFileWriterTest\ - org.rocksdb.SstFileReaderTest\ - org.rocksdb.SstPartitionerTest\ - org.rocksdb.TableFilterTest\ - org.rocksdb.TimedEnvTest\ - org.rocksdb.TransactionTest\ - org.rocksdb.TransactionDBTest\ - org.rocksdb.TransactionOptionsTest\ - org.rocksdb.TransactionDBOptionsTest\ - org.rocksdb.TransactionLogIteratorTest\ - org.rocksdb.TtlDBTest\ - org.rocksdb.StatisticsTest\ - org.rocksdb.StatisticsCollectorTest\ - org.rocksdb.VerifyChecksumsTest\ - org.rocksdb.WalFilterTest\ - org.rocksdb.WALRecoveryModeTest\ - org.rocksdb.WriteBatchHandlerTest\ - org.rocksdb.WriteBatchTest\ - org.rocksdb.WriteBatchThreadedTest\ - org.rocksdb.WriteOptionsTest\ - org.rocksdb.WriteBatchWithIndexTest + org.forstdb.BackupEngineOptionsTest\ + org.forstdb.BackupEngineTest\ + org.forstdb.BlobOptionsTest\ + org.forstdb.BlockBasedTableConfigTest\ + org.forstdb.BuiltinComparatorTest\ + org.forstdb.ByteBufferUnsupportedOperationTest\ + org.forstdb.BytewiseComparatorRegressionTest\ + org.forstdb.util.BytewiseComparatorTest\ + org.forstdb.util.BytewiseComparatorIntTest\ + 
org.forstdb.CheckPointTest\ + org.forstdb.ClockCacheTest\ + org.forstdb.ColumnFamilyOptionsTest\ + org.forstdb.ColumnFamilyTest\ + org.forstdb.CompactionFilterFactoryTest\ + org.forstdb.CompactionJobInfoTest\ + org.forstdb.CompactionJobStatsTest\ + org.forstdb.CompactionOptionsTest\ + org.forstdb.CompactionOptionsFIFOTest\ + org.forstdb.CompactionOptionsUniversalTest\ + org.forstdb.CompactionPriorityTest\ + org.forstdb.CompactionStopStyleTest\ + org.forstdb.ComparatorOptionsTest\ + org.forstdb.CompressionOptionsTest\ + org.forstdb.CompressionTypesTest\ + org.forstdb.DBOptionsTest\ + org.forstdb.DirectSliceTest\ + org.forstdb.util.EnvironmentTest\ + org.forstdb.EnvOptionsTest\ + org.forstdb.EventListenerTest\ + org.forstdb.IngestExternalFileOptionsTest\ + org.forstdb.util.IntComparatorTest\ + org.forstdb.util.JNIComparatorTest\ + org.forstdb.FilterTest\ + org.forstdb.FlushTest\ + org.forstdb.ImportColumnFamilyTest\ + org.forstdb.InfoLogLevelTest\ + org.forstdb.KeyExistsTest \ + org.forstdb.KeyMayExistTest\ + org.forstdb.ConcurrentTaskLimiterTest\ + org.forstdb.LoggerTest\ + org.forstdb.LRUCacheTest\ + org.forstdb.MemoryUtilTest\ + org.forstdb.MemTableTest\ + org.forstdb.MergeCFVariantsTest\ + org.forstdb.MergeTest\ + org.forstdb.MergeVariantsTest\ + org.forstdb.MultiColumnRegressionTest \ + org.forstdb.MultiGetManyKeysTest\ + org.forstdb.MultiGetTest\ + org.forstdb.MixedOptionsTest\ + org.forstdb.MutableColumnFamilyOptionsTest\ + org.forstdb.MutableDBOptionsTest\ + org.forstdb.MutableOptionsGetSetTest \ + org.forstdb.NativeComparatorWrapperTest\ + org.forstdb.NativeLibraryLoaderTest\ + org.forstdb.OptimisticTransactionTest\ + org.forstdb.OptimisticTransactionDBTest\ + org.forstdb.OptimisticTransactionOptionsTest\ + org.forstdb.OptionsUtilTest\ + org.forstdb.OptionsTest\ + org.forstdb.PerfLevelTest \ + org.forstdb.PerfContextTest \ + org.forstdb.PutCFVariantsTest\ + org.forstdb.PutVariantsTest\ + org.forstdb.PlainTableConfigTest\ + org.forstdb.RateLimiterTest\ + org.forstdb.ReadOnlyTest\ + org.forstdb.ReadOptionsTest\ + org.forstdb.util.ReverseBytewiseComparatorIntTest\ + org.forstdb.RocksDBTest\ + org.forstdb.RocksDBExceptionTest\ + org.forstdb.DefaultEnvTest\ + org.forstdb.RocksIteratorTest\ + org.forstdb.RocksMemEnvTest\ + org.forstdb.util.SizeUnitTest\ + org.forstdb.SecondaryDBTest\ + org.forstdb.SliceTest\ + org.forstdb.SnapshotTest\ + org.forstdb.SstFileManagerTest\ + org.forstdb.SstFileWriterTest\ + org.forstdb.SstFileReaderTest\ + org.forstdb.SstPartitionerTest\ + org.forstdb.TableFilterTest\ + org.forstdb.TimedEnvTest\ + org.forstdb.TransactionTest\ + org.forstdb.TransactionDBTest\ + org.forstdb.TransactionOptionsTest\ + org.forstdb.TransactionDBOptionsTest\ + org.forstdb.TransactionLogIteratorTest\ + org.forstdb.TtlDBTest\ + org.forstdb.StatisticsTest\ + org.forstdb.StatisticsCollectorTest\ + org.forstdb.VerifyChecksumsTest\ + org.forstdb.WalFilterTest\ + org.forstdb.WALRecoveryModeTest\ + org.forstdb.WriteBatchHandlerTest\ + org.forstdb.WriteBatchTest\ + org.forstdb.WriteBatchThreadedTest\ + org.forstdb.WriteOptionsTest\ + org.forstdb.WriteBatchWithIndexTest FLINK_TESTS = \ - org.rocksdb.flink.FlinkEnvTest + org.forstdb.flink.FlinkEnvTest MAIN_SRC = src/main/java TEST_SRC = src/test/java @@ -302,11 +302,11 @@ include $(ROCKSDB_PLUGIN_MKS) # Add paths to Java sources in plugins ROCKSDB_PLUGIN_JAVA_ROOTS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(PLUGIN_PATH)/$(plugin)/java) -PLUGIN_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/rocksdb/util org/rocksdb, 
$(root)/$(MAIN_SRC)/$(pkg)/*.java)) -CORE_SOURCES = $(foreach pkg, org/rocksdb/util org/rocksdb, $(MAIN_SRC)/$(pkg)/*.java) +PLUGIN_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/forstdb/util org/forstdb, $(root)/$(MAIN_SRC)/$(pkg)/*.java)) +CORE_SOURCES = $(foreach pkg, org/forstdb/util org/forstdb, $(MAIN_SRC)/$(pkg)/*.java) SOURCES = $(wildcard $(CORE_SOURCES) $(PLUGIN_SOURCES)) -PLUGIN_TEST_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb, $(root)/$(TEST_SRC)/$(pkg)/*.java)) -CORE_TEST_SOURCES = $(foreach pkg, org/rocksdb/test org/rocksdb/util org/rocksdb/flink org/rocksdb, $(TEST_SRC)/$(pkg)/*.java) +PLUGIN_TEST_SOURCES = $(foreach root, $(ROCKSDB_PLUGIN_JAVA_ROOTS), $(foreach pkg, org/forstdb/test org/forstdb/util org/forstdb, $(root)/$(TEST_SRC)/$(pkg)/*.java)) +CORE_TEST_SOURCES = $(foreach pkg, org/forstdb/test org/forstdb/util org/forstdb/flink org/forstdb, $(TEST_SRC)/$(pkg)/*.java) TEST_SOURCES = $(wildcard $(CORE_TEST_SOURCES) $(PLUGIN_TEST_SOURCES)) MOCK_FLINK_TEST_SOURCES = $(foreach pkg, org/apache/flink/core/fs org/apache/flink/state/forst/fs, flinktestmock/src/main/java/$(pkg)/*.java) @@ -458,15 +458,15 @@ test: java mock_flink_fs java_test $(MAKE) run_test run_test: - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ALL_JAVA_TESTS) - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner org.rocksdb.StatisticsTest + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.forstdb.test.RocksJunitRunner $(ALL_JAVA_TESTS) + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.forstdb.test.RocksJunitRunner org.forstdb.StatisticsTest run_plugin_test: - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ROCKSDB_PLUGIN_JAVA_TESTS) + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.forstdb.test.RocksJunitRunner $(ROCKSDB_PLUGIN_JAVA_TESTS) db_bench: java $(AM_V_GEN)mkdir -p $(BENCHMARK_MAIN_CLASSES) - $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(BENCHMARK_MAIN_CLASSES) $(BENCHMARK_MAIN_SRC)/org/rocksdb/benchmark/*.java + $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(BENCHMARK_MAIN_CLASSES) $(BENCHMARK_MAIN_SRC)/org/forstdb/benchmark/*.java pmd: $(MAVEN_CMD) pmd:pmd pmd:cpd pmd:check @@ -479,4 +479,4 @@ flink_test: java java_test mock_flink_fs $(MAKE) run_flink_test run_flink_test: - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(FLINK_TESTS) + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.forstdb.test.RocksJunitRunner $(FLINK_TESTS) diff --git a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java b/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java index 070f0fe75..4b8372f51 100644 --- a/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java +++ 
b/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java @@ -19,7 +19,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.rocksdb.benchmark; +package org.forstdb.benchmark; import java.io.IOException; import java.lang.Runnable; @@ -43,9 +43,9 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import org.rocksdb.*; -import org.rocksdb.RocksMemEnv; -import org.rocksdb.util.SizeUnit; +import org.forstdb.*; +import org.forstdb.RocksMemEnv; +import org.forstdb.util.SizeUnit; class Stats { int id_; diff --git a/java/rocksjni/backup_engine_options.cc b/java/forstjni/backup_engine_options.cc similarity index 77% rename from java/rocksjni/backup_engine_options.cc rename to java/forstjni/backup_engine_options.cc index 25bfb6720..589a711be 100644 --- a/java/rocksjni/backup_engine_options.cc +++ b/java/forstjni/backup_engine_options.cc @@ -14,20 +14,20 @@ #include #include -#include "include/org_rocksdb_BackupEngineOptions.h" +#include "include/org_forstdb_BackupEngineOptions.h" #include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /////////////////////////////////////////////////////////////////////////// // BackupDBOptions /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: newBackupEngineOptions * Signature: (Ljava/lang/String;)J */ -jlong Java_org_rocksdb_BackupEngineOptions_newBackupEngineOptions( +jlong Java_org_forstdb_BackupEngineOptions_newBackupEngineOptions( JNIEnv* env, jclass /*jcls*/, jstring jpath) { const char* cpath = env->GetStringUTFChars(jpath, nullptr); if (cpath == nullptr) { @@ -40,11 +40,11 @@ jlong Java_org_rocksdb_BackupEngineOptions_newBackupEngineOptions( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: backupDir * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_BackupEngineOptions_backupDir(JNIEnv* env, +jstring Java_org_forstdb_BackupEngineOptions_backupDir(JNIEnv* env, jobject /*jopt*/, jlong jhandle) { auto* bopt = @@ -53,11 +53,11 @@ jstring Java_org_rocksdb_BackupEngineOptions_backupDir(JNIEnv* env, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setBackupEnv * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setBackupEnv( +void Java_org_forstdb_BackupEngineOptions_setBackupEnv( JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle, jlong jrocks_env_handle) { auto* bopt = reinterpret_cast(jhandle); @@ -67,11 +67,11 @@ void Java_org_rocksdb_BackupEngineOptions_setBackupEnv( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setShareTableFiles * Signature: (JZ)V */ -void Java_org_rocksdb_BackupEngineOptions_setShareTableFiles(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_setShareTableFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { @@ -81,11 +81,11 @@ void Java_org_rocksdb_BackupEngineOptions_setShareTableFiles(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: shareTableFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_BackupEngineOptions_shareTableFiles(JNIEnv* /*env*/, +jboolean Java_org_forstdb_BackupEngineOptions_shareTableFiles(JNIEnv* 
/*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -94,11 +94,11 @@ jboolean Java_org_rocksdb_BackupEngineOptions_shareTableFiles(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setInfoLog * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setInfoLog(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_setInfoLog(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong /*jlogger_handle*/) { @@ -111,11 +111,11 @@ void Java_org_rocksdb_BackupEngineOptions_setInfoLog(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setSync * Signature: (JZ)V */ -void Java_org_rocksdb_BackupEngineOptions_setSync(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_setSync(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { @@ -125,11 +125,11 @@ void Java_org_rocksdb_BackupEngineOptions_setSync(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: sync * Signature: (J)Z */ -jboolean Java_org_rocksdb_BackupEngineOptions_sync(JNIEnv* /*env*/, +jboolean Java_org_forstdb_BackupEngineOptions_sync(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -138,11 +138,11 @@ jboolean Java_org_rocksdb_BackupEngineOptions_sync(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setDestroyOldData * Signature: (JZ)V */ -void Java_org_rocksdb_BackupEngineOptions_setDestroyOldData(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_setDestroyOldData(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { @@ -152,11 +152,11 @@ void Java_org_rocksdb_BackupEngineOptions_setDestroyOldData(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: destroyOldData * Signature: (J)Z */ -jboolean Java_org_rocksdb_BackupEngineOptions_destroyOldData(JNIEnv* /*env*/, +jboolean Java_org_forstdb_BackupEngineOptions_destroyOldData(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -165,11 +165,11 @@ jboolean Java_org_rocksdb_BackupEngineOptions_destroyOldData(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setBackupLogFiles * Signature: (JZ)V */ -void Java_org_rocksdb_BackupEngineOptions_setBackupLogFiles(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_setBackupLogFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { @@ -179,11 +179,11 @@ void Java_org_rocksdb_BackupEngineOptions_setBackupLogFiles(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: backupLogFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_BackupEngineOptions_backupLogFiles(JNIEnv* /*env*/, +jboolean Java_org_forstdb_BackupEngineOptions_backupLogFiles(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -192,11 +192,11 @@ jboolean Java_org_rocksdb_BackupEngineOptions_backupLogFiles(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setBackupRateLimit * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimit( +void Java_org_forstdb_BackupEngineOptions_setBackupRateLimit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jbackup_rate_limit) { auto* bopt = @@ -205,11 +205,11 @@ 
void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimit( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: backupRateLimit * Signature: (J)J */ -jlong Java_org_rocksdb_BackupEngineOptions_backupRateLimit(JNIEnv* /*env*/, +jlong Java_org_forstdb_BackupEngineOptions_backupRateLimit(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -218,11 +218,11 @@ jlong Java_org_rocksdb_BackupEngineOptions_backupRateLimit(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setBackupRateLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimiter( +void Java_org_forstdb_BackupEngineOptions_setBackupRateLimiter( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrate_limiter_handle) { auto* bopt = @@ -234,11 +234,11 @@ void Java_org_rocksdb_BackupEngineOptions_setBackupRateLimiter( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setRestoreRateLimit * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimit( +void Java_org_forstdb_BackupEngineOptions_setRestoreRateLimit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrestore_rate_limit) { auto* bopt = @@ -247,11 +247,11 @@ void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimit( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: restoreRateLimit * Signature: (J)J */ -jlong Java_org_rocksdb_BackupEngineOptions_restoreRateLimit(JNIEnv* /*env*/, +jlong Java_org_forstdb_BackupEngineOptions_restoreRateLimit(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = @@ -260,11 +260,11 @@ jlong Java_org_rocksdb_BackupEngineOptions_restoreRateLimit(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setRestoreRateLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimiter( +void Java_org_forstdb_BackupEngineOptions_setRestoreRateLimiter( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jrate_limiter_handle) { auto* bopt = @@ -276,11 +276,11 @@ void Java_org_rocksdb_BackupEngineOptions_setRestoreRateLimiter( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setShareFilesWithChecksum * Signature: (JZ)V */ -void Java_org_rocksdb_BackupEngineOptions_setShareFilesWithChecksum( +void Java_org_forstdb_BackupEngineOptions_setShareFilesWithChecksum( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean flag) { auto* bopt = reinterpret_cast(jhandle); @@ -288,11 +288,11 @@ void Java_org_rocksdb_BackupEngineOptions_setShareFilesWithChecksum( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: shareFilesWithChecksum * Signature: (J)Z */ -jboolean Java_org_rocksdb_BackupEngineOptions_shareFilesWithChecksum( +jboolean Java_org_forstdb_BackupEngineOptions_shareFilesWithChecksum( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); @@ -300,11 +300,11 @@ jboolean Java_org_rocksdb_BackupEngineOptions_shareFilesWithChecksum( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setMaxBackgroundOperations * Signature: (JI)V */ -void Java_org_rocksdb_BackupEngineOptions_setMaxBackgroundOperations( +void Java_org_forstdb_BackupEngineOptions_setMaxBackgroundOperations( 
JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint max_background_operations) { auto* bopt = @@ -313,11 +313,11 @@ void Java_org_rocksdb_BackupEngineOptions_setMaxBackgroundOperations( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: maxBackgroundOperations * Signature: (J)I */ -jint Java_org_rocksdb_BackupEngineOptions_maxBackgroundOperations( +jint Java_org_forstdb_BackupEngineOptions_maxBackgroundOperations( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); @@ -325,11 +325,11 @@ jint Java_org_rocksdb_BackupEngineOptions_maxBackgroundOperations( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: setCallbackTriggerIntervalSize * Signature: (JJ)V */ -void Java_org_rocksdb_BackupEngineOptions_setCallbackTriggerIntervalSize( +void Java_org_forstdb_BackupEngineOptions_setCallbackTriggerIntervalSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcallback_trigger_interval_size) { auto* bopt = @@ -339,11 +339,11 @@ void Java_org_rocksdb_BackupEngineOptions_setCallbackTriggerIntervalSize( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: callbackTriggerIntervalSize * Signature: (J)J */ -jlong Java_org_rocksdb_BackupEngineOptions_callbackTriggerIntervalSize( +jlong Java_org_forstdb_BackupEngineOptions_callbackTriggerIntervalSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* bopt = reinterpret_cast(jhandle); @@ -351,11 +351,11 @@ jlong Java_org_rocksdb_BackupEngineOptions_callbackTriggerIntervalSize( } /* - * Class: org_rocksdb_BackupEngineOptions + * Class: org_forstdb_BackupEngineOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_BackupEngineOptions_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngineOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle) { auto* bopt = diff --git a/java/rocksjni/backupenginejni.cc b/java/forstjni/backupenginejni.cc similarity index 86% rename from java/rocksjni/backupenginejni.cc rename to java/forstjni/backupenginejni.cc index 1ba7ea286..2a1876b4c 100644 --- a/java/rocksjni/backupenginejni.cc +++ b/java/forstjni/backupenginejni.cc @@ -10,17 +10,17 @@ #include -#include "include/org_rocksdb_BackupEngine.h" +#include "include/org_forstdb_BackupEngine.h" #include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: open * Signature: (JJ)J */ -jlong Java_org_rocksdb_BackupEngine_open(JNIEnv* env, jclass /*jcls*/, +jlong Java_org_forstdb_BackupEngine_open(JNIEnv* env, jclass /*jcls*/, jlong env_handle, jlong backup_engine_options_handle) { auto* rocks_env = reinterpret_cast(env_handle); @@ -40,11 +40,11 @@ jlong Java_org_rocksdb_BackupEngine_open(JNIEnv* env, jclass /*jcls*/, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: createNewBackup * Signature: (JJZ)V */ -void Java_org_rocksdb_BackupEngine_createNewBackup( +void Java_org_forstdb_BackupEngine_createNewBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, jboolean jflush_before_backup) { auto* db = reinterpret_cast(db_handle); @@ -61,11 +61,11 @@ void Java_org_rocksdb_BackupEngine_createNewBackup( } /* - * Class: org_rocksdb_BackupEngine + * Class: 
org_forstdb_BackupEngine * Method: createNewBackupWithMetadata * Signature: (JJLjava/lang/String;Z)V */ -void Java_org_rocksdb_BackupEngine_createNewBackupWithMetadata( +void Java_org_forstdb_BackupEngine_createNewBackupWithMetadata( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jlong db_handle, jstring japp_metadata, jboolean jflush_before_backup) { auto* db = reinterpret_cast(db_handle); @@ -92,11 +92,11 @@ void Java_org_rocksdb_BackupEngine_createNewBackupWithMetadata( } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: getBackupInfo * Signature: (J)Ljava/util/List; */ -jobject Java_org_rocksdb_BackupEngine_getBackupInfo(JNIEnv* env, +jobject Java_org_forstdb_BackupEngine_getBackupInfo(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = @@ -107,11 +107,11 @@ jobject Java_org_rocksdb_BackupEngine_getBackupInfo(JNIEnv* env, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: getCorruptedBackups * Signature: (J)[I */ -jintArray Java_org_rocksdb_BackupEngine_getCorruptedBackups(JNIEnv* env, +jintArray Java_org_forstdb_BackupEngine_getCorruptedBackups(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = @@ -135,11 +135,11 @@ jintArray Java_org_rocksdb_BackupEngine_getCorruptedBackups(JNIEnv* env, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: garbageCollect * Signature: (J)V */ -void Java_org_rocksdb_BackupEngine_garbageCollect(JNIEnv* env, jobject /*jbe*/, +void Java_org_forstdb_BackupEngine_garbageCollect(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle) { auto* backup_engine = reinterpret_cast(jbe_handle); @@ -153,11 +153,11 @@ void Java_org_rocksdb_BackupEngine_garbageCollect(JNIEnv* env, jobject /*jbe*/, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: purgeOldBackups * Signature: (JI)V */ -void Java_org_rocksdb_BackupEngine_purgeOldBackups(JNIEnv* env, jobject /*jbe*/, +void Java_org_forstdb_BackupEngine_purgeOldBackups(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jnum_backups_to_keep) { auto* backup_engine = @@ -173,11 +173,11 @@ void Java_org_rocksdb_BackupEngine_purgeOldBackups(JNIEnv* env, jobject /*jbe*/, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: deleteBackup * Signature: (JI)V */ -void Java_org_rocksdb_BackupEngine_deleteBackup(JNIEnv* env, jobject /*jbe*/, +void Java_org_forstdb_BackupEngine_deleteBackup(JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jbackup_id) { auto* backup_engine = @@ -193,11 +193,11 @@ void Java_org_rocksdb_BackupEngine_deleteBackup(JNIEnv* env, jobject /*jbe*/, } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: restoreDbFromBackup * Signature: (JILjava/lang/String;Ljava/lang/String;J)V */ -void Java_org_rocksdb_BackupEngine_restoreDbFromBackup( +void Java_org_forstdb_BackupEngine_restoreDbFromBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jint jbackup_id, jstring jdb_dir, jstring jwal_dir, jlong jrestore_options_handle) { auto* backup_engine = @@ -230,11 +230,11 @@ void Java_org_rocksdb_BackupEngine_restoreDbFromBackup( } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: restoreDbFromLatestBackup * Signature: (JLjava/lang/String;Ljava/lang/String;J)V */ -void Java_org_rocksdb_BackupEngine_restoreDbFromLatestBackup( +void Java_org_forstdb_BackupEngine_restoreDbFromLatestBackup( JNIEnv* env, jobject /*jbe*/, jlong jbe_handle, jstring 
jdb_dir, jstring jwal_dir, jlong jrestore_options_handle) { auto* backup_engine = @@ -266,11 +266,11 @@ void Java_org_rocksdb_BackupEngine_restoreDbFromLatestBackup( } /* - * Class: org_rocksdb_BackupEngine + * Class: org_forstdb_BackupEngine * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_BackupEngine_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_BackupEngine_disposeInternal(JNIEnv* /*env*/, jobject /*jbe*/, jlong jbe_handle) { auto* be = reinterpret_cast(jbe_handle); diff --git a/java/rocksjni/cache.cc b/java/forstjni/cache.cc similarity index 78% rename from java/rocksjni/cache.cc rename to java/forstjni/cache.cc index 5ca1d5175..a1c863d35 100644 --- a/java/rocksjni/cache.cc +++ b/java/forstjni/cache.cc @@ -8,26 +8,26 @@ #include -#include "include/org_rocksdb_Cache.h" +#include "include/org_forstdb_Cache.h" #include "rocksdb/advanced_cache.h" /* - * Class: org_rocksdb_Cache + * Class: org_forstdb_Cache * Method: getUsage * Signature: (J)J */ -jlong Java_org_rocksdb_Cache_getUsage(JNIEnv*, jclass, jlong jhandle) { +jlong Java_org_forstdb_Cache_getUsage(JNIEnv*, jclass, jlong jhandle) { auto* sptr_cache = reinterpret_cast*>(jhandle); return static_cast(sptr_cache->get()->GetUsage()); } /* - * Class: org_rocksdb_Cache + * Class: org_forstdb_Cache * Method: getPinnedUsage * Signature: (J)J */ -jlong Java_org_rocksdb_Cache_getPinnedUsage(JNIEnv*, jclass, jlong jhandle) { +jlong Java_org_forstdb_Cache_getPinnedUsage(JNIEnv*, jclass, jlong jhandle) { auto* sptr_cache = reinterpret_cast*>(jhandle); return static_cast(sptr_cache->get()->GetPinnedUsage()); diff --git a/java/rocksjni/cassandra_compactionfilterjni.cc b/java/forstjni/cassandra_compactionfilterjni.cc similarity index 78% rename from java/rocksjni/cassandra_compactionfilterjni.cc rename to java/forstjni/cassandra_compactionfilterjni.cc index 25817aeca..805f31051 100644 --- a/java/rocksjni/cassandra_compactionfilterjni.cc +++ b/java/forstjni/cassandra_compactionfilterjni.cc @@ -5,16 +5,16 @@ #include -#include "include/org_rocksdb_CassandraCompactionFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_CassandraCompactionFilter.h" +#include "forstjni/cplusplus_to_java_convert.h" #include "utilities/cassandra/cassandra_compaction_filter.h" /* - * Class: org_rocksdb_CassandraCompactionFilter + * Class: org_forstdb_CassandraCompactionFilter * Method: createNewCassandraCompactionFilter0 * Signature: (ZI)J */ -jlong Java_org_rocksdb_CassandraCompactionFilter_createNewCassandraCompactionFilter0( +jlong Java_org_forstdb_CassandraCompactionFilter_createNewCassandraCompactionFilter0( JNIEnv* /*env*/, jclass /*jcls*/, jboolean purge_ttl_on_expiration, jint gc_grace_period_in_seconds) { auto* compaction_filter = diff --git a/java/rocksjni/cassandra_value_operator.cc b/java/forstjni/cassandra_value_operator.cc similarity index 77% rename from java/rocksjni/cassandra_value_operator.cc rename to java/forstjni/cassandra_value_operator.cc index 6de28c1b1..46f4caae5 100644 --- a/java/rocksjni/cassandra_value_operator.cc +++ b/java/forstjni/cassandra_value_operator.cc @@ -10,7 +10,7 @@ #include #include -#include "include/org_rocksdb_CassandraValueMergeOperator.h" +#include "include/org_forstdb_CassandraValueMergeOperator.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/merge_operator.h" @@ -18,16 +18,16 @@ #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" -#include "rocksjni/cplusplus_to_java_convert.h" 
-#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" #include "utilities/cassandra/merge_operator.h" /* - * Class: org_rocksdb_CassandraValueMergeOperator + * Class: org_forstdb_CassandraValueMergeOperator * Method: newSharedCassandraValueMergeOperator * Signature: (II)J */ -jlong Java_org_rocksdb_CassandraValueMergeOperator_newSharedCassandraValueMergeOperator( +jlong Java_org_forstdb_CassandraValueMergeOperator_newSharedCassandraValueMergeOperator( JNIEnv* /*env*/, jclass /*jclazz*/, jint gcGracePeriodInSeconds, jint operands_limit) { auto* op = new std::shared_ptr( @@ -37,11 +37,11 @@ jlong Java_org_rocksdb_CassandraValueMergeOperator_newSharedCassandraValueMergeO } /* - * Class: org_rocksdb_CassandraValueMergeOperator + * Class: org_forstdb_CassandraValueMergeOperator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CassandraValueMergeOperator_disposeInternal( +void Java_org_forstdb_CassandraValueMergeOperator_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* op = reinterpret_cast*>( diff --git a/java/rocksjni/checkpoint.cc b/java/forstjni/checkpoint.cc similarity index 85% rename from java/rocksjni/checkpoint.cc rename to java/forstjni/checkpoint.cc index cef5f3ca8..dd689b5aa 100644 --- a/java/rocksjni/checkpoint.cc +++ b/java/forstjni/checkpoint.cc @@ -14,16 +14,16 @@ #include -#include "include/org_rocksdb_Checkpoint.h" +#include "include/org_forstdb_Checkpoint.h" #include "rocksdb/db.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_Checkpoint + * Class: org_forstdb_Checkpoint * Method: newCheckpoint * Signature: (J)J */ -jlong Java_org_rocksdb_Checkpoint_newCheckpoint(JNIEnv* /*env*/, +jlong Java_org_forstdb_Checkpoint_newCheckpoint(JNIEnv* /*env*/, jclass /*jclazz*/, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -33,11 +33,11 @@ jlong Java_org_rocksdb_Checkpoint_newCheckpoint(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Checkpoint + * Class: org_forstdb_Checkpoint * Method: dispose * Signature: (J)V */ -void Java_org_rocksdb_Checkpoint_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_Checkpoint_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* checkpoint = reinterpret_cast(jhandle); @@ -46,11 +46,11 @@ void Java_org_rocksdb_Checkpoint_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Checkpoint + * Class: org_forstdb_Checkpoint * Method: createCheckpoint * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, jlong jcheckpoint_handle, jstring jcheckpoint_path) { const char* checkpoint_path = env->GetStringUTFChars(jcheckpoint_path, 0); @@ -71,11 +71,11 @@ void Java_org_rocksdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Checkpoint + * Class: org_forstdb_Checkpoint * Method: exportColumnFamily * Signature: (JJLjava/lang/String;)Lorg/rocksdb/ExportImportFilesMetaData; */ -jlong Java_org_rocksdb_Checkpoint_exportColumnFamily( +jlong Java_org_forstdb_Checkpoint_exportColumnFamily( JNIEnv* env, jobject /*jobj*/, jlong jcheckpoint_handle, jlong jcolumn_family_handle, jstring jexport_path) { const char* export_path = env->GetStringUTFChars(jexport_path, 0); diff --git a/java/rocksjni/clock_cache.cc 
b/java/forstjni/clock_cache.cc similarity index 81% rename from java/rocksjni/clock_cache.cc rename to java/forstjni/clock_cache.cc index e04991aa9..e5778d15e 100644 --- a/java/rocksjni/clock_cache.cc +++ b/java/forstjni/clock_cache.cc @@ -10,15 +10,15 @@ #include -#include "include/org_rocksdb_ClockCache.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_ClockCache.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_ClockCache + * Class: org_forstdb_ClockCache * Method: newClockCache * Signature: (JIZ)J */ -jlong Java_org_rocksdb_ClockCache_newClockCache( +jlong Java_org_forstdb_ClockCache_newClockCache( JNIEnv* /*env*/, jclass /*jcls*/, jlong jcapacity, jint jnum_shard_bits, jboolean jstrict_capacity_limit) { auto* sptr_clock_cache = new std::shared_ptr( @@ -29,11 +29,11 @@ jlong Java_org_rocksdb_ClockCache_newClockCache( } /* - * Class: org_rocksdb_ClockCache + * Class: org_forstdb_ClockCache * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ClockCache_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_ClockCache_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_clock_cache = diff --git a/java/rocksjni/columnfamilyhandle.cc b/java/forstjni/columnfamilyhandle.cc similarity index 80% rename from java/rocksjni/columnfamilyhandle.cc rename to java/forstjni/columnfamilyhandle.cc index 4140580f0..abca5ff5f 100644 --- a/java/rocksjni/columnfamilyhandle.cc +++ b/java/forstjni/columnfamilyhandle.cc @@ -10,15 +10,15 @@ #include #include -#include "include/org_rocksdb_ColumnFamilyHandle.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_ColumnFamilyHandle.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_ColumnFamilyHandle + * Class: org_forstdb_ColumnFamilyHandle * Method: getName * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_ColumnFamilyHandle_getName(JNIEnv* env, +jbyteArray Java_org_forstdb_ColumnFamilyHandle_getName(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); @@ -27,11 +27,11 @@ jbyteArray Java_org_rocksdb_ColumnFamilyHandle_getName(JNIEnv* env, } /* - * Class: org_rocksdb_ColumnFamilyHandle + * Class: org_forstdb_ColumnFamilyHandle * Method: getID * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, +jint Java_org_forstdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); @@ -40,11 +40,11 @@ jint Java_org_rocksdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_ColumnFamilyHandle + * Class: org_forstdb_ColumnFamilyHandle * Method: getDescriptor * Signature: (J)Lorg/rocksdb/ColumnFamilyDescriptor; */ -jobject Java_org_rocksdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, +jobject Java_org_forstdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); @@ -59,11 +59,11 @@ jobject Java_org_rocksdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, } /* - * Class: org_rocksdb_ColumnFamilyHandle + * Class: org_forstdb_ColumnFamilyHandle * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ColumnFamilyHandle_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_ColumnFamilyHandle_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* cfh = reinterpret_cast(jhandle); diff --git a/java/rocksjni/compact_range_options.cc b/java/forstjni/compact_range_options.cc similarity index 70% rename from 
java/rocksjni/compact_range_options.cc rename to java/forstjni/compact_range_options.cc index d07263ab6..89d8a9156 100644 --- a/java/rocksjni/compact_range_options.cc +++ b/java/forstjni/compact_range_options.cc @@ -8,10 +8,10 @@ #include -#include "include/org_rocksdb_CompactRangeOptions.h" +#include "include/org_forstdb_CompactRangeOptions.h" #include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" #include "util/coding.h" /** @@ -23,7 +23,7 @@ * maintain the lifetime of these parameters (`full_history_ts_low`, `canceled`) * by including their values in this class. */ -class Java_org_rocksdb_CompactRangeOptions { +class Java_org_forstdb_CompactRangeOptions { public: ROCKSDB_NAMESPACE::CompactRangeOptions compactRangeOptions; @@ -64,229 +64,229 @@ class Java_org_rocksdb_CompactRangeOptions { }; /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: newCompactRangeOptions * Signature: ()J */ -jlong Java_org_rocksdb_CompactRangeOptions_newCompactRangeOptions( +jlong Java_org_forstdb_CompactRangeOptions_newCompactRangeOptions( JNIEnv* /*env*/, jclass /*jclazz*/) { - auto* options = new Java_org_rocksdb_CompactRangeOptions(); + auto* options = new Java_org_forstdb_CompactRangeOptions(); return GET_CPLUSPLUS_POINTER(&options->compactRangeOptions); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: exclusiveManualCompaction * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactRangeOptions_exclusiveManualCompaction( +jboolean Java_org_forstdb_CompactRangeOptions_exclusiveManualCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast( options->compactRangeOptions.exclusive_manual_compaction); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setExclusiveManualCompaction * Signature: (JZ)V */ -void Java_org_rocksdb_CompactRangeOptions_setExclusiveManualCompaction( +void Java_org_forstdb_CompactRangeOptions_setExclusiveManualCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean exclusive_manual_compaction) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.exclusive_manual_compaction = static_cast(exclusive_manual_compaction); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: bottommostLevelCompaction * Signature: (J)I */ -jint Java_org_rocksdb_CompactRangeOptions_bottommostLevelCompaction( +jint Java_org_forstdb_CompactRangeOptions_bottommostLevelCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: toJavaBottommostLevelCompaction( options->compactRangeOptions.bottommost_level_compaction); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setBottommostLevelCompaction * Signature: (JI)V */ -void Java_org_rocksdb_CompactRangeOptions_setBottommostLevelCompaction( +void Java_org_forstdb_CompactRangeOptions_setBottommostLevelCompaction( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint bottommost_level_compaction) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); 
options->compactRangeOptions.bottommost_level_compaction = ROCKSDB_NAMESPACE::BottommostLevelCompactionJni:: toCppBottommostLevelCompaction(bottommost_level_compaction); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: changeLevel * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactRangeOptions_changeLevel(JNIEnv* /*env*/, +jboolean Java_org_forstdb_CompactRangeOptions_changeLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast(options->compactRangeOptions.change_level); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setChangeLevel * Signature: (JZ)V */ -void Java_org_rocksdb_CompactRangeOptions_setChangeLevel( +void Java_org_forstdb_CompactRangeOptions_setChangeLevel( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean change_level) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.change_level = static_cast(change_level); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: targetLevel * Signature: (J)I */ -jint Java_org_rocksdb_CompactRangeOptions_targetLevel(JNIEnv* /*env*/, +jint Java_org_forstdb_CompactRangeOptions_targetLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast(options->compactRangeOptions.target_level); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setTargetLevel * Signature: (JI)V */ -void Java_org_rocksdb_CompactRangeOptions_setTargetLevel(JNIEnv* /*env*/, +void Java_org_forstdb_CompactRangeOptions_setTargetLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint target_level) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.target_level = static_cast(target_level); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: targetPathId * Signature: (J)I */ -jint Java_org_rocksdb_CompactRangeOptions_targetPathId(JNIEnv* /*env*/, +jint Java_org_forstdb_CompactRangeOptions_targetPathId(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast(options->compactRangeOptions.target_path_id); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setTargetPathId * Signature: (JI)V */ -void Java_org_rocksdb_CompactRangeOptions_setTargetPathId(JNIEnv* /*env*/, +void Java_org_forstdb_CompactRangeOptions_setTargetPathId(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint target_path_id) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.target_path_id = static_cast(target_path_id); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: allowWriteStall * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactRangeOptions_allowWriteStall(JNIEnv* /*env*/, +jboolean Java_org_forstdb_CompactRangeOptions_allowWriteStall(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast(options->compactRangeOptions.allow_write_stall); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: 
org_forstdb_CompactRangeOptions * Method: setAllowWriteStall * Signature: (JZ)V */ -void Java_org_rocksdb_CompactRangeOptions_setAllowWriteStall( +void Java_org_forstdb_CompactRangeOptions_setAllowWriteStall( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean allow_write_stall) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.allow_write_stall = static_cast(allow_write_stall); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: maxSubcompactions * Signature: (J)I */ -jint Java_org_rocksdb_CompactRangeOptions_maxSubcompactions(JNIEnv* /*env*/, +jint Java_org_forstdb_CompactRangeOptions_maxSubcompactions(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return static_cast(options->compactRangeOptions.max_subcompactions); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setMaxSubcompactions * Signature: (JI)V */ -void Java_org_rocksdb_CompactRangeOptions_setMaxSubcompactions( +void Java_org_forstdb_CompactRangeOptions_setMaxSubcompactions( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jint max_subcompactions) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->compactRangeOptions.max_subcompactions = static_cast(max_subcompactions); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setFullHistoryTSLow * Signature: (JJJ)V */ -void Java_org_rocksdb_CompactRangeOptions_setFullHistoryTSLow(JNIEnv*, jobject, +void Java_org_forstdb_CompactRangeOptions_setFullHistoryTSLow(JNIEnv*, jobject, jlong jhandle, jlong start, jlong range) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->set_full_history_ts_low(start, range); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: fullHistoryTSLow * Signature: (J)Lorg/rocksdb/CompactRangeOptions/Timestamp; */ -jobject Java_org_rocksdb_CompactRangeOptions_fullHistoryTSLow(JNIEnv* env, +jobject Java_org_forstdb_CompactRangeOptions_fullHistoryTSLow(JNIEnv* env, jobject, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); uint64_t start; uint64_t range; jobject result = nullptr; @@ -300,39 +300,39 @@ jobject Java_org_rocksdb_CompactRangeOptions_fullHistoryTSLow(JNIEnv* env, } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: setCanceled * Signature: (JZ)V */ -void Java_org_rocksdb_CompactRangeOptions_setCanceled(JNIEnv*, jobject, +void Java_org_forstdb_CompactRangeOptions_setCanceled(JNIEnv*, jobject, jlong jhandle, jboolean jcanceled) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); options->set_canceled(jcanceled); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: canceled * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactRangeOptions_canceled(JNIEnv*, jobject, +jboolean Java_org_forstdb_CompactRangeOptions_canceled(JNIEnv*, jobject, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); return options->get_canceled(); } /* - * Class: org_rocksdb_CompactRangeOptions + * Class: org_forstdb_CompactRangeOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompactRangeOptions_disposeInternal(JNIEnv* /*env*/, +void 
Java_org_forstdb_CompactRangeOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* options = - reinterpret_cast(jhandle); + reinterpret_cast(jhandle); delete options; } diff --git a/java/rocksjni/compaction_filter.cc b/java/forstjni/compaction_filter.cc similarity index 83% rename from java/rocksjni/compaction_filter.cc rename to java/forstjni/compaction_filter.cc index ea04996ac..f45234896 100644 --- a/java/rocksjni/compaction_filter.cc +++ b/java/forstjni/compaction_filter.cc @@ -10,16 +10,16 @@ #include -#include "include/org_rocksdb_AbstractCompactionFilter.h" +#include "include/org_forstdb_AbstractCompactionFilter.h" // /* - * Class: org_rocksdb_AbstractCompactionFilter + * Class: org_forstdb_AbstractCompactionFilter * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_AbstractCompactionFilter_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_AbstractCompactionFilter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* cf = reinterpret_cast(handle); diff --git a/java/rocksjni/compaction_filter_factory.cc b/java/forstjni/compaction_filter_factory.cc similarity index 71% rename from java/rocksjni/compaction_filter_factory.cc rename to java/forstjni/compaction_filter_factory.cc index 16fbdbbdd..5f68420c3 100644 --- a/java/rocksjni/compaction_filter_factory.cc +++ b/java/forstjni/compaction_filter_factory.cc @@ -10,16 +10,16 @@ #include -#include "include/org_rocksdb_AbstractCompactionFilterFactory.h" -#include "rocksjni/compaction_filter_factory_jnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_AbstractCompactionFilterFactory.h" +#include "forstjni/compaction_filter_factory_jnicallback.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_AbstractCompactionFilterFactory + * Class: org_forstdb_AbstractCompactionFilterFactory * Method: createNewCompactionFilterFactory0 * Signature: ()J */ -jlong Java_org_rocksdb_AbstractCompactionFilterFactory_createNewCompactionFilterFactory0( +jlong Java_org_forstdb_AbstractCompactionFilterFactory_createNewCompactionFilterFactory0( JNIEnv* env, jobject jobj) { auto* cff = new ROCKSDB_NAMESPACE::CompactionFilterFactoryJniCallback(env, jobj); @@ -29,11 +29,11 @@ jlong Java_org_rocksdb_AbstractCompactionFilterFactory_createNewCompactionFilter } /* - * Class: org_rocksdb_AbstractCompactionFilterFactory + * Class: org_forstdb_AbstractCompactionFilterFactory * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_AbstractCompactionFilterFactory_disposeInternal( +void Java_org_forstdb_AbstractCompactionFilterFactory_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* ptr_sptr_cff = reinterpret_cast< std::shared_ptr*>( diff --git a/java/rocksjni/compaction_filter_factory_jnicallback.cc b/java/forstjni/compaction_filter_factory_jnicallback.cc similarity index 96% rename from java/rocksjni/compaction_filter_factory_jnicallback.cc rename to java/forstjni/compaction_filter_factory_jnicallback.cc index 14285526f..ccf08eb0b 100644 --- a/java/rocksjni/compaction_filter_factory_jnicallback.cc +++ b/java/forstjni/compaction_filter_factory_jnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::CompactionFilterFactory. 
-#include "rocksjni/compaction_filter_factory_jnicallback.h" +#include "forstjni/compaction_filter_factory_jnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { CompactionFilterFactoryJniCallback::CompactionFilterFactoryJniCallback( diff --git a/java/rocksjni/compaction_filter_factory_jnicallback.h b/java/forstjni/compaction_filter_factory_jnicallback.h similarity index 97% rename from java/rocksjni/compaction_filter_factory_jnicallback.h rename to java/forstjni/compaction_filter_factory_jnicallback.h index 2f26f8dbe..c8f1e718b 100644 --- a/java/rocksjni/compaction_filter_factory_jnicallback.h +++ b/java/forstjni/compaction_filter_factory_jnicallback.h @@ -14,7 +14,7 @@ #include #include "rocksdb/compaction_filter.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/compaction_job_info.cc b/java/forstjni/compaction_job_info.cc similarity index 79% rename from java/rocksjni/compaction_job_info.cc rename to java/forstjni/compaction_job_info.cc index fb292f59c..b6bef26e1 100644 --- a/java/rocksjni/compaction_job_info.cc +++ b/java/forstjni/compaction_job_info.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_CompactionJobInfo.h" +#include "include/org_forstdb_CompactionJobInfo.h" #include "rocksdb/listener.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_CompactionJobInfo + * Class: org_forstdb_CompactionJobInfo * Method: newCompactionJobInfo * Signature: ()J */ -jlong Java_org_rocksdb_CompactionJobInfo_newCompactionJobInfo(JNIEnv*, jclass) { +jlong Java_org_forstdb_CompactionJobInfo_newCompactionJobInfo(JNIEnv*, jclass) { auto* compact_job_info = new ROCKSDB_NAMESPACE::CompactionJobInfo(); return GET_CPLUSPLUS_POINTER(compact_job_info); } /* - * Class: org_rocksdb_CompactionJobInfo + * Class: org_forstdb_CompactionJobInfo * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompactionJobInfo_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_CompactionJobInfo_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); @@ -36,11 +36,11 @@ void Java_org_rocksdb_CompactionJobInfo_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionJobInfo + * Class: org_forstdb_CompactionJobInfo * Method: columnFamilyName * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_CompactionJobInfo_columnFamilyName(JNIEnv* env, +jbyteArray Java_org_forstdb_CompactionJobInfo_columnFamilyName(JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = @@ -49,11 +49,11 @@ jbyteArray Java_org_rocksdb_CompactionJobInfo_columnFamilyName(JNIEnv* env, } /* - * Class: org_rocksdb_CompactionJobInfo + * Class: org_forstdb_CompactionJobInfo * Method: status * Signature: (J)Lorg/rocksdb/Status; */ -jobject Java_org_rocksdb_CompactionJobInfo_status(JNIEnv* env, jclass, +jobject Java_org_forstdb_CompactionJobInfo_status(JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_info = reinterpret_cast(jhandle); @@ -61,11 +61,11 @@ jobject Java_org_rocksdb_CompactionJobInfo_status(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_CompactionJobInfo + * Class: org_forstdb_CompactionJobInfo * Method: threadId * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobInfo_threadId(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobInfo_threadId(JNIEnv*, jclass, jlong jhandle) { 
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -73,22 +73,22 @@ jlong Java_org_rocksdb_CompactionJobInfo_threadId(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: jobId
  * Signature: (J)I
  */
-jint Java_org_rocksdb_CompactionJobInfo_jobId(JNIEnv*, jclass, jlong jhandle) {
+jint Java_org_forstdb_CompactionJobInfo_jobId(JNIEnv*, jclass, jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
   return static_cast<jint>(compact_job_info->job_id);
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: baseInputLevel
  * Signature: (J)I
  */
-jint Java_org_rocksdb_CompactionJobInfo_baseInputLevel(JNIEnv*, jclass,
+jint Java_org_forstdb_CompactionJobInfo_baseInputLevel(JNIEnv*, jclass,
                                                        jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -96,11 +96,11 @@ jint Java_org_rocksdb_CompactionJobInfo_baseInputLevel(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: outputLevel
  * Signature: (J)I
  */
-jint Java_org_rocksdb_CompactionJobInfo_outputLevel(JNIEnv*, jclass,
+jint Java_org_forstdb_CompactionJobInfo_outputLevel(JNIEnv*, jclass,
                                                     jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -108,11 +108,11 @@ jint Java_org_rocksdb_CompactionJobInfo_outputLevel(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: inputFiles
  * Signature: (J)[Ljava/lang/String;
  */
-jobjectArray Java_org_rocksdb_CompactionJobInfo_inputFiles(JNIEnv* env, jclass,
+jobjectArray Java_org_forstdb_CompactionJobInfo_inputFiles(JNIEnv* env, jclass,
                                                            jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -121,11 +121,11 @@ jobjectArray Java_org_rocksdb_CompactionJobInfo_inputFiles(JNIEnv* env, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: outputFiles
  * Signature: (J)[Ljava/lang/String;
  */
-jobjectArray Java_org_rocksdb_CompactionJobInfo_outputFiles(JNIEnv* env, jclass,
+jobjectArray Java_org_forstdb_CompactionJobInfo_outputFiles(JNIEnv* env, jclass,
                                                             jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -134,11 +134,11 @@ jobjectArray Java_org_rocksdb_CompactionJobInfo_outputFiles(JNIEnv* env, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: tableProperties
  * Signature: (J)Ljava/util/Map;
  */
-jobject Java_org_rocksdb_CompactionJobInfo_tableProperties(JNIEnv* env, jclass,
+jobject Java_org_forstdb_CompactionJobInfo_tableProperties(JNIEnv* env, jclass,
                                                            jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -191,11 +191,11 @@ jobject Java_org_rocksdb_CompactionJobInfo_tableProperties(JNIEnv* env, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: compactionReason
  * Signature: (J)B
  */
-jbyte Java_org_rocksdb_CompactionJobInfo_compactionReason(JNIEnv*, jclass,
+jbyte Java_org_forstdb_CompactionJobInfo_compactionReason(JNIEnv*, jclass,
                                                           jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -204,11 +204,11 @@ jbyte Java_org_rocksdb_CompactionJobInfo_compactionReason(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: compression
  * Signature: (J)B
  */
-jbyte Java_org_rocksdb_CompactionJobInfo_compression(JNIEnv*, jclass,
+jbyte Java_org_forstdb_CompactionJobInfo_compression(JNIEnv*, jclass,
                                                      jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
@@ -217,11 +217,11 @@ jbyte Java_org_rocksdb_CompactionJobInfo_compression(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobInfo
+ * Class: org_forstdb_CompactionJobInfo
  * Method: stats
  * Signature: (J)J
  */
-jlong Java_org_rocksdb_CompactionJobInfo_stats(JNIEnv*, jclass, jlong jhandle) {
+jlong Java_org_forstdb_CompactionJobInfo_stats(JNIEnv*, jclass, jlong jhandle) {
   auto* compact_job_info =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobInfo*>(jhandle);
   auto* stats = new ROCKSDB_NAMESPACE::CompactionJobStats();
diff --git a/java/rocksjni/compaction_job_stats.cc b/java/forstjni/compaction_job_stats.cc
similarity index 74%
rename from java/rocksjni/compaction_job_stats.cc
rename to java/forstjni/compaction_job_stats.cc
index a2599c132..ca009e4af 100644
--- a/java/rocksjni/compaction_job_stats.cc
+++ b/java/forstjni/compaction_job_stats.cc
@@ -10,27 +10,27 @@
 #include <jni.h>

-#include "include/org_rocksdb_CompactionJobStats.h"
-#include "rocksjni/cplusplus_to_java_convert.h"
-#include "rocksjni/portal.h"
+#include "include/org_forstdb_CompactionJobStats.h"
+#include "forstjni/cplusplus_to_java_convert.h"
+#include "forstjni/portal.h"

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: newCompactionJobStats
  * Signature: ()J
  */
-jlong Java_org_rocksdb_CompactionJobStats_newCompactionJobStats(JNIEnv*,
+jlong Java_org_forstdb_CompactionJobStats_newCompactionJobStats(JNIEnv*,
                                                                 jclass) {
   auto* compact_job_stats = new ROCKSDB_NAMESPACE::CompactionJobStats();
   return GET_CPLUSPLUS_POINTER(compact_job_stats);
 }

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: disposeInternal
  * Signature: (J)V
  */
-void Java_org_rocksdb_CompactionJobStats_disposeInternal(JNIEnv*, jobject,
+void Java_org_forstdb_CompactionJobStats_disposeInternal(JNIEnv*, jobject,
                                                          jlong jhandle) {
   auto* compact_job_stats =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobStats*>(jhandle);
@@ -38,22 +38,22 @@ void Java_org_rocksdb_CompactionJobStats_disposeInternal(JNIEnv*, jobject,
 }

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: reset
  * Signature: (J)V
  */
-void Java_org_rocksdb_CompactionJobStats_reset(JNIEnv*, jclass, jlong jhandle) {
+void Java_org_forstdb_CompactionJobStats_reset(JNIEnv*, jclass, jlong jhandle) {
   auto* compact_job_stats =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobStats*>(jhandle);
   compact_job_stats->Reset();
 }

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: add
  * Signature: (JJ)V
  */
-void Java_org_rocksdb_CompactionJobStats_add(JNIEnv*, jclass, jlong jhandle,
+void Java_org_forstdb_CompactionJobStats_add(JNIEnv*, jclass, jlong jhandle,
                                              jlong jother_handle) {
   auto* compact_job_stats =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobStats*>(jhandle);
@@ -63,11 +63,11 @@ void Java_org_rocksdb_CompactionJobStats_add(JNIEnv*, jclass, jlong jhandle,
 }

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: elapsedMicros
  * Signature: (J)J
  */
-jlong Java_org_rocksdb_CompactionJobStats_elapsedMicros(JNIEnv*, jclass,
+jlong Java_org_forstdb_CompactionJobStats_elapsedMicros(JNIEnv*, jclass,
                                                         jlong jhandle) {
   auto* compact_job_stats =
       reinterpret_cast<ROCKSDB_NAMESPACE::CompactionJobStats*>(jhandle);
@@ -75,11 +75,11 @@ jlong Java_org_rocksdb_CompactionJobStats_elapsedMicros(JNIEnv*, jclass,
 }

 /*
- * Class: org_rocksdb_CompactionJobStats
+ * Class: org_forstdb_CompactionJobStats
  * Method: numInputRecords
  * Signature: (J)J
  */
-jlong Java_org_rocksdb_CompactionJobStats_numInputRecords(JNIEnv*, jclass,
+jlong
Java_org_forstdb_CompactionJobStats_numInputRecords(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -87,11 +87,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numInputRecords(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numInputFiles * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numInputFiles(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numInputFiles(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -99,11 +99,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numInputFiles(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numInputFilesAtOutputLevel * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numInputFilesAtOutputLevel( +jlong Java_org_forstdb_CompactionJobStats_numInputFilesAtOutputLevel( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -111,11 +111,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numInputFilesAtOutputLevel( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numOutputRecords * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numOutputRecords(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numOutputRecords(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -123,11 +123,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numOutputRecords(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numOutputFiles * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numOutputFiles(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numOutputFiles(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -135,11 +135,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numOutputFiles(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: isManualCompaction * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactionJobStats_isManualCompaction(JNIEnv*, jclass, +jboolean Java_org_forstdb_CompactionJobStats_isManualCompaction(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -151,11 +151,11 @@ jboolean Java_org_rocksdb_CompactionJobStats_isManualCompaction(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: totalInputBytes * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputBytes(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_totalInputBytes(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -163,11 +163,11 @@ jlong Java_org_rocksdb_CompactionJobStats_totalInputBytes(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: totalOutputBytes * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_totalOutputBytes(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_totalOutputBytes(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -175,11 +175,11 @@ jlong Java_org_rocksdb_CompactionJobStats_totalOutputBytes(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: 
numRecordsReplaced * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numRecordsReplaced(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numRecordsReplaced(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -187,11 +187,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numRecordsReplaced(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: totalInputRawKeyBytes * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputRawKeyBytes(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_totalInputRawKeyBytes(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -199,11 +199,11 @@ jlong Java_org_rocksdb_CompactionJobStats_totalInputRawKeyBytes(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: totalInputRawValueBytes * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_totalInputRawValueBytes( +jlong Java_org_forstdb_CompactionJobStats_totalInputRawValueBytes( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -211,11 +211,11 @@ jlong Java_org_rocksdb_CompactionJobStats_totalInputRawValueBytes( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numInputDeletionRecords * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numInputDeletionRecords( +jlong Java_org_forstdb_CompactionJobStats_numInputDeletionRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -223,11 +223,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numInputDeletionRecords( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numExpiredDeletionRecords * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numExpiredDeletionRecords( +jlong Java_org_forstdb_CompactionJobStats_numExpiredDeletionRecords( JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -235,11 +235,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numExpiredDeletionRecords( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numCorruptKeys * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numCorruptKeys(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numCorruptKeys(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -247,11 +247,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numCorruptKeys(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: fileWriteNanos * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_fileWriteNanos(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_fileWriteNanos(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -259,11 +259,11 @@ jlong Java_org_rocksdb_CompactionJobStats_fileWriteNanos(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: fileRangeSyncNanos * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_fileRangeSyncNanos(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_fileRangeSyncNanos(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -271,11 +271,11 @@ jlong 
Java_org_rocksdb_CompactionJobStats_fileRangeSyncNanos(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: fileFsyncNanos * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_fileFsyncNanos(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_fileFsyncNanos(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -283,11 +283,11 @@ jlong Java_org_rocksdb_CompactionJobStats_fileFsyncNanos(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: filePrepareWriteNanos * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_filePrepareWriteNanos(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_filePrepareWriteNanos(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -295,11 +295,11 @@ jlong Java_org_rocksdb_CompactionJobStats_filePrepareWriteNanos(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: smallestOutputKeyPrefix * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_CompactionJobStats_smallestOutputKeyPrefix( +jbyteArray Java_org_forstdb_CompactionJobStats_smallestOutputKeyPrefix( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -308,11 +308,11 @@ jbyteArray Java_org_rocksdb_CompactionJobStats_smallestOutputKeyPrefix( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: largestOutputKeyPrefix * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_CompactionJobStats_largestOutputKeyPrefix( +jbyteArray Java_org_forstdb_CompactionJobStats_largestOutputKeyPrefix( JNIEnv* env, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -321,11 +321,11 @@ jbyteArray Java_org_rocksdb_CompactionJobStats_largestOutputKeyPrefix( } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numSingleDelFallthru * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numSingleDelFallthru(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numSingleDelFallthru(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); @@ -333,11 +333,11 @@ jlong Java_org_rocksdb_CompactionJobStats_numSingleDelFallthru(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionJobStats + * Class: org_forstdb_CompactionJobStats * Method: numSingleDelMismatch * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionJobStats_numSingleDelMismatch(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionJobStats_numSingleDelMismatch(JNIEnv*, jclass, jlong jhandle) { auto* compact_job_stats = reinterpret_cast(jhandle); diff --git a/java/rocksjni/compaction_options.cc b/java/forstjni/compaction_options.cc similarity index 74% rename from java/rocksjni/compaction_options.cc rename to java/forstjni/compaction_options.cc index bbbde0313..0cf3e92df 100644 --- a/java/rocksjni/compaction_options.cc +++ b/java/forstjni/compaction_options.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_CompactionOptions.h" +#include "include/org_forstdb_CompactionOptions.h" #include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: newCompactionOptions * 
Signature: ()J */ -jlong Java_org_rocksdb_CompactionOptions_newCompactionOptions(JNIEnv*, jclass) { +jlong Java_org_forstdb_CompactionOptions_newCompactionOptions(JNIEnv*, jclass) { auto* compact_opts = new ROCKSDB_NAMESPACE::CompactionOptions(); return GET_CPLUSPLUS_POINTER(compact_opts); } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompactionOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_CompactionOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); @@ -36,11 +36,11 @@ void Java_org_rocksdb_CompactionOptions_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: compression * Signature: (J)B */ -jbyte Java_org_rocksdb_CompactionOptions_compression(JNIEnv*, jclass, +jbyte Java_org_forstdb_CompactionOptions_compression(JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); @@ -49,11 +49,11 @@ jbyte Java_org_rocksdb_CompactionOptions_compression(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: setCompression * Signature: (JB)V */ -void Java_org_rocksdb_CompactionOptions_setCompression( +void Java_org_forstdb_CompactionOptions_setCompression( JNIEnv*, jclass, jlong jhandle, jbyte jcompression_type_value) { auto* compact_opts = reinterpret_cast(jhandle); @@ -63,11 +63,11 @@ void Java_org_rocksdb_CompactionOptions_setCompression( } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: outputFileSizeLimit * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionOptions_outputFileSizeLimit(JNIEnv*, jclass, +jlong Java_org_forstdb_CompactionOptions_outputFileSizeLimit(JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); @@ -75,11 +75,11 @@ jlong Java_org_rocksdb_CompactionOptions_outputFileSizeLimit(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: setOutputFileSizeLimit * Signature: (JJ)V */ -void Java_org_rocksdb_CompactionOptions_setOutputFileSizeLimit( +void Java_org_forstdb_CompactionOptions_setOutputFileSizeLimit( JNIEnv*, jclass, jlong jhandle, jlong joutput_file_size_limit) { auto* compact_opts = reinterpret_cast(jhandle); @@ -88,11 +88,11 @@ void Java_org_rocksdb_CompactionOptions_setOutputFileSizeLimit( } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: maxSubcompactions * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptions_maxSubcompactions(JNIEnv*, jclass, +jint Java_org_forstdb_CompactionOptions_maxSubcompactions(JNIEnv*, jclass, jlong jhandle) { auto* compact_opts = reinterpret_cast(jhandle); @@ -100,11 +100,11 @@ jint Java_org_rocksdb_CompactionOptions_maxSubcompactions(JNIEnv*, jclass, } /* - * Class: org_rocksdb_CompactionOptions + * Class: org_forstdb_CompactionOptions * Method: setMaxSubcompactions * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptions_setMaxSubcompactions( +void Java_org_forstdb_CompactionOptions_setMaxSubcompactions( JNIEnv*, jclass, jlong jhandle, jint jmax_subcompactions) { auto* compact_opts = reinterpret_cast(jhandle); diff --git a/java/rocksjni/compaction_options_fifo.cc b/java/forstjni/compaction_options_fifo.cc similarity index 73% rename from java/rocksjni/compaction_options_fifo.cc rename to 
java/forstjni/compaction_options_fifo.cc index f6a47fec5..3a4bf5c25 100644 --- a/java/rocksjni/compaction_options_fifo.cc +++ b/java/forstjni/compaction_options_fifo.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_CompactionOptionsFIFO.h" +#include "include/org_forstdb_CompactionOptionsFIFO.h" #include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: newCompactionOptionsFIFO * Signature: ()J */ -jlong Java_org_rocksdb_CompactionOptionsFIFO_newCompactionOptionsFIFO(JNIEnv*, +jlong Java_org_forstdb_CompactionOptionsFIFO_newCompactionOptionsFIFO(JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsFIFO(); return GET_CPLUSPLUS_POINTER(opt); } /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: setMaxTableFilesSize * Signature: (JJ)V */ -void Java_org_rocksdb_CompactionOptionsFIFO_setMaxTableFilesSize( +void Java_org_forstdb_CompactionOptionsFIFO_setMaxTableFilesSize( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { auto* opt = reinterpret_cast(jhandle); @@ -36,11 +36,11 @@ void Java_org_rocksdb_CompactionOptionsFIFO_setMaxTableFilesSize( } /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: maxTableFilesSize * Signature: (J)J */ -jlong Java_org_rocksdb_CompactionOptionsFIFO_maxTableFilesSize(JNIEnv*, jobject, +jlong Java_org_forstdb_CompactionOptionsFIFO_maxTableFilesSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -48,11 +48,11 @@ jlong Java_org_rocksdb_CompactionOptionsFIFO_maxTableFilesSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: setAllowCompaction * Signature: (JZ)V */ -void Java_org_rocksdb_CompactionOptionsFIFO_setAllowCompaction( +void Java_org_forstdb_CompactionOptionsFIFO_setAllowCompaction( JNIEnv*, jobject, jlong jhandle, jboolean allow_compaction) { auto* opt = reinterpret_cast(jhandle); @@ -60,11 +60,11 @@ void Java_org_rocksdb_CompactionOptionsFIFO_setAllowCompaction( } /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: allowCompaction * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompactionOptionsFIFO_allowCompaction(JNIEnv*, +jboolean Java_org_forstdb_CompactionOptionsFIFO_allowCompaction(JNIEnv*, jobject, jlong jhandle) { auto* opt = @@ -73,11 +73,11 @@ jboolean Java_org_rocksdb_CompactionOptionsFIFO_allowCompaction(JNIEnv*, } /* - * Class: org_rocksdb_CompactionOptionsFIFO + * Class: org_forstdb_CompactionOptionsFIFO * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompactionOptionsFIFO_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_CompactionOptionsFIFO_disposeInternal(JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast(jhandle); } diff --git a/java/rocksjni/compaction_options_universal.cc b/java/forstjni/compaction_options_universal.cc similarity index 71% rename from java/rocksjni/compaction_options_universal.cc rename to java/forstjni/compaction_options_universal.cc index 9fc6f3158..c2fb1d6d4 100644 --- a/java/rocksjni/compaction_options_universal.cc +++ b/java/forstjni/compaction_options_universal.cc @@ -8,28 +8,28 @@ #include -#include "include/org_rocksdb_CompactionOptionsUniversal.h" +#include 
"include/org_forstdb_CompactionOptionsUniversal.h" #include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: newCompactionOptionsUniversal * Signature: ()J */ -jlong Java_org_rocksdb_CompactionOptionsUniversal_newCompactionOptionsUniversal( +jlong Java_org_forstdb_CompactionOptionsUniversal_newCompactionOptionsUniversal( JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompactionOptionsUniversal(); return GET_CPLUSPLUS_POINTER(opt); } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setSizeRatio * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setSizeRatio( +void Java_org_forstdb_CompactionOptionsUniversal_setSizeRatio( JNIEnv*, jobject, jlong jhandle, jint jsize_ratio) { auto* opt = reinterpret_cast(jhandle); @@ -37,11 +37,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setSizeRatio( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: sizeRatio * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptionsUniversal_sizeRatio(JNIEnv*, jobject, +jint Java_org_forstdb_CompactionOptionsUniversal_sizeRatio(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -49,11 +49,11 @@ jint Java_org_rocksdb_CompactionOptionsUniversal_sizeRatio(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setMinMergeWidth * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMinMergeWidth( +void Java_org_forstdb_CompactionOptionsUniversal_setMinMergeWidth( JNIEnv*, jobject, jlong jhandle, jint jmin_merge_width) { auto* opt = reinterpret_cast(jhandle); @@ -61,11 +61,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setMinMergeWidth( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: minMergeWidth * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptionsUniversal_minMergeWidth(JNIEnv*, jobject, +jint Java_org_forstdb_CompactionOptionsUniversal_minMergeWidth(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -73,11 +73,11 @@ jint Java_org_rocksdb_CompactionOptionsUniversal_minMergeWidth(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setMaxMergeWidth * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMaxMergeWidth( +void Java_org_forstdb_CompactionOptionsUniversal_setMaxMergeWidth( JNIEnv*, jobject, jlong jhandle, jint jmax_merge_width) { auto* opt = reinterpret_cast(jhandle); @@ -85,11 +85,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setMaxMergeWidth( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: maxMergeWidth * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptionsUniversal_maxMergeWidth(JNIEnv*, jobject, +jint Java_org_forstdb_CompactionOptionsUniversal_maxMergeWidth(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -97,11 +97,11 @@ jint Java_org_rocksdb_CompactionOptionsUniversal_maxMergeWidth(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptionsUniversal + 
* Class: org_forstdb_CompactionOptionsUniversal * Method: setMaxSizeAmplificationPercent * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent( +void Java_org_forstdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent( JNIEnv*, jobject, jlong jhandle, jint jmax_size_amplification_percent) { auto* opt = reinterpret_cast(jhandle); @@ -110,11 +110,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: maxSizeAmplificationPercent * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptionsUniversal_maxSizeAmplificationPercent( +jint Java_org_forstdb_CompactionOptionsUniversal_maxSizeAmplificationPercent( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -122,11 +122,11 @@ jint Java_org_rocksdb_CompactionOptionsUniversal_maxSizeAmplificationPercent( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setCompressionSizePercent * Signature: (JI)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setCompressionSizePercent( +void Java_org_forstdb_CompactionOptionsUniversal_setCompressionSizePercent( JNIEnv*, jobject, jlong jhandle, jint jcompression_size_percent) { auto* opt = reinterpret_cast(jhandle); @@ -135,11 +135,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setCompressionSizePercent( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: compressionSizePercent * Signature: (J)I */ -jint Java_org_rocksdb_CompactionOptionsUniversal_compressionSizePercent( +jint Java_org_forstdb_CompactionOptionsUniversal_compressionSizePercent( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -147,11 +147,11 @@ jint Java_org_rocksdb_CompactionOptionsUniversal_compressionSizePercent( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setStopStyle * Signature: (JB)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setStopStyle( +void Java_org_forstdb_CompactionOptionsUniversal_setStopStyle( JNIEnv*, jobject, jlong jhandle, jbyte jstop_style_value) { auto* opt = reinterpret_cast(jhandle); @@ -161,11 +161,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setStopStyle( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: stopStyle * Signature: (J)B */ -jbyte Java_org_rocksdb_CompactionOptionsUniversal_stopStyle(JNIEnv*, jobject, +jbyte Java_org_forstdb_CompactionOptionsUniversal_stopStyle(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -174,11 +174,11 @@ jbyte Java_org_rocksdb_CompactionOptionsUniversal_stopStyle(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: setAllowTrivialMove * Signature: (JZ)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_setAllowTrivialMove( +void Java_org_forstdb_CompactionOptionsUniversal_setAllowTrivialMove( JNIEnv*, jobject, jlong jhandle, jboolean jallow_trivial_move) { auto* opt = reinterpret_cast(jhandle); @@ -186,11 +186,11 @@ void Java_org_rocksdb_CompactionOptionsUniversal_setAllowTrivialMove( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: allowTrivialMove * Signature: (J)Z */ 
-jboolean Java_org_rocksdb_CompactionOptionsUniversal_allowTrivialMove( +jboolean Java_org_forstdb_CompactionOptionsUniversal_allowTrivialMove( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -198,11 +198,11 @@ jboolean Java_org_rocksdb_CompactionOptionsUniversal_allowTrivialMove( } /* - * Class: org_rocksdb_CompactionOptionsUniversal + * Class: org_forstdb_CompactionOptionsUniversal * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompactionOptionsUniversal_disposeInternal( +void Java_org_forstdb_CompactionOptionsUniversal_disposeInternal( JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast( jhandle); diff --git a/java/rocksjni/comparator.cc b/java/forstjni/comparator.cc similarity index 71% rename from java/rocksjni/comparator.cc rename to java/forstjni/comparator.cc index 11279c4ce..0e69990cf 100644 --- a/java/rocksjni/comparator.cc +++ b/java/forstjni/comparator.cc @@ -13,18 +13,18 @@ #include #include -#include "include/org_rocksdb_AbstractComparator.h" -#include "include/org_rocksdb_NativeComparatorWrapper.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_AbstractComparator.h" +#include "include/org_forstdb_NativeComparatorWrapper.h" +#include "forstjni/comparatorjnicallback.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_AbstractComparator + * Class: org_forstdb_AbstractComparator * Method: createNewComparator * Signature: (J)J */ -jlong Java_org_rocksdb_AbstractComparator_createNewComparator( +jlong Java_org_forstdb_AbstractComparator_createNewComparator( JNIEnv* env, jobject jcomparator, jlong copt_handle) { auto* copt = reinterpret_cast( @@ -35,11 +35,11 @@ jlong Java_org_rocksdb_AbstractComparator_createNewComparator( } /* - * Class: org_rocksdb_AbstractComparator + * Class: org_forstdb_AbstractComparator * Method: usingDirectBuffers * Signature: (J)Z */ -jboolean Java_org_rocksdb_AbstractComparator_usingDirectBuffers(JNIEnv*, +jboolean Java_org_forstdb_AbstractComparator_usingDirectBuffers(JNIEnv*, jobject, jlong jhandle) { auto* c = @@ -48,11 +48,11 @@ jboolean Java_org_rocksdb_AbstractComparator_usingDirectBuffers(JNIEnv*, } /* - * Class: org_rocksdb_NativeComparatorWrapper + * Class: org_forstdb_NativeComparatorWrapper * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_NativeComparatorWrapper_disposeInternal( +void Java_org_forstdb_NativeComparatorWrapper_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jcomparator_handle) { auto* comparator = reinterpret_cast(jcomparator_handle); diff --git a/java/rocksjni/comparatorjnicallback.cc b/java/forstjni/comparatorjnicallback.cc similarity index 99% rename from java/rocksjni/comparatorjnicallback.cc rename to java/forstjni/comparatorjnicallback.cc index d354b40b8..775399223 100644 --- a/java/rocksjni/comparatorjnicallback.cc +++ b/java/forstjni/comparatorjnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator. 
-#include "rocksjni/comparatorjnicallback.h" +#include "forstjni/comparatorjnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { ComparatorJniCallback::ComparatorJniCallback( diff --git a/java/rocksjni/comparatorjnicallback.h b/java/forstjni/comparatorjnicallback.h similarity index 99% rename from java/rocksjni/comparatorjnicallback.h rename to java/forstjni/comparatorjnicallback.h index 034c0d5d7..671d2a3a8 100644 --- a/java/rocksjni/comparatorjnicallback.h +++ b/java/forstjni/comparatorjnicallback.h @@ -17,7 +17,7 @@ #include "port/port.h" #include "rocksdb/comparator.h" #include "rocksdb/slice.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" #include "util/thread_local.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/compression_options.cc b/java/forstjni/compression_options.cc similarity index 73% rename from java/rocksjni/compression_options.cc rename to java/forstjni/compression_options.cc index 53f240560..702dcb8de 100644 --- a/java/rocksjni/compression_options.cc +++ b/java/forstjni/compression_options.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_CompressionOptions.h" +#include "include/org_forstdb_CompressionOptions.h" #include "rocksdb/advanced_options.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: newCompressionOptions * Signature: ()J */ -jlong Java_org_rocksdb_CompressionOptions_newCompressionOptions(JNIEnv*, +jlong Java_org_forstdb_CompressionOptions_newCompressionOptions(JNIEnv*, jclass) { const auto* opt = new ROCKSDB_NAMESPACE::CompressionOptions(); return GET_CPLUSPLUS_POINTER(opt); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setWindowBits * Signature: (JI)V */ -void Java_org_rocksdb_CompressionOptions_setWindowBits(JNIEnv*, jobject, +void Java_org_forstdb_CompressionOptions_setWindowBits(JNIEnv*, jobject, jlong jhandle, jint jwindow_bits) { auto* opt = reinterpret_cast(jhandle); @@ -36,44 +36,44 @@ void Java_org_rocksdb_CompressionOptions_setWindowBits(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: windowBits * Signature: (J)I */ -jint Java_org_rocksdb_CompressionOptions_windowBits(JNIEnv*, jobject, +jint Java_org_forstdb_CompressionOptions_windowBits(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->window_bits); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setLevel * Signature: (JI)V */ -void Java_org_rocksdb_CompressionOptions_setLevel(JNIEnv*, jobject, +void Java_org_forstdb_CompressionOptions_setLevel(JNIEnv*, jobject, jlong jhandle, jint jlevel) { auto* opt = reinterpret_cast(jhandle); opt->level = static_cast(jlevel); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: level * Signature: (J)I */ -jint Java_org_rocksdb_CompressionOptions_level(JNIEnv*, jobject, +jint Java_org_forstdb_CompressionOptions_level(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->level); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setStrategy * Signature: (JI)V */ -void Java_org_rocksdb_CompressionOptions_setStrategy(JNIEnv*, jobject, +void 
Java_org_forstdb_CompressionOptions_setStrategy(JNIEnv*, jobject, jlong jhandle, jint jstrategy) { auto* opt = reinterpret_cast(jhandle); @@ -81,22 +81,22 @@ void Java_org_rocksdb_CompressionOptions_setStrategy(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: strategy * Signature: (J)I */ -jint Java_org_rocksdb_CompressionOptions_strategy(JNIEnv*, jobject, +jint Java_org_forstdb_CompressionOptions_strategy(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->strategy); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setMaxDictBytes * Signature: (JI)V */ -void Java_org_rocksdb_CompressionOptions_setMaxDictBytes(JNIEnv*, jobject, +void Java_org_forstdb_CompressionOptions_setMaxDictBytes(JNIEnv*, jobject, jlong jhandle, jint jmax_dict_bytes) { auto* opt = reinterpret_cast(jhandle); @@ -104,77 +104,77 @@ void Java_org_rocksdb_CompressionOptions_setMaxDictBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: maxDictBytes * Signature: (J)I */ -jint Java_org_rocksdb_CompressionOptions_maxDictBytes(JNIEnv*, jobject, +jint Java_org_forstdb_CompressionOptions_maxDictBytes(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_dict_bytes); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setZstdMaxTrainBytes * Signature: (JI)V */ -void Java_org_rocksdb_CompressionOptions_setZstdMaxTrainBytes( +void Java_org_forstdb_CompressionOptions_setZstdMaxTrainBytes( JNIEnv*, jobject, jlong jhandle, jint jzstd_max_train_bytes) { auto* opt = reinterpret_cast(jhandle); opt->zstd_max_train_bytes = static_cast(jzstd_max_train_bytes); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: zstdMaxTrainBytes * Signature: (J)I */ -jint Java_org_rocksdb_CompressionOptions_zstdMaxTrainBytes(JNIEnv*, jobject, +jint Java_org_forstdb_CompressionOptions_zstdMaxTrainBytes(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->zstd_max_train_bytes); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setMaxDictBufferBytes * Signature: (JJ)V */ -void Java_org_rocksdb_CompressionOptions_setMaxDictBufferBytes( +void Java_org_forstdb_CompressionOptions_setMaxDictBufferBytes( JNIEnv*, jobject, jlong jhandle, jlong jmax_dict_buffer_bytes) { auto* opt = reinterpret_cast(jhandle); opt->max_dict_buffer_bytes = static_cast(jmax_dict_buffer_bytes); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: maxDictBufferBytes * Signature: (J)J */ -jlong Java_org_rocksdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_CompressionOptions_maxDictBufferBytes(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_dict_buffer_bytes); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setZstdMaxTrainBytes * Signature: (JZ)V */ -void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer( +void Java_org_forstdb_CompressionOptions_setUseZstdDictTrainer( JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) { auto* opt = reinterpret_cast(jhandle); opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE; } /* 
- * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: zstdMaxTrainBytes * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*, +jboolean Java_org_forstdb_CompressionOptions_useZstdDictTrainer(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -182,11 +182,11 @@ jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*, } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: setEnabled * Signature: (JZ)V */ -void Java_org_rocksdb_CompressionOptions_setEnabled(JNIEnv*, jobject, +void Java_org_forstdb_CompressionOptions_setEnabled(JNIEnv*, jobject, jlong jhandle, jboolean jenabled) { auto* opt = reinterpret_cast(jhandle); @@ -194,21 +194,21 @@ void Java_org_rocksdb_CompressionOptions_setEnabled(JNIEnv*, jobject, } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: enabled * Signature: (J)Z */ -jboolean Java_org_rocksdb_CompressionOptions_enabled(JNIEnv*, jobject, +jboolean Java_org_forstdb_CompressionOptions_enabled(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enabled); } /* - * Class: org_rocksdb_CompressionOptions + * Class: org_forstdb_CompressionOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_CompressionOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_CompressionOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast(jhandle); } diff --git a/java/rocksjni/concurrent_task_limiter.cc b/java/forstjni/concurrent_task_limiter.cc similarity index 75% rename from java/rocksjni/concurrent_task_limiter.cc rename to java/forstjni/concurrent_task_limiter.cc index 0b0b2d271..0c9b08c27 100644 --- a/java/rocksjni/concurrent_task_limiter.cc +++ b/java/forstjni/concurrent_task_limiter.cc @@ -11,16 +11,16 @@ #include #include -#include "include/org_rocksdb_ConcurrentTaskLimiterImpl.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_ConcurrentTaskLimiterImpl.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: newConcurrentTaskLimiterImpl0 * Signature: (Ljava/lang/String;I)J */ -jlong Java_org_rocksdb_ConcurrentTaskLimiterImpl_newConcurrentTaskLimiterImpl0( +jlong Java_org_forstdb_ConcurrentTaskLimiterImpl_newConcurrentTaskLimiterImpl0( JNIEnv* env, jclass, jstring jname, jint limit) { jboolean has_exception = JNI_FALSE; std::string name = @@ -36,11 +36,11 @@ jlong Java_org_rocksdb_ConcurrentTaskLimiterImpl_newConcurrentTaskLimiterImpl0( } /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: name * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_ConcurrentTaskLimiterImpl_name(JNIEnv* env, jclass, +jstring Java_org_forstdb_ConcurrentTaskLimiterImpl_name(JNIEnv* env, jclass, jlong handle) { const auto& limiter = *reinterpret_cast< std::shared_ptr*>(handle); @@ -48,11 +48,11 @@ jstring Java_org_rocksdb_ConcurrentTaskLimiterImpl_name(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: setMaxOutstandingTask * Signature: (JI)V */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_setMaxOutstandingTask( +void 
Java_org_forstdb_ConcurrentTaskLimiterImpl_setMaxOutstandingTask( JNIEnv*, jclass, jlong handle, jint max_outstanding_task) { const auto& limiter = *reinterpret_cast< std::shared_ptr*>(handle); @@ -60,11 +60,11 @@ void Java_org_rocksdb_ConcurrentTaskLimiterImpl_setMaxOutstandingTask( } /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: resetMaxOutstandingTask * Signature: (J)V */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_resetMaxOutstandingTask( +void Java_org_forstdb_ConcurrentTaskLimiterImpl_resetMaxOutstandingTask( JNIEnv*, jclass, jlong handle) { const auto& limiter = *reinterpret_cast< std::shared_ptr*>(handle); @@ -72,11 +72,11 @@ void Java_org_rocksdb_ConcurrentTaskLimiterImpl_resetMaxOutstandingTask( } /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: outstandingTask * Signature: (J)I */ -jint Java_org_rocksdb_ConcurrentTaskLimiterImpl_outstandingTask(JNIEnv*, jclass, +jint Java_org_forstdb_ConcurrentTaskLimiterImpl_outstandingTask(JNIEnv*, jclass, jlong handle) { const auto& limiter = *reinterpret_cast< std::shared_ptr*>(handle); @@ -84,11 +84,11 @@ jint Java_org_rocksdb_ConcurrentTaskLimiterImpl_outstandingTask(JNIEnv*, jclass, } /* - * Class: org_rocksdb_ConcurrentTaskLimiterImpl + * Class: org_forstdb_ConcurrentTaskLimiterImpl * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ConcurrentTaskLimiterImpl_disposeInternal(JNIEnv*, +void Java_org_forstdb_ConcurrentTaskLimiterImpl_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* ptr = reinterpret_cast< diff --git a/java/rocksjni/config_options.cc b/java/forstjni/config_options.cc similarity index 76% rename from java/rocksjni/config_options.cc rename to java/forstjni/config_options.cc index 55a9cbb66..dd11ab813 100644 --- a/java/rocksjni/config_options.cc +++ b/java/forstjni/config_options.cc @@ -9,17 +9,17 @@ #include -#include "include/org_rocksdb_ConfigOptions.h" +#include "include/org_forstdb_ConfigOptions.h" #include "rocksdb/convenience.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ConfigOptions_disposeInternal(JNIEnv *, jobject, +void Java_org_forstdb_ConfigOptions_disposeInternal(JNIEnv *, jobject, jlong jhandle) { auto *co = reinterpret_cast(jhandle); assert(co != nullptr); @@ -27,21 +27,21 @@ void Java_org_rocksdb_ConfigOptions_disposeInternal(JNIEnv *, jobject, } /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: newConfigOptions * Signature: ()J */ -jlong Java_org_rocksdb_ConfigOptions_newConfigOptions(JNIEnv *, jclass) { +jlong Java_org_forstdb_ConfigOptions_newConfigOptions(JNIEnv *, jclass) { auto *cfg_opt = new ROCKSDB_NAMESPACE::ConfigOptions(); return GET_CPLUSPLUS_POINTER(cfg_opt); } /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: setEnv * Signature: (JJ;)V */ -void Java_org_rocksdb_ConfigOptions_setEnv(JNIEnv *, jclass, jlong handle, +void Java_org_forstdb_ConfigOptions_setEnv(JNIEnv *, jclass, jlong handle, jlong rocksdb_env_handle) { auto *cfg_opt = reinterpret_cast(handle); auto *rocksdb_env = @@ -50,11 +50,11 @@ void Java_org_rocksdb_ConfigOptions_setEnv(JNIEnv *, jclass, jlong handle, } /* - * Class: org_rocksdb_ConfigOptions 
+ * Class: org_forstdb_ConfigOptions * Method: setDelimiter * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_ConfigOptions_setDelimiter(JNIEnv *env, jclass, +void Java_org_forstdb_ConfigOptions_setDelimiter(JNIEnv *env, jclass, jlong handle, jstring s) { auto *cfg_opt = reinterpret_cast(handle); const char *delim = env->GetStringUTFChars(s, nullptr); @@ -67,11 +67,11 @@ void Java_org_rocksdb_ConfigOptions_setDelimiter(JNIEnv *env, jclass, } /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: setIgnoreUnknownOptions * Signature: (JZ)V */ -void Java_org_rocksdb_ConfigOptions_setIgnoreUnknownOptions(JNIEnv *, jclass, +void Java_org_forstdb_ConfigOptions_setIgnoreUnknownOptions(JNIEnv *, jclass, jlong handle, jboolean b) { auto *cfg_opt = reinterpret_cast(handle); @@ -79,11 +79,11 @@ void Java_org_rocksdb_ConfigOptions_setIgnoreUnknownOptions(JNIEnv *, jclass, } /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: setInputStringsEscaped * Signature: (JZ)V */ -void Java_org_rocksdb_ConfigOptions_setInputStringsEscaped(JNIEnv *, jclass, +void Java_org_forstdb_ConfigOptions_setInputStringsEscaped(JNIEnv *, jclass, jlong handle, jboolean b) { auto *cfg_opt = reinterpret_cast(handle); @@ -91,11 +91,11 @@ void Java_org_rocksdb_ConfigOptions_setInputStringsEscaped(JNIEnv *, jclass, } /* - * Class: org_rocksdb_ConfigOptions + * Class: org_forstdb_ConfigOptions * Method: setSanityLevel * Signature: (JI)V */ -void Java_org_rocksdb_ConfigOptions_setSanityLevel(JNIEnv *, jclass, +void Java_org_forstdb_ConfigOptions_setSanityLevel(JNIEnv *, jclass, jlong handle, jbyte level) { auto *cfg_opt = reinterpret_cast(handle); cfg_opt->sanity_level = diff --git a/java/rocksjni/cplusplus_to_java_convert.h b/java/forstjni/cplusplus_to_java_convert.h similarity index 100% rename from java/rocksjni/cplusplus_to_java_convert.h rename to java/forstjni/cplusplus_to_java_convert.h diff --git a/java/rocksjni/env.cc b/java/forstjni/env.cc similarity index 78% rename from java/rocksjni/env.cc rename to java/forstjni/env.cc index bb739fe2b..bde4ed574 100644 --- a/java/rocksjni/env.cc +++ b/java/forstjni/env.cc @@ -12,28 +12,28 @@ #include -#include "include/org_rocksdb_Env.h" -#include "include/org_rocksdb_RocksEnv.h" -#include "include/org_rocksdb_RocksMemEnv.h" -#include "include/org_rocksdb_TimedEnv.h" +#include "include/org_forstdb_Env.h" +#include "include/org_forstdb_RocksEnv.h" +#include "include/org_forstdb_RocksMemEnv.h" +#include "include/org_forstdb_TimedEnv.h" #include "portal.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: getDefaultEnvInternal * Signature: ()J */ -jlong Java_org_rocksdb_Env_getDefaultEnvInternal(JNIEnv*, jclass) { +jlong Java_org_forstdb_Env_getDefaultEnvInternal(JNIEnv*, jclass) { return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::Env::Default()); } /* - * Class: org_rocksdb_RocksEnv + * Class: org_forstdb_RocksEnv * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RocksEnv_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_RocksEnv_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); @@ -41,11 +41,11 @@ void Java_org_rocksdb_RocksEnv_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: setBackgroundThreads * Signature: (JIB)V */ -void 
Java_org_rocksdb_Env_setBackgroundThreads(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Env_setBackgroundThreads(JNIEnv*, jobject, jlong jhandle, jint jnum, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); @@ -55,11 +55,11 @@ void Java_org_rocksdb_Env_setBackgroundThreads(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: getBackgroundThreads * Signature: (JB)I */ -jint Java_org_rocksdb_Env_getBackgroundThreads(JNIEnv*, jobject, jlong jhandle, +jint Java_org_forstdb_Env_getBackgroundThreads(JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); const int num = rocks_env->GetBackgroundThreads( @@ -68,11 +68,11 @@ jint Java_org_rocksdb_Env_getBackgroundThreads(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: getThreadPoolQueueLen * Signature: (JB)I */ -jint Java_org_rocksdb_Env_getThreadPoolQueueLen(JNIEnv*, jobject, jlong jhandle, +jint Java_org_forstdb_Env_getThreadPoolQueueLen(JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); const int queue_len = rocks_env->GetThreadPoolQueueLen( @@ -81,11 +81,11 @@ jint Java_org_rocksdb_Env_getThreadPoolQueueLen(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: incBackgroundThreadsIfNeeded * Signature: (JIB)V */ -void Java_org_rocksdb_Env_incBackgroundThreadsIfNeeded(JNIEnv*, jobject, +void Java_org_forstdb_Env_incBackgroundThreadsIfNeeded(JNIEnv*, jobject, jlong jhandle, jint jnum, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); @@ -95,11 +95,11 @@ void Java_org_rocksdb_Env_incBackgroundThreadsIfNeeded(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: lowerThreadPoolIOPriority * Signature: (JB)V */ -void Java_org_rocksdb_Env_lowerThreadPoolIOPriority(JNIEnv*, jobject, +void Java_org_forstdb_Env_lowerThreadPoolIOPriority(JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); @@ -108,11 +108,11 @@ void Java_org_rocksdb_Env_lowerThreadPoolIOPriority(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: lowerThreadPoolCPUPriority * Signature: (JB)V */ -void Java_org_rocksdb_Env_lowerThreadPoolCPUPriority(JNIEnv*, jobject, +void Java_org_forstdb_Env_lowerThreadPoolCPUPriority(JNIEnv*, jobject, jlong jhandle, jbyte jpriority_value) { auto* rocks_env = reinterpret_cast(jhandle); @@ -121,11 +121,11 @@ void Java_org_rocksdb_Env_lowerThreadPoolCPUPriority(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Env + * Class: org_forstdb_Env * Method: getThreadList * Signature: (J)[Lorg/rocksdb/ThreadStatus; */ -jobjectArray Java_org_rocksdb_Env_getThreadList(JNIEnv* env, jobject, +jobjectArray Java_org_forstdb_Env_getThreadList(JNIEnv* env, jobject, jlong jhandle) { auto* rocks_env = reinterpret_cast(jhandle); std::vector thread_status; @@ -159,22 +159,22 @@ jobjectArray Java_org_rocksdb_Env_getThreadList(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksMemEnv + * Class: org_forstdb_RocksMemEnv * Method: createMemEnv * Signature: (J)J */ -jlong Java_org_rocksdb_RocksMemEnv_createMemEnv(JNIEnv*, jclass, +jlong Java_org_forstdb_RocksMemEnv_createMemEnv(JNIEnv*, jclass, jlong jbase_env_handle) { auto* base_env = reinterpret_cast(jbase_env_handle); return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewMemEnv(base_env)); } /* - * Class: 
org_rocksdb_RocksMemEnv + * Class: org_forstdb_RocksMemEnv * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RocksMemEnv_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_RocksMemEnv_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); @@ -182,22 +182,22 @@ void Java_org_rocksdb_RocksMemEnv_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_TimedEnv + * Class: org_forstdb_TimedEnv * Method: createTimedEnv * Signature: (J)J */ -jlong Java_org_rocksdb_TimedEnv_createTimedEnv(JNIEnv*, jclass, +jlong Java_org_forstdb_TimedEnv_createTimedEnv(JNIEnv*, jclass, jlong jbase_env_handle) { auto* base_env = reinterpret_cast(jbase_env_handle); return GET_CPLUSPLUS_POINTER(ROCKSDB_NAMESPACE::NewTimedEnv(base_env)); } /* - * Class: org_rocksdb_TimedEnv + * Class: org_forstdb_TimedEnv * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TimedEnv_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_TimedEnv_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* e = reinterpret_cast(jhandle); assert(e != nullptr); diff --git a/java/rocksjni/env_flink.cc b/java/forstjni/env_flink.cc similarity index 87% rename from java/rocksjni/env_flink.cc rename to java/forstjni/env_flink.cc index f6d4b44ca..c3fee7690 100644 --- a/java/rocksjni/env_flink.cc +++ b/java/forstjni/env_flink.cc @@ -20,17 +20,17 @@ #include -#include +#include "include/org_forstdb_FlinkEnv.h" -#include "java/rocksjni/portal.h" +#include "java/forstjni/portal.h" #include "rocksdb/env.h" /* - * Class: org_rocksdb_FlinkEnv + * Class: org_forstdb_FlinkEnv * Method: createFlinkEnv * Signature: (Ljava/lang/String;)J */ -jlong Java_org_rocksdb_FlinkEnv_createFlinkEnv(JNIEnv* env, jclass, +jlong Java_org_forstdb_FlinkEnv_createFlinkEnv(JNIEnv* env, jclass, jstring base_path) { jboolean has_exception = JNI_FALSE; auto path = @@ -51,11 +51,11 @@ jlong Java_org_rocksdb_FlinkEnv_createFlinkEnv(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_FlinkEnv + * Class: org_forstdb_FlinkEnv * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_FlinkEnv_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_FlinkEnv_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* handle = reinterpret_cast(jhandle); assert(handle != nullptr); diff --git a/java/rocksjni/env_flink_test_suite.cc b/java/forstjni/env_flink_test_suite.cc similarity index 84% rename from java/rocksjni/env_flink_test_suite.cc rename to java/forstjni/env_flink_test_suite.cc index 5e66ca746..529f95018 100644 --- a/java/rocksjni/env_flink_test_suite.cc +++ b/java/forstjni/env_flink_test_suite.cc @@ -20,15 +20,15 @@ #include -#include "include/org_rocksdb_EnvFlinkTestSuite.h" -#include "java/rocksjni/portal.h" +#include "include/org_forstdb_EnvFlinkTestSuite.h" +#include "java/forstjni/portal.h" /* - * Class: org_rocksdb_EnvFlinkTestSuite + * Class: org_forstdb_EnvFlinkTestSuite * Method: buildNativeObject * Signature: (Ljava/lang/String;)J */ -jlong Java_org_rocksdb_EnvFlinkTestSuite_buildNativeObject(JNIEnv* env, jobject, +jlong Java_org_forstdb_EnvFlinkTestSuite_buildNativeObject(JNIEnv* env, jobject, jstring basePath) { jboolean has_exception = JNI_FALSE; auto path = @@ -43,11 +43,11 @@ jlong Java_org_rocksdb_EnvFlinkTestSuite_buildNativeObject(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_EnvFlinkTestSuite + * Class: org_forstdb_EnvFlinkTestSuite * Method: runAllTestSuites * Signature: (J)V */ -JNIEXPORT void JNICALL 
Java_org_rocksdb_EnvFlinkTestSuite_runAllTestSuites( +JNIEXPORT void JNICALL Java_org_forstdb_EnvFlinkTestSuite_runAllTestSuites( JNIEnv* jniEnv, jobject, jlong objectHandle) { auto env_flink_test_suites = reinterpret_cast(objectHandle); @@ -61,11 +61,11 @@ JNIEXPORT void JNICALL Java_org_rocksdb_EnvFlinkTestSuite_runAllTestSuites( } /* - * Class: org_rocksdb_EnvFlinkTestSuite + * Class: org_forstdb_EnvFlinkTestSuite * Method: disposeInternal * Signature: (J)V */ -JNIEXPORT void JNICALL Java_org_rocksdb_EnvFlinkTestSuite_disposeInternal( +JNIEXPORT void JNICALL Java_org_forstdb_EnvFlinkTestSuite_disposeInternal( JNIEnv*, jobject, jlong objectHandle) { auto test_suites = reinterpret_cast(objectHandle); diff --git a/java/rocksjni/env_options.cc b/java/forstjni/env_options.cc similarity index 72% rename from java/rocksjni/env_options.cc rename to java/forstjni/env_options.cc index 3237e2775..a0d6b1158 100644 --- a/java/rocksjni/env_options.cc +++ b/java/forstjni/env_options.cc @@ -9,9 +9,9 @@ #include -#include "include/org_rocksdb_EnvOptions.h" +#include "include/org_forstdb_EnvOptions.h" #include "rocksdb/env.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" #define ENV_OPTIONS_SET_BOOL(_jhandle, _opt) \ reinterpret_cast(_jhandle)->_opt = \ @@ -29,21 +29,21 @@ reinterpret_cast(_jhandle)->_opt /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: newEnvOptions * Signature: ()J */ -jlong Java_org_rocksdb_EnvOptions_newEnvOptions__(JNIEnv *, jclass) { +jlong Java_org_forstdb_EnvOptions_newEnvOptions__(JNIEnv *, jclass) { auto *env_opt = new ROCKSDB_NAMESPACE::EnvOptions(); return GET_CPLUSPLUS_POINTER(env_opt); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: newEnvOptions * Signature: (J)J */ -jlong Java_org_rocksdb_EnvOptions_newEnvOptions__J(JNIEnv *, jclass, +jlong Java_org_forstdb_EnvOptions_newEnvOptions__J(JNIEnv *, jclass, jlong jdboptions_handle) { auto *db_options = reinterpret_cast(jdboptions_handle); @@ -52,11 +52,11 @@ jlong Java_org_rocksdb_EnvOptions_newEnvOptions__J(JNIEnv *, jclass, } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_EnvOptions_disposeInternal(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_disposeInternal(JNIEnv *, jobject, jlong jhandle) { auto *eo = reinterpret_cast(jhandle); assert(eo != nullptr); @@ -64,237 +64,237 @@ void Java_org_rocksdb_EnvOptions_disposeInternal(JNIEnv *, jobject, } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setUseMmapReads * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setUseMmapReads(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setUseMmapReads(JNIEnv *, jobject, jlong jhandle, jboolean use_mmap_reads) { ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_reads); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: useMmapReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_useMmapReads(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_useMmapReads(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_mmap_reads); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setUseMmapWrites * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setUseMmapWrites(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setUseMmapWrites(JNIEnv *, jobject, jlong jhandle, jboolean use_mmap_writes) { 
ENV_OPTIONS_SET_BOOL(jhandle, use_mmap_writes); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: useMmapWrites * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_useMmapWrites(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_useMmapWrites(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_mmap_writes); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setUseDirectReads * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setUseDirectReads(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setUseDirectReads(JNIEnv *, jobject, jlong jhandle, jboolean use_direct_reads) { ENV_OPTIONS_SET_BOOL(jhandle, use_direct_reads); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: useDirectReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_useDirectReads(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_useDirectReads(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_direct_reads); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setUseDirectWrites * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setUseDirectWrites( +void Java_org_forstdb_EnvOptions_setUseDirectWrites( JNIEnv *, jobject, jlong jhandle, jboolean use_direct_writes) { ENV_OPTIONS_SET_BOOL(jhandle, use_direct_writes); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: useDirectWrites * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_useDirectWrites(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_useDirectWrites(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, use_direct_writes); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setAllowFallocate * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setAllowFallocate(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setAllowFallocate(JNIEnv *, jobject, jlong jhandle, jboolean allow_fallocate) { ENV_OPTIONS_SET_BOOL(jhandle, allow_fallocate); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: allowFallocate * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_allowFallocate(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_allowFallocate(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, allow_fallocate); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setSetFdCloexec * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setSetFdCloexec(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setSetFdCloexec(JNIEnv *, jobject, jlong jhandle, jboolean set_fd_cloexec) { ENV_OPTIONS_SET_BOOL(jhandle, set_fd_cloexec); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setFdCloexec * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_setFdCloexec(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_setFdCloexec(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, set_fd_cloexec); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setBytesPerSync * Signature: (JJ)V */ -void Java_org_rocksdb_EnvOptions_setBytesPerSync(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setBytesPerSync(JNIEnv *, jobject, jlong jhandle, jlong bytes_per_sync) { ENV_OPTIONS_SET_UINT64_T(jhandle, bytes_per_sync); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: bytesPerSync * 
Signature: (J)J */ -jlong Java_org_rocksdb_EnvOptions_bytesPerSync(JNIEnv *, jobject, +jlong Java_org_forstdb_EnvOptions_bytesPerSync(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, bytes_per_sync); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setFallocateWithKeepSize * Signature: (JZ)V */ -void Java_org_rocksdb_EnvOptions_setFallocateWithKeepSize( +void Java_org_forstdb_EnvOptions_setFallocateWithKeepSize( JNIEnv *, jobject, jlong jhandle, jboolean fallocate_with_keep_size) { ENV_OPTIONS_SET_BOOL(jhandle, fallocate_with_keep_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: fallocateWithKeepSize * Signature: (J)Z */ -jboolean Java_org_rocksdb_EnvOptions_fallocateWithKeepSize(JNIEnv *, jobject, +jboolean Java_org_forstdb_EnvOptions_fallocateWithKeepSize(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, fallocate_with_keep_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setCompactionReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_EnvOptions_setCompactionReadaheadSize( +void Java_org_forstdb_EnvOptions_setCompactionReadaheadSize( JNIEnv *, jobject, jlong jhandle, jlong compaction_readahead_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, compaction_readahead_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: compactionReadaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_EnvOptions_compactionReadaheadSize(JNIEnv *, jobject, +jlong Java_org_forstdb_EnvOptions_compactionReadaheadSize(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, compaction_readahead_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setRandomAccessMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_EnvOptions_setRandomAccessMaxBufferSize( +void Java_org_forstdb_EnvOptions_setRandomAccessMaxBufferSize( JNIEnv *, jobject, jlong jhandle, jlong random_access_max_buffer_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, random_access_max_buffer_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: randomAccessMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_EnvOptions_randomAccessMaxBufferSize(JNIEnv *, jobject, +jlong Java_org_forstdb_EnvOptions_randomAccessMaxBufferSize(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, random_access_max_buffer_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_EnvOptions_setWritableFileMaxBufferSize( +void Java_org_forstdb_EnvOptions_setWritableFileMaxBufferSize( JNIEnv *, jobject, jlong jhandle, jlong writable_file_max_buffer_size) { ENV_OPTIONS_SET_SIZE_T(jhandle, writable_file_max_buffer_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: writableFileMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_EnvOptions_writableFileMaxBufferSize(JNIEnv *, jobject, +jlong Java_org_forstdb_EnvOptions_writableFileMaxBufferSize(JNIEnv *, jobject, jlong jhandle) { return ENV_OPTIONS_GET(jhandle, writable_file_max_buffer_size); } /* - * Class: org_rocksdb_EnvOptions + * Class: org_forstdb_EnvOptions * Method: setRateLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_EnvOptions_setRateLimiter(JNIEnv *, jobject, +void Java_org_forstdb_EnvOptions_setRateLimiter(JNIEnv *, jobject, jlong jhandle, jlong rl_handle) { auto 
*sptr_rate_limiter = diff --git a/java/rocksjni/event_listener.cc b/java/forstjni/event_listener.cc similarity index 74% rename from java/rocksjni/event_listener.cc rename to java/forstjni/event_listener.cc index 965932c9c..2f73b8b01 100644 --- a/java/rocksjni/event_listener.cc +++ b/java/forstjni/event_listener.cc @@ -10,17 +10,17 @@ #include -#include "include/org_rocksdb_AbstractEventListener.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/event_listener_jnicallback.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_AbstractEventListener.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/event_listener_jnicallback.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_AbstractEventListener + * Class: org_forstdb_AbstractEventListener * Method: createNewEventListener * Signature: (J)J */ -jlong Java_org_rocksdb_AbstractEventListener_createNewEventListener( +jlong Java_org_forstdb_AbstractEventListener_createNewEventListener( JNIEnv* env, jobject jobj, jlong jenabled_event_callback_values) { auto enabled_event_callbacks = ROCKSDB_NAMESPACE::EnabledEventCallbackJni::toCppEnabledEventCallbacks( @@ -33,11 +33,11 @@ jlong Java_org_rocksdb_AbstractEventListener_createNewEventListener( } /* - * Class: org_rocksdb_AbstractEventListener + * Class: org_forstdb_AbstractEventListener * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_AbstractEventListener_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_AbstractEventListener_disposeInternal(JNIEnv*, jobject, jlong jhandle) { delete reinterpret_cast*>( jhandle); diff --git a/java/rocksjni/event_listener_jnicallback.cc b/java/forstjni/event_listener_jnicallback.cc similarity index 99% rename from java/rocksjni/event_listener_jnicallback.cc rename to java/forstjni/event_listener_jnicallback.cc index 342d938b4..deb8d65de 100644 --- a/java/rocksjni/event_listener_jnicallback.cc +++ b/java/forstjni/event_listener_jnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::EventListener. -#include "rocksjni/event_listener_jnicallback.h" +#include "forstjni/event_listener_jnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { EventListenerJniCallback::EventListenerJniCallback( diff --git a/java/rocksjni/event_listener_jnicallback.h b/java/forstjni/event_listener_jnicallback.h similarity index 99% rename from java/rocksjni/event_listener_jnicallback.h rename to java/forstjni/event_listener_jnicallback.h index f4a235a23..564210d37 100644 --- a/java/rocksjni/event_listener_jnicallback.h +++ b/java/forstjni/event_listener_jnicallback.h @@ -15,7 +15,7 @@ #include #include "rocksdb/listener.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/export_import_files_metadatajni.cc b/java/forstjni/export_import_files_metadatajni.cc similarity index 67% rename from java/rocksjni/export_import_files_metadatajni.cc rename to java/forstjni/export_import_files_metadatajni.cc index 213977ac2..547b49b4c 100644 --- a/java/rocksjni/export_import_files_metadatajni.cc +++ b/java/forstjni/export_import_files_metadatajni.cc @@ -4,16 +4,16 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-#include "include/org_rocksdb_ExportImportFilesMetaData.h" -#include "include/org_rocksdb_LiveFileMetaData.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_ExportImportFilesMetaData.h" +#include "include/org_forstdb_LiveFileMetaData.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_ExportImportFilesMetaData + * Class: org_forstdb_ExportImportFilesMetaData * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ExportImportFilesMetaData_disposeInternal( +void Java_org_forstdb_ExportImportFilesMetaData_disposeInternal( JNIEnv* /*env*/, jobject /*jopt*/, jlong jhandle) { auto* metadata = reinterpret_cast(jhandle); diff --git a/java/rocksjni/filter.cc b/java/forstjni/filter.cc similarity index 76% rename from java/rocksjni/filter.cc rename to java/forstjni/filter.cc index ed22016d2..d07584dfc 100644 --- a/java/rocksjni/filter.cc +++ b/java/forstjni/filter.cc @@ -12,18 +12,18 @@ #include -#include "include/org_rocksdb_BloomFilter.h" -#include "include/org_rocksdb_Filter.h" +#include "include/org_forstdb_BloomFilter.h" +#include "include/org_forstdb_Filter.h" #include "rocksdb/filter_policy.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_BloomFilter + * Class: org_forstdb_BloomFilter * Method: createBloomFilter * Signature: (DZ)J */ -jlong Java_org_rocksdb_BloomFilter_createNewBloomFilter(JNIEnv* /*env*/, +jlong Java_org_forstdb_BloomFilter_createNewBloomFilter(JNIEnv* /*env*/, jclass /*jcls*/, jdouble bits_per_key) { auto* sptr_filter = @@ -33,11 +33,11 @@ jlong Java_org_rocksdb_BloomFilter_createNewBloomFilter(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Filter + * Class: org_forstdb_Filter * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_Filter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Filter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( diff --git a/java/rocksjni/flink_compactionfilterjni.cc b/java/forstjni/flink_compactionfilterjni.cc similarity index 94% rename from java/rocksjni/flink_compactionfilterjni.cc rename to java/forstjni/flink_compactionfilterjni.cc index cd3e88027..793c56698 100644 --- a/java/rocksjni/flink_compactionfilterjni.cc +++ b/java/forstjni/flink_compactionfilterjni.cc @@ -6,10 +6,10 @@ #include #include -#include "include/org_rocksdb_FlinkCompactionFilter.h" +#include "include/org_forstdb_FlinkCompactionFilter.h" #include "loggerjnicallback.h" #include "portal.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" #include "utilities/flink/flink_compaction_filter.h" class JniCallbackBase : public ROCKSDB_NAMESPACE::JniCallback { @@ -159,11 +159,11 @@ static ROCKSDB_NAMESPACE::flink::FlinkCompactionFilter:: } /*x - * Class: org_rocksdb_FlinkCompactionFilter + * Class: org_forstdb_FlinkCompactionFilter * Method: createNewFlinkCompactionFilterConfigHolder * Signature: ()J */ -jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfigHolder( +jlong Java_org_forstdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfigHolder( JNIEnv* /* env */, jclass /* jcls */) { return reinterpret_cast( new std::shared_ptr< @@ -172,11 +172,11 @@ jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfi } /* - * Class: org_rocksdb_FlinkCompactionFilter + * Class: org_forstdb_FlinkCompactionFilter * Method: 
disposeFlinkCompactionFilterConfigHolder * Signature: (J)V */ -void Java_org_rocksdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHolder( +void Java_org_forstdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHolder( JNIEnv* /* env */, jclass /* jcls */, jlong handle) { auto* config_holder = reinterpret_cast*>(handle); @@ -184,11 +184,11 @@ void Java_org_rocksdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHo } /* - * Class: org_rocksdb_FlinkCompactionFilter + * Class: org_forstdb_FlinkCompactionFilter * Method: createNewFlinkCompactionFilter0 * Signature: (JJJ)J */ -jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( +jlong Java_org_forstdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( JNIEnv* env, jclass /* jcls */, jlong config_holder_handle, jobject jtime_provider, jlong logger_handle) { auto config_holder = @@ -212,11 +212,11 @@ jlong Java_org_rocksdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( } /* - * Class: org_rocksdb_FlinkCompactionFilter + * Class: org_forstdb_FlinkCompactionFilter * Method: configureFlinkCompactionFilter * Signature: (JIIJJILorg/rocksdb/FlinkCompactionFilter$ListElementFilter;)Z */ -jboolean Java_org_rocksdb_FlinkCompactionFilter_configureFlinkCompactionFilter( +jboolean Java_org_forstdb_FlinkCompactionFilter_configureFlinkCompactionFilter( JNIEnv* env, jclass /* jcls */, jlong handle, jint ji_state_type, jint ji_timestamp_offset, jlong jl_ttl_milli, jlong jquery_time_after_num_entries, jint ji_list_elem_len, diff --git a/java/rocksjni/hyper_clock_cache.cc b/java/forstjni/hyper_clock_cache.cc similarity index 78% rename from java/rocksjni/hyper_clock_cache.cc rename to java/forstjni/hyper_clock_cache.cc index 782f123a5..9fdab09f7 100644 --- a/java/rocksjni/hyper_clock_cache.cc +++ b/java/forstjni/hyper_clock_cache.cc @@ -9,15 +9,15 @@ #include #include "cache/clock_cache.h" -#include "include/org_rocksdb_HyperClockCache.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_HyperClockCache.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_HyperClockCache + * Class: org_forstdb_HyperClockCache * Method: newHyperClockCache * Signature: (JJIZ)J */ -jlong Java_org_rocksdb_HyperClockCache_newHyperClockCache( +jlong Java_org_forstdb_HyperClockCache_newHyperClockCache( JNIEnv*, jclass, jlong capacity, jlong estimatedEntryCharge, jint numShardBits, jboolean strictCapacityLimit) { ROCKSDB_NAMESPACE::HyperClockCacheOptions cacheOptions = @@ -30,11 +30,11 @@ jlong Java_org_rocksdb_HyperClockCache_newHyperClockCache( } /* - * Class: org_rocksdb_HyperClockCache + * Class: org_forstdb_HyperClockCache * Method: disposeInternalJni * Signature: (J)V */ -void Java_org_rocksdb_HyperClockCache_disposeInternalJni(JNIEnv*, jclass, +void Java_org_forstdb_HyperClockCache_disposeInternalJni(JNIEnv*, jclass, jlong jhandle) { auto* hyper_clock_cache = reinterpret_cast*>(jhandle); diff --git a/java/rocksjni/import_column_family_options.cc b/java/forstjni/import_column_family_options.cc similarity index 71% rename from java/rocksjni/import_column_family_options.cc rename to java/forstjni/import_column_family_options.cc index 1a9bded51..3f642871e 100644 --- a/java/rocksjni/import_column_family_options.cc +++ b/java/forstjni/import_column_family_options.cc @@ -6,16 +6,16 @@ #include -#include "include/org_rocksdb_ImportColumnFamilyOptions.h" +#include "include/org_forstdb_ImportColumnFamilyOptions.h" #include "rocksdb/options.h" -#include 
"rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_ImportColumnFamilyOptions + * Class: org_forstdb_ImportColumnFamilyOptions * Method: newImportColumnFamilyOptions * Signature: ()J */ -jlong Java_org_rocksdb_ImportColumnFamilyOptions_newImportColumnFamilyOptions( +jlong Java_org_forstdb_ImportColumnFamilyOptions_newImportColumnFamilyOptions( JNIEnv *, jclass) { ROCKSDB_NAMESPACE::ImportColumnFamilyOptions *opts = new ROCKSDB_NAMESPACE::ImportColumnFamilyOptions(); @@ -23,11 +23,11 @@ jlong Java_org_rocksdb_ImportColumnFamilyOptions_newImportColumnFamilyOptions( } /* - * Class: org_rocksdb_ImportColumnFamilyOptions + * Class: org_forstdb_ImportColumnFamilyOptions * Method: setMoveFiles * Signature: (JZ)V */ -void Java_org_rocksdb_ImportColumnFamilyOptions_setMoveFiles( +void Java_org_forstdb_ImportColumnFamilyOptions_setMoveFiles( JNIEnv *, jobject, jlong jhandle, jboolean jmove_files) { auto *options = reinterpret_cast(jhandle); @@ -35,11 +35,11 @@ void Java_org_rocksdb_ImportColumnFamilyOptions_setMoveFiles( } /* - * Class: org_rocksdb_ImportColumnFamilyOptions + * Class: org_forstdb_ImportColumnFamilyOptions * Method: moveFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_ImportColumnFamilyOptions_moveFiles(JNIEnv *, jobject, +jboolean Java_org_forstdb_ImportColumnFamilyOptions_moveFiles(JNIEnv *, jobject, jlong jhandle) { auto *options = reinterpret_cast(jhandle); @@ -47,11 +47,11 @@ jboolean Java_org_rocksdb_ImportColumnFamilyOptions_moveFiles(JNIEnv *, jobject, } /* - * Class: org_rocksdb_ImportColumnFamilyOptions + * Class: org_forstdb_ImportColumnFamilyOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ImportColumnFamilyOptions_disposeInternal(JNIEnv *, +void Java_org_forstdb_ImportColumnFamilyOptions_disposeInternal(JNIEnv *, jobject, jlong jhandle) { delete reinterpret_cast( diff --git a/java/rocksjni/ingest_external_file_options.cc b/java/forstjni/ingest_external_file_options.cc similarity index 73% rename from java/rocksjni/ingest_external_file_options.cc rename to java/forstjni/ingest_external_file_options.cc index 052cf3325..8b87c33ab 100644 --- a/java/rocksjni/ingest_external_file_options.cc +++ b/java/forstjni/ingest_external_file_options.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_IngestExternalFileOptions.h" +#include "include/org_forstdb_IngestExternalFileOptions.h" #include "rocksdb/options.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: newIngestExternalFileOptions * Signature: ()J */ -jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__( +jlong Java_org_forstdb_IngestExternalFileOptions_newIngestExternalFileOptions__( JNIEnv*, jclass) { auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); return GET_CPLUSPLUS_POINTER(options); } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: newIngestExternalFileOptions * Signature: (ZZZZ)J */ -jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__ZZZZ( +jlong Java_org_forstdb_IngestExternalFileOptions_newIngestExternalFileOptions__ZZZZ( JNIEnv*, jclass, jboolean jmove_files, jboolean jsnapshot_consistency, jboolean jallow_global_seqno, jboolean jallow_blocking_flush) { auto* options = new ROCKSDB_NAMESPACE::IngestExternalFileOptions(); 
@@ -40,11 +40,11 @@ jlong Java_org_rocksdb_IngestExternalFileOptions_newIngestExternalFileOptions__Z } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: moveFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_moveFiles(JNIEnv*, jobject, +jboolean Java_org_forstdb_IngestExternalFileOptions_moveFiles(JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -52,11 +52,11 @@ jboolean Java_org_rocksdb_IngestExternalFileOptions_moveFiles(JNIEnv*, jobject, } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: setMoveFiles * Signature: (JZ)V */ -void Java_org_rocksdb_IngestExternalFileOptions_setMoveFiles( +void Java_org_forstdb_IngestExternalFileOptions_setMoveFiles( JNIEnv*, jobject, jlong jhandle, jboolean jmove_files) { auto* options = reinterpret_cast(jhandle); @@ -64,11 +64,11 @@ void Java_org_rocksdb_IngestExternalFileOptions_setMoveFiles( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: snapshotConsistency * Signature: (J)Z */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_snapshotConsistency( +jboolean Java_org_forstdb_IngestExternalFileOptions_snapshotConsistency( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -76,11 +76,11 @@ jboolean Java_org_rocksdb_IngestExternalFileOptions_snapshotConsistency( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: setSnapshotConsistency * Signature: (JZ)V */ -void Java_org_rocksdb_IngestExternalFileOptions_setSnapshotConsistency( +void Java_org_forstdb_IngestExternalFileOptions_setSnapshotConsistency( JNIEnv*, jobject, jlong jhandle, jboolean jsnapshot_consistency) { auto* options = reinterpret_cast(jhandle); @@ -88,11 +88,11 @@ void Java_org_rocksdb_IngestExternalFileOptions_setSnapshotConsistency( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: allowGlobalSeqNo * Signature: (J)Z */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_allowGlobalSeqNo( +jboolean Java_org_forstdb_IngestExternalFileOptions_allowGlobalSeqNo( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -100,11 +100,11 @@ jboolean Java_org_rocksdb_IngestExternalFileOptions_allowGlobalSeqNo( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: setAllowGlobalSeqNo * Signature: (JZ)V */ -void Java_org_rocksdb_IngestExternalFileOptions_setAllowGlobalSeqNo( +void Java_org_forstdb_IngestExternalFileOptions_setAllowGlobalSeqNo( JNIEnv*, jobject, jlong jhandle, jboolean jallow_global_seqno) { auto* options = reinterpret_cast(jhandle); @@ -112,11 +112,11 @@ void Java_org_rocksdb_IngestExternalFileOptions_setAllowGlobalSeqNo( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: allowBlockingFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_allowBlockingFlush( +jboolean Java_org_forstdb_IngestExternalFileOptions_allowBlockingFlush( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -124,11 +124,11 @@ jboolean Java_org_rocksdb_IngestExternalFileOptions_allowBlockingFlush( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: 
setAllowBlockingFlush * Signature: (JZ)V */ -void Java_org_rocksdb_IngestExternalFileOptions_setAllowBlockingFlush( +void Java_org_forstdb_IngestExternalFileOptions_setAllowBlockingFlush( JNIEnv*, jobject, jlong jhandle, jboolean jallow_blocking_flush) { auto* options = reinterpret_cast(jhandle); @@ -136,11 +136,11 @@ void Java_org_rocksdb_IngestExternalFileOptions_setAllowBlockingFlush( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: ingestBehind * Signature: (J)Z */ -jboolean Java_org_rocksdb_IngestExternalFileOptions_ingestBehind( +jboolean Java_org_forstdb_IngestExternalFileOptions_ingestBehind( JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -148,11 +148,11 @@ jboolean Java_org_rocksdb_IngestExternalFileOptions_ingestBehind( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: setIngestBehind * Signature: (JZ)V */ -void Java_org_rocksdb_IngestExternalFileOptions_setIngestBehind( +void Java_org_forstdb_IngestExternalFileOptions_setIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jingest_behind) { auto* options = reinterpret_cast(jhandle); @@ -160,12 +160,12 @@ void Java_org_rocksdb_IngestExternalFileOptions_setIngestBehind( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: writeGlobalSeqno * Signature: (J)Z */ JNIEXPORT jboolean JNICALL -Java_org_rocksdb_IngestExternalFileOptions_writeGlobalSeqno(JNIEnv*, jobject, +Java_org_forstdb_IngestExternalFileOptions_writeGlobalSeqno(JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); @@ -173,12 +173,12 @@ Java_org_rocksdb_IngestExternalFileOptions_writeGlobalSeqno(JNIEnv*, jobject, } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: setWriteGlobalSeqno * Signature: (JZ)V */ JNIEXPORT void JNICALL -Java_org_rocksdb_IngestExternalFileOptions_setWriteGlobalSeqno( +Java_org_forstdb_IngestExternalFileOptions_setWriteGlobalSeqno( JNIEnv*, jobject, jlong jhandle, jboolean jwrite_global_seqno) { auto* options = reinterpret_cast(jhandle); @@ -186,11 +186,11 @@ Java_org_rocksdb_IngestExternalFileOptions_setWriteGlobalSeqno( } /* - * Class: org_rocksdb_IngestExternalFileOptions + * Class: org_forstdb_IngestExternalFileOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_IngestExternalFileOptions_disposeInternal(JNIEnv*, +void Java_org_forstdb_IngestExternalFileOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* options = diff --git a/java/rocksjni/iterator.cc b/java/forstjni/iterator.cc similarity index 82% rename from java/rocksjni/iterator.cc rename to java/forstjni/iterator.cc index 3ddb9778b..c202e5b41 100644 --- a/java/rocksjni/iterator.cc +++ b/java/forstjni/iterator.cc @@ -14,15 +14,15 @@ #include -#include "include/org_rocksdb_RocksIterator.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_RocksIterator.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_RocksIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -31,64 +31,64 @@ void Java_org_rocksdb_RocksIterator_disposeInternal(JNIEnv* /*env*/, } /* - * Class: 
org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: isValid0 * Signature: (J)Z */ -jboolean Java_org_rocksdb_RocksIterator_isValid0(JNIEnv* /*env*/, +jboolean Java_org_forstdb_RocksIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seekToFirst0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, +void Java_org_forstdb_RocksIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seekToLast0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, +void Java_org_forstdb_RocksIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: next0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: prev0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: refresh0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->Refresh(); @@ -101,11 +101,11 @@ void Java_org_rocksdb_RocksIterator_refresh0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seek0 * Signature: (J[BI)V */ -void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -120,11 +120,11 @@ void Java_org_rocksdb_RocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, * the Java wrapper extracts the byte[] and passes it here. * In this case, the buffer offset of the key may be non-zero. 
* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seek0 * Signature: (J[BII)V */ -void Java_org_rocksdb_RocksIterator_seekByteArray0( +void Java_org_forstdb_RocksIterator_seekByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -136,11 +136,11 @@ void Java_org_rocksdb_RocksIterator_seekByteArray0( } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { @@ -153,11 +153,11 @@ void Java_org_rocksdb_RocksIterator_seekDirect0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seekForPrevDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_RocksIterator_seekForPrevDirect0( +void Java_org_forstdb_RocksIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -169,11 +169,11 @@ void Java_org_rocksdb_RocksIterator_seekForPrevDirect0( } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seekForPrev0 * Signature: (J[BI)V */ -void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { @@ -189,11 +189,11 @@ void Java_org_rocksdb_RocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, * the Java wrapper extracts the byte[] and passes it here. * In this case, the buffer offset of the key may be non-zero. 
* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: seek0 * Signature: (J[BII)V */ -void Java_org_rocksdb_RocksIterator_seekForPrevByteArray0( +void Java_org_forstdb_RocksIterator_seekForPrevByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -205,11 +205,11 @@ void Java_org_rocksdb_RocksIterator_seekForPrevByteArray0( } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: status0 * Signature: (J)V */ -void Java_org_rocksdb_RocksIterator_status0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_RocksIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->status(); @@ -222,11 +222,11 @@ void Java_org_rocksdb_RocksIterator_status0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: key0 * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_RocksIterator_key0(JNIEnv* env, jobject /*jobj*/, +jbyteArray Java_org_forstdb_RocksIterator_key0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice key_slice = it->key(); @@ -243,11 +243,11 @@ jbyteArray Java_org_rocksdb_RocksIterator_key0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: keyDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ -jint Java_org_rocksdb_RocksIterator_keyDirect0(JNIEnv* env, jobject /*jobj*/, +jint Java_org_forstdb_RocksIterator_keyDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { @@ -261,11 +261,11 @@ jint Java_org_rocksdb_RocksIterator_keyDirect0(JNIEnv* env, jobject /*jobj*/, * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. * - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: keyByteArray0 * Signature: (J[BII)I */ -jint Java_org_rocksdb_RocksIterator_keyByteArray0(JNIEnv* env, jobject /*jobj*/, +jint Java_org_forstdb_RocksIterator_keyByteArray0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { @@ -281,11 +281,11 @@ jint Java_org_rocksdb_RocksIterator_keyByteArray0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: value0 * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, +jbyteArray Java_org_forstdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Slice value_slice = it->value(); @@ -303,11 +303,11 @@ jbyteArray Java_org_rocksdb_RocksIterator_value0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: valueDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ -jint Java_org_rocksdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, +jint Java_org_forstdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { @@ -321,11 +321,11 @@ jint Java_org_rocksdb_RocksIterator_valueDirect0(JNIEnv* env, jobject /*jobj*/, * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. 
* - * Class: org_rocksdb_RocksIterator + * Class: org_forstdb_RocksIterator * Method: valueByteArray0 * Signature: (J[BII)I */ -jint Java_org_rocksdb_RocksIterator_valueByteArray0( +jint Java_org_forstdb_RocksIterator_valueByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jvalue_target, jint jvalue_off, jint jvalue_len) { auto* it = reinterpret_cast(handle); diff --git a/java/rocksjni/jni_perf_context.cc b/java/forstjni/jni_perf_context.cc similarity index 75% rename from java/rocksjni/jni_perf_context.cc rename to java/forstjni/jni_perf_context.cc index e0124fdaa..813a3aed7 100644 --- a/java/rocksjni/jni_perf_context.cc +++ b/java/forstjni/jni_perf_context.cc @@ -5,22 +5,22 @@ #include -#include "include/org_rocksdb_PerfContext.h" +#include "include/org_forstdb_PerfContext.h" #include "rocksdb/db.h" #include "rocksdb/perf_context.h" -void Java_org_rocksdb_PerfContext_reset(JNIEnv*, jobject, jlong jpc_handle) { +void Java_org_forstdb_PerfContext_reset(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); perf_context->Reset(); } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getUserKeyComparisonCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getUserKeyComparisonCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getUserKeyComparisonCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -28,11 +28,11 @@ jlong Java_org_rocksdb_PerfContext_getUserKeyComparisonCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockCacheHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockCacheHitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockCacheHitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -40,11 +40,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockCacheHitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockReadCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockReadCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockReadCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -52,11 +52,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockReadCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockCacheIndexHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockCacheIndexHitCount( +jlong Java_org_forstdb_PerfContext_getBlockCacheIndexHitCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -64,11 +64,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockCacheIndexHitCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockCacheStandaloneHandleCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockCacheStandaloneHandleCount( +jlong Java_org_forstdb_PerfContext_getBlockCacheStandaloneHandleCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -76,11 +76,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockCacheStandaloneHandleCount( } /* - * Class: org_rocksdb_PerfContext + * Class: 
org_forstdb_PerfContext * Method: getBlockCacheRealHandleCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockCacheRealHandleCount( +jlong Java_org_forstdb_PerfContext_getBlockCacheRealHandleCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -88,11 +88,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockCacheRealHandleCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getIndexBlockReadCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getIndexBlockReadCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getIndexBlockReadCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -100,11 +100,11 @@ jlong Java_org_rocksdb_PerfContext_getIndexBlockReadCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockCacheFilterHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockCacheFilterHitCount( +jlong Java_org_forstdb_PerfContext_getBlockCacheFilterHitCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -112,11 +112,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockCacheFilterHitCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFilterBlockReadCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFilterBlockReadCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFilterBlockReadCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -124,11 +124,11 @@ jlong Java_org_rocksdb_PerfContext_getFilterBlockReadCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCompressionDictBlockReadCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getCompressionDictBlockReadCount( +jlong Java_org_forstdb_PerfContext_getCompressionDictBlockReadCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -136,11 +136,11 @@ jlong Java_org_rocksdb_PerfContext_getCompressionDictBlockReadCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockReadByte * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockReadByte(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockReadByte(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -148,18 +148,18 @@ jlong Java_org_rocksdb_PerfContext_getBlockReadByte(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockReadTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockReadTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockReadTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); return perf_context->block_read_time; } -jlong Java_org_rocksdb_PerfContext_getBlockReadCpuTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockReadCpuTime(JNIEnv*, jobject, jlong jpc_handler) { // reinterpret_cast(jcf_handle); ROCKSDB_NAMESPACE::PerfContext* perf_context = @@ -168,11 +168,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockReadCpuTime(JNIEnv*, jobject, } /* - * Class: 
org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSecondaryCacheHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSecondaryCacheHitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSecondaryCacheHitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -180,11 +180,11 @@ jlong Java_org_rocksdb_PerfContext_getSecondaryCacheHitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCompressedSecCacheInsertRealCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertRealCount( +jlong Java_org_forstdb_PerfContext_getCompressedSecCacheInsertRealCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -192,11 +192,11 @@ jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertRealCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCompressedSecCacheInsertDummyCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertDummyCount( +jlong Java_org_forstdb_PerfContext_getCompressedSecCacheInsertDummyCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -204,11 +204,11 @@ jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheInsertDummyCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCompressedSecCacheUncompressedBytes * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheUncompressedBytes( +jlong Java_org_forstdb_PerfContext_getCompressedSecCacheUncompressedBytes( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -216,11 +216,11 @@ jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheUncompressedBytes( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCompressedSecCacheCompressedBytes * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheCompressedBytes( +jlong Java_org_forstdb_PerfContext_getCompressedSecCacheCompressedBytes( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -228,11 +228,11 @@ jlong Java_org_rocksdb_PerfContext_getCompressedSecCacheCompressedBytes( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockChecksumTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockChecksumTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockChecksumTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -240,11 +240,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockChecksumTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockDecompressTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockDecompressTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockDecompressTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -252,11 +252,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockDecompressTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getReadBytes * Signature: (J)J 
*/ -jlong Java_org_rocksdb_PerfContext_getReadBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getReadBytes(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -264,11 +264,11 @@ jlong Java_org_rocksdb_PerfContext_getReadBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getMultigetReadBytes * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getMultigetReadBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getMultigetReadBytes(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -276,11 +276,11 @@ jlong Java_org_rocksdb_PerfContext_getMultigetReadBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getIterReadBytes * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getIterReadBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getIterReadBytes(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -288,11 +288,11 @@ jlong Java_org_rocksdb_PerfContext_getIterReadBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobCacheHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobCacheHitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobCacheHitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -300,11 +300,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobCacheHitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobReadCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobReadCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobReadCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -312,11 +312,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobReadCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobReadByte * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobReadByte(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobReadByte(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -324,11 +324,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobReadByte(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobReadTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobReadTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobReadTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -336,11 +336,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobReadTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobChecksumTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobChecksumTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobChecksumTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -348,11 +348,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobChecksumTime(JNIEnv*, jobject, } /* - * Class: 
org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlobDecompressTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlobDecompressTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlobDecompressTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -360,11 +360,11 @@ jlong Java_org_rocksdb_PerfContext_getBlobDecompressTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternal_key_skipped_count * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalKeySkippedCount( +jlong Java_org_forstdb_PerfContext_getInternalKeySkippedCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -372,11 +372,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalKeySkippedCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternalDeleteSkippedCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalDeleteSkippedCount( +jlong Java_org_forstdb_PerfContext_getInternalDeleteSkippedCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -384,11 +384,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalDeleteSkippedCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternalRecentSkippedCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalRecentSkippedCount( +jlong Java_org_forstdb_PerfContext_getInternalRecentSkippedCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -396,11 +396,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalRecentSkippedCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternalMergeCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalMergeCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getInternalMergeCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -408,11 +408,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalMergeCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternalMergePointLookupCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalMergePointLookupCount( +jlong Java_org_forstdb_PerfContext_getInternalMergePointLookupCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -420,11 +420,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalMergePointLookupCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getInternalRangeDelReseekCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getInternalRangeDelReseekCount( +jlong Java_org_forstdb_PerfContext_getInternalRangeDelReseekCount( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -432,11 +432,11 @@ jlong Java_org_rocksdb_PerfContext_getInternalRangeDelReseekCount( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSnapshotTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSnapshotTime(JNIEnv*, jobject, +jlong 
Java_org_forstdb_PerfContext_getSnapshotTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -444,11 +444,11 @@ jlong Java_org_rocksdb_PerfContext_getSnapshotTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFromMemtableTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFromMemtableTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFromMemtableTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -456,11 +456,11 @@ jlong Java_org_rocksdb_PerfContext_getFromMemtableTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFromMemtableCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFromMemtableCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFromMemtableCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -468,11 +468,11 @@ jlong Java_org_rocksdb_PerfContext_getFromMemtableCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getPostProcessTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getPostProcessTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getPostProcessTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -480,11 +480,11 @@ jlong Java_org_rocksdb_PerfContext_getPostProcessTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFromOutputFilesTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFromOutputFilesTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFromOutputFilesTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -492,11 +492,11 @@ jlong Java_org_rocksdb_PerfContext_getFromOutputFilesTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekOnMemtableTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekOnMemtableTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -504,11 +504,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekOnMemtableCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekOnMemtableCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -516,11 +516,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekOnMemtableCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getNextOnMemtableCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getNextOnMemtableCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getNextOnMemtableCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -528,11 +528,11 @@ jlong Java_org_rocksdb_PerfContext_getNextOnMemtableCount(JNIEnv*, 
jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getPrevOnMemtableCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getPrevOnMemtableCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getPrevOnMemtableCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -540,11 +540,11 @@ jlong Java_org_rocksdb_PerfContext_getPrevOnMemtableCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekChildSeekTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekChildSeekTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekChildSeekTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -552,11 +552,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekChildSeekTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekChildSeekCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekChildSeekCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekChildSeekCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -564,11 +564,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekChildSeekCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekMinHeapTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekMinHeapTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekMinHeapTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -576,11 +576,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekMinHeapTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekMaxHeapTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekMaxHeapTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekMaxHeapTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -588,11 +588,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekMaxHeapTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getSeekInternalSeekTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getSeekInternalSeekTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getSeekInternalSeekTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -600,11 +600,11 @@ jlong Java_org_rocksdb_PerfContext_getSeekInternalSeekTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFindNextUserEntryTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFindNextUserEntryTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFindNextUserEntryTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -612,11 +612,11 @@ jlong Java_org_rocksdb_PerfContext_getFindNextUserEntryTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWriteWalTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWriteWalTime(JNIEnv*, jobject, +jlong 
Java_org_forstdb_PerfContext_getWriteWalTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -624,11 +624,11 @@ jlong Java_org_rocksdb_PerfContext_getWriteWalTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWriteMemtableTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWriteMemtableTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getWriteMemtableTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -636,11 +636,11 @@ jlong Java_org_rocksdb_PerfContext_getWriteMemtableTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWriteDelayTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWriteDelayTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getWriteDelayTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -648,11 +648,11 @@ jlong Java_org_rocksdb_PerfContext_getWriteDelayTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWriteSchedulingFlushesCompactionsTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWriteSchedulingFlushesCompactionsTime( +jlong Java_org_forstdb_PerfContext_getWriteSchedulingFlushesCompactionsTime( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -660,11 +660,11 @@ jlong Java_org_rocksdb_PerfContext_getWriteSchedulingFlushesCompactionsTime( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWritePreAndPostProcessTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWritePreAndPostProcessTime( +jlong Java_org_forstdb_PerfContext_getWritePreAndPostProcessTime( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -672,11 +672,11 @@ jlong Java_org_rocksdb_PerfContext_getWritePreAndPostProcessTime( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getWriteThreadWaitNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getWriteThreadWaitNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getWriteThreadWaitNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -684,11 +684,11 @@ jlong Java_org_rocksdb_PerfContext_getWriteThreadWaitNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getDbMutexLockNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getDbMutexLockNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getDbMutexLockNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -696,11 +696,11 @@ jlong Java_org_rocksdb_PerfContext_getDbMutexLockNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getDbConditionWaitNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getDbConditionWaitNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getDbConditionWaitNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -708,11 +708,11 @@ jlong 
Java_org_rocksdb_PerfContext_getDbConditionWaitNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getMergeOperatorTimeNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getMergeOperatorTimeNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getMergeOperatorTimeNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -720,11 +720,11 @@ jlong Java_org_rocksdb_PerfContext_getMergeOperatorTimeNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getReadIndexBlockNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getReadIndexBlockNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getReadIndexBlockNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -732,11 +732,11 @@ jlong Java_org_rocksdb_PerfContext_getReadIndexBlockNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getReadFilterBlockNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getReadFilterBlockNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getReadFilterBlockNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -744,11 +744,11 @@ jlong Java_org_rocksdb_PerfContext_getReadFilterBlockNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getNewTableBlockIterNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getNewTableBlockIterNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getNewTableBlockIterNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -756,11 +756,11 @@ jlong Java_org_rocksdb_PerfContext_getNewTableBlockIterNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getNewTableIteratorNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getNewTableIteratorNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getNewTableIteratorNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -768,11 +768,11 @@ jlong Java_org_rocksdb_PerfContext_getNewTableIteratorNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBlockSeekNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBlockSeekNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBlockSeekNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -780,11 +780,11 @@ jlong Java_org_rocksdb_PerfContext_getBlockSeekNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getFindTableNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getFindTableNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getFindTableNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -792,11 +792,11 @@ jlong Java_org_rocksdb_PerfContext_getFindTableNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBloomMemtableHitCount * Signature: (J)J */ -jlong 
Java_org_rocksdb_PerfContext_getBloomMemtableHitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBloomMemtableHitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -804,11 +804,11 @@ jlong Java_org_rocksdb_PerfContext_getBloomMemtableHitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBloomMemtableMissCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBloomMemtableMissCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBloomMemtableMissCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -816,11 +816,11 @@ jlong Java_org_rocksdb_PerfContext_getBloomMemtableMissCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBloomSstHitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBloomSstHitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBloomSstHitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -828,11 +828,11 @@ jlong Java_org_rocksdb_PerfContext_getBloomSstHitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getBloomSstMissCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getBloomSstMissCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getBloomSstMissCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -840,11 +840,11 @@ jlong Java_org_rocksdb_PerfContext_getBloomSstMissCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getKeyLockWaitTime * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getKeyLockWaitTime(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getKeyLockWaitTime(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -852,11 +852,11 @@ jlong Java_org_rocksdb_PerfContext_getKeyLockWaitTime(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getKeyLockWaitCount * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getKeyLockWaitCount(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getKeyLockWaitCount(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -864,11 +864,11 @@ jlong Java_org_rocksdb_PerfContext_getKeyLockWaitCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewSequentialFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewSequentialFileNanos( +jlong Java_org_forstdb_PerfContext_getEnvNewSequentialFileNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -876,11 +876,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewSequentialFileNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewRandomAccessFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewRandomAccessFileNanos( +jlong Java_org_forstdb_PerfContext_getEnvNewRandomAccessFileNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = 
reinterpret_cast(jpc_handle); @@ -888,11 +888,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewRandomAccessFileNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewWritableFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewWritableFileNanos( +jlong Java_org_forstdb_PerfContext_getEnvNewWritableFileNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -900,11 +900,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewWritableFileNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvReuseWritableFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvReuseWritableFileNanos( +jlong Java_org_forstdb_PerfContext_getEnvReuseWritableFileNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -912,11 +912,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvReuseWritableFileNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewRandomRwFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewRandomRwFileNanos( +jlong Java_org_forstdb_PerfContext_getEnvNewRandomRwFileNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -924,11 +924,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewRandomRwFileNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewDirectoryNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewDirectoryNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvNewDirectoryNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -936,11 +936,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewDirectoryNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvFileExistsNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvFileExistsNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvFileExistsNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -948,11 +948,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvFileExistsNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvGetChildrenNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvGetChildrenNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -960,11 +960,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvGetChildrenFileAttributesNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenFileAttributesNanos( +jlong Java_org_forstdb_PerfContext_getEnvGetChildrenFileAttributesNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -972,11 +972,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvGetChildrenFileAttributesNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvDeleteFileNanos * 
Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvDeleteFileNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvDeleteFileNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -984,11 +984,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvDeleteFileNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvCreateDirNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvCreateDirNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvCreateDirNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -996,11 +996,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvCreateDirNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvCreateDirIfMissingNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvCreateDirIfMissingNanos( +jlong Java_org_forstdb_PerfContext_getEnvCreateDirIfMissingNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1008,11 +1008,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvCreateDirIfMissingNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvDeleteDirNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvDeleteDirNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvDeleteDirNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1020,11 +1020,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvDeleteDirNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvGetFileSizeNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvGetFileSizeNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvGetFileSizeNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1032,11 +1032,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvGetFileSizeNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvGetFileModificationTimeNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvGetFileModificationTimeNanos( +jlong Java_org_forstdb_PerfContext_getEnvGetFileModificationTimeNanos( JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1044,11 +1044,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvGetFileModificationTimeNanos( } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvRenameFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvRenameFileNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvRenameFileNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1056,11 +1056,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvRenameFileNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvLinkFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvLinkFileNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvLinkFileNanos(JNIEnv*, jobject, jlong jpc_handle) { 
ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1068,11 +1068,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvLinkFileNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvLockFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvLockFileNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvLockFileNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1080,11 +1080,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvLockFileNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvUnlockFileNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvUnlockFileNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvUnlockFileNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1092,11 +1092,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvUnlockFileNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEnvNewLoggerNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEnvNewLoggerNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEnvNewLoggerNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1104,11 +1104,11 @@ jlong Java_org_rocksdb_PerfContext_getEnvNewLoggerNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getCpuNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getGetCpuNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getGetCpuNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1116,11 +1116,11 @@ jlong Java_org_rocksdb_PerfContext_getGetCpuNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getIterNextCpuNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getIterNextCpuNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getIterNextCpuNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1128,11 +1128,11 @@ jlong Java_org_rocksdb_PerfContext_getIterNextCpuNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getIterPrevCpuNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getIterPrevCpuNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getIterPrevCpuNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1140,11 +1140,11 @@ jlong Java_org_rocksdb_PerfContext_getIterPrevCpuNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getIterSeekCpuNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getIterSeekCpuNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getIterSeekCpuNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1152,11 +1152,11 @@ jlong Java_org_rocksdb_PerfContext_getIterSeekCpuNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getEncryptDataNanos * 
Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getEncryptDataNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getEncryptDataNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1164,11 +1164,11 @@ jlong Java_org_rocksdb_PerfContext_getEncryptDataNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getDecryptDataNanos * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getDecryptDataNanos(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getDecryptDataNanos(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); @@ -1176,11 +1176,11 @@ jlong Java_org_rocksdb_PerfContext_getDecryptDataNanos(JNIEnv*, jobject, } /* - * Class: org_rocksdb_PerfContext + * Class: org_forstdb_PerfContext * Method: getNumberAsyncSeek * Signature: (J)J */ -jlong Java_org_rocksdb_PerfContext_getNumberAsyncSeek(JNIEnv*, jobject, +jlong Java_org_forstdb_PerfContext_getNumberAsyncSeek(JNIEnv*, jobject, jlong jpc_handle) { ROCKSDB_NAMESPACE::PerfContext* perf_context = reinterpret_cast(jpc_handle); diff --git a/java/rocksjni/jnicallback.cc b/java/forstjni/jnicallback.cc similarity index 96% rename from java/rocksjni/jnicallback.cc rename to java/forstjni/jnicallback.cc index f2742cd88..51fe1f04c 100644 --- a/java/rocksjni/jnicallback.cc +++ b/java/forstjni/jnicallback.cc @@ -6,11 +6,11 @@ // This file implements the callback "bridge" between Java and C++ for // JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" #include -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { JniCallback::JniCallback(JNIEnv* env, jobject jcallback_obj) { diff --git a/java/rocksjni/jnicallback.h b/java/forstjni/jnicallback.h similarity index 100% rename from java/rocksjni/jnicallback.h rename to java/forstjni/jnicallback.h diff --git a/java/rocksjni/kv_helper.h b/java/forstjni/kv_helper.h similarity index 99% rename from java/rocksjni/kv_helper.h rename to java/forstjni/kv_helper.h index 0eb2c6eb0..4caffa16c 100644 --- a/java/rocksjni/kv_helper.h +++ b/java/forstjni/kv_helper.h @@ -18,7 +18,7 @@ #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/loggerjnicallback.cc b/java/forstjni/loggerjnicallback.cc similarity index 92% rename from java/rocksjni/loggerjnicallback.cc rename to java/forstjni/loggerjnicallback.cc index aa9f95cd4..82724e945 100644 --- a/java/rocksjni/loggerjnicallback.cc +++ b/java/forstjni/loggerjnicallback.cc @@ -6,14 +6,14 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Logger. 
-#include "rocksjni/loggerjnicallback.h" +#include "forstjni/loggerjnicallback.h" #include #include -#include "include/org_rocksdb_Logger.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_Logger.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { @@ -223,11 +223,11 @@ LoggerJniCallback::~LoggerJniCallback() { } // namespace ROCKSDB_NAMESPACE /* - * Class: org_rocksdb_Logger + * Class: org_forstdb_Logger * Method: createNewLoggerOptions * Signature: (J)J */ -jlong Java_org_rocksdb_Logger_createNewLoggerOptions(JNIEnv* env, jobject jobj, +jlong Java_org_forstdb_Logger_createNewLoggerOptions(JNIEnv* env, jobject jobj, jlong joptions) { auto* sptr_logger = new std::shared_ptr( new ROCKSDB_NAMESPACE::LoggerJniCallback(env, jobj)); @@ -240,11 +240,11 @@ jlong Java_org_rocksdb_Logger_createNewLoggerOptions(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_Logger + * Class: org_forstdb_Logger * Method: createNewLoggerDbOptions * Signature: (J)J */ -jlong Java_org_rocksdb_Logger_createNewLoggerDbOptions(JNIEnv* env, +jlong Java_org_forstdb_Logger_createNewLoggerDbOptions(JNIEnv* env, jobject jobj, jlong jdb_options) { auto* sptr_logger = new std::shared_ptr( @@ -259,11 +259,11 @@ jlong Java_org_rocksdb_Logger_createNewLoggerDbOptions(JNIEnv* env, } /* - * Class: org_rocksdb_Logger + * Class: org_forstdb_Logger * Method: setInfoLogLevel * Signature: (JB)V */ -void Java_org_rocksdb_Logger_setInfoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Logger_setInfoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jbyte jlog_level) { auto* handle = reinterpret_cast*>( @@ -273,11 +273,11 @@ void Java_org_rocksdb_Logger_setInfoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Logger + * Class: org_forstdb_Logger * Method: infoLogLevel * Signature: (J)B */ -jbyte Java_org_rocksdb_Logger_infoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, +jbyte Java_org_forstdb_Logger_infoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( @@ -286,11 +286,11 @@ jbyte Java_org_rocksdb_Logger_infoLogLevel(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Logger + * Class: org_forstdb_Logger * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_Logger_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Logger_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = reinterpret_cast*>( diff --git a/java/rocksjni/loggerjnicallback.h b/java/forstjni/loggerjnicallback.h similarity index 97% rename from java/rocksjni/loggerjnicallback.h rename to java/forstjni/loggerjnicallback.h index 57774988c..ec1393aa3 100644 --- a/java/rocksjni/loggerjnicallback.h +++ b/java/forstjni/loggerjnicallback.h @@ -16,7 +16,7 @@ #include "port/port.h" #include "rocksdb/env.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/lru_cache.cc b/java/forstjni/lru_cache.cc similarity index 78% rename from java/rocksjni/lru_cache.cc rename to java/forstjni/lru_cache.cc index 56dffa2f0..cc2bb1851 100644 --- a/java/rocksjni/lru_cache.cc +++ b/java/forstjni/lru_cache.cc @@ -10,15 +10,15 @@ #include -#include "include/org_rocksdb_LRUCache.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_LRUCache.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: 
org_rocksdb_LRUCache + * Class: org_forstdb_LRUCache * Method: newLRUCache * Signature: (JIZD)J */ -jlong Java_org_rocksdb_LRUCache_newLRUCache(JNIEnv* /*env*/, jclass /*jcls*/, +jlong Java_org_forstdb_LRUCache_newLRUCache(JNIEnv* /*env*/, jclass /*jcls*/, jlong jcapacity, jint jnum_shard_bits, jboolean jstrict_capacity_limit, @@ -29,18 +29,18 @@ jlong Java_org_rocksdb_LRUCache_newLRUCache(JNIEnv* /*env*/, jclass /*jcls*/, static_cast(jcapacity), static_cast(jnum_shard_bits), static_cast(jstrict_capacity_limit), static_cast(jhigh_pri_pool_ratio), - nullptr /* memory_allocator */, rocksdb::kDefaultToAdaptiveMutex, - rocksdb::kDefaultCacheMetadataChargePolicy, + nullptr /* memory_allocator */, ROCKSDB_NAMESPACE::kDefaultToAdaptiveMutex, + ROCKSDB_NAMESPACE::kDefaultCacheMetadataChargePolicy, static_cast(jlow_pri_pool_ratio))); return GET_CPLUSPLUS_POINTER(sptr_lru_cache); } /* - * Class: org_rocksdb_LRUCache + * Class: org_forstdb_LRUCache * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_LRUCache_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_LRUCache_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_lru_cache = diff --git a/java/rocksjni/memory_util.cc b/java/forstjni/memory_util.cc similarity index 95% rename from java/rocksjni/memory_util.cc rename to java/forstjni/memory_util.cc index c87c4f403..9a40b6d75 100644 --- a/java/rocksjni/memory_util.cc +++ b/java/forstjni/memory_util.cc @@ -12,15 +12,15 @@ #include #include -#include "include/org_rocksdb_MemoryUtil.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_MemoryUtil.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_MemoryUtil + * Class: org_forstdb_MemoryUtil * Method: getApproximateMemoryUsageByType * Signature: ([J[J)Ljava/util/Map; */ -jobject Java_org_rocksdb_MemoryUtil_getApproximateMemoryUsageByType( +jobject Java_org_forstdb_MemoryUtil_getApproximateMemoryUsageByType( JNIEnv *env, jclass, jlongArray jdb_handles, jlongArray jcache_handles) { jboolean has_exception = JNI_FALSE; std::vector dbs = diff --git a/java/rocksjni/memtablejni.cc b/java/forstjni/memtablejni.cc similarity index 79% rename from java/rocksjni/memtablejni.cc rename to java/forstjni/memtablejni.cc index a4d02f354..59473c7c1 100644 --- a/java/rocksjni/memtablejni.cc +++ b/java/forstjni/memtablejni.cc @@ -5,20 +5,20 @@ // // This file implements the "bridge" between Java and C++ for MemTables. 
-#include "include/org_rocksdb_HashLinkedListMemTableConfig.h" -#include "include/org_rocksdb_HashSkipListMemTableConfig.h" -#include "include/org_rocksdb_SkipListMemTableConfig.h" -#include "include/org_rocksdb_VectorMemTableConfig.h" +#include "include/org_forstdb_HashLinkedListMemTableConfig.h" +#include "include/org_forstdb_HashSkipListMemTableConfig.h" +#include "include/org_forstdb_SkipListMemTableConfig.h" +#include "include/org_forstdb_VectorMemTableConfig.h" #include "rocksdb/memtablerep.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_HashSkipListMemTableConfig + * Class: org_forstdb_HashSkipListMemTableConfig * Method: newMemTableFactoryHandle * Signature: (JII)J */ -jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( +jlong Java_org_forstdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, jint jheight, jint jbranching_factor) { ROCKSDB_NAMESPACE::Status s = @@ -33,11 +33,11 @@ jlong Java_org_rocksdb_HashSkipListMemTableConfig_newMemTableFactoryHandle( } /* - * Class: org_rocksdb_HashLinkedListMemTableConfig + * Class: org_forstdb_HashLinkedListMemTableConfig * Method: newMemTableFactoryHandle * Signature: (JJIZI)J */ -jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( +jlong Java_org_forstdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jbucket_count, jlong jhuge_page_tlb_size, jint jbucket_entries_logging_threshold, jboolean jif_log_bucket_dist_when_flash, jint jthreshold_use_skiplist) { @@ -60,11 +60,11 @@ jlong Java_org_rocksdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle( } /* - * Class: org_rocksdb_VectorMemTableConfig + * Class: org_forstdb_VectorMemTableConfig * Method: newMemTableFactoryHandle * Signature: (J)J */ -jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( +jlong Java_org_forstdb_VectorMemTableConfig_newMemTableFactoryHandle( JNIEnv* env, jobject /*jobj*/, jlong jreserved_size) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jreserved_size); @@ -77,11 +77,11 @@ jlong Java_org_rocksdb_VectorMemTableConfig_newMemTableFactoryHandle( } /* - * Class: org_rocksdb_SkipListMemTableConfig + * Class: org_forstdb_SkipListMemTableConfig * Method: newMemTableFactoryHandle0 * Signature: (J)J */ -jlong Java_org_rocksdb_SkipListMemTableConfig_newMemTableFactoryHandle0( +jlong Java_org_forstdb_SkipListMemTableConfig_newMemTableFactoryHandle0( JNIEnv* env, jobject /*jobj*/, jlong jlookahead) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jlookahead); diff --git a/java/rocksjni/merge_operator.cc b/java/forstjni/merge_operator.cc similarity index 80% rename from java/rocksjni/merge_operator.cc rename to java/forstjni/merge_operator.cc index ce3c5df56..e5da11fb9 100644 --- a/java/rocksjni/merge_operator.cc +++ b/java/forstjni/merge_operator.cc @@ -16,24 +16,24 @@ #include #include -#include "include/org_rocksdb_StringAppendOperator.h" -#include "include/org_rocksdb_UInt64AddOperator.h" +#include "include/org_forstdb_StringAppendOperator.h" +#include "include/org_forstdb_UInt64AddOperator.h" #include "rocksdb/db.h" #include "rocksdb/memtablerep.h" #include "rocksdb/options.h" #include "rocksdb/slice_transform.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" -#include 
"rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" #include "utilities/merge_operators.h" /* - * Class: org_rocksdb_StringAppendOperator + * Class: org_forstdb_StringAppendOperator * Method: newSharedStringAppendOperator * Signature: (C)J */ -jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C( +jlong Java_org_forstdb_StringAppendOperator_newSharedStringAppendOperator__C( JNIEnv* /*env*/, jclass /*jclazz*/, jchar jdelim) { auto* sptr_string_append_op = new std::shared_ptr( @@ -42,7 +42,7 @@ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__C( return GET_CPLUSPLUS_POINTER(sptr_string_append_op); } -jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2( +jlong Java_org_forstdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2( JNIEnv* env, jclass /*jclass*/, jstring jdelim) { jboolean has_exception = JNI_FALSE; auto delim = @@ -57,11 +57,11 @@ jlong Java_org_rocksdb_StringAppendOperator_newSharedStringAppendOperator__Ljava } /* - * Class: org_rocksdb_StringAppendOperator + * Class: org_forstdb_StringAppendOperator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_string_append_op = @@ -71,11 +71,11 @@ void Java_org_rocksdb_StringAppendOperator_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_UInt64AddOperator + * Class: org_forstdb_UInt64AddOperator * Method: newSharedUInt64AddOperator * Signature: ()J */ -jlong Java_org_rocksdb_UInt64AddOperator_newSharedUInt64AddOperator( +jlong Java_org_forstdb_UInt64AddOperator_newSharedUInt64AddOperator( JNIEnv* /*env*/, jclass /*jclazz*/) { auto* sptr_uint64_add_op = new std::shared_ptr( @@ -84,11 +84,11 @@ jlong Java_org_rocksdb_UInt64AddOperator_newSharedUInt64AddOperator( } /* - * Class: org_rocksdb_UInt64AddOperator + * Class: org_forstdb_UInt64AddOperator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_UInt64AddOperator_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_UInt64AddOperator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_uint64_add_op = diff --git a/java/rocksjni/native_comparator_wrapper_test.cc b/java/forstjni/native_comparator_wrapper_test.cc similarity index 83% rename from java/rocksjni/native_comparator_wrapper_test.cc rename to java/forstjni/native_comparator_wrapper_test.cc index ac33ca22d..708d6fd4c 100644 --- a/java/rocksjni/native_comparator_wrapper_test.cc +++ b/java/forstjni/native_comparator_wrapper_test.cc @@ -7,10 +7,10 @@ #include -#include "include/org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h" +#include "include/org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h" #include "rocksdb/comparator.h" #include "rocksdb/slice.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" namespace ROCKSDB_NAMESPACE { @@ -33,11 +33,11 @@ class NativeComparatorWrapperTestStringComparator : public Comparator { } // namespace ROCKSDB_NAMESPACE /* - * Class: org_rocksdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper + * Class: org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper * Method: newStringComparator * Signature: ()J */ -jlong 
Java_org_rocksdb_NativeComparatorWrapperTest_00024NativeStringComparatorWrapper_newStringComparator( +jlong Java_org_forstdb_NativeComparatorWrapperTest_00024NativeStringComparatorWrapper_newStringComparator( JNIEnv* /*env*/, jobject /*jobj*/) { auto* comparator = new ROCKSDB_NAMESPACE::NativeComparatorWrapperTestStringComparator(); diff --git a/java/rocksjni/optimistic_transaction_db.cc b/java/forstjni/optimistic_transaction_db.cc similarity index 88% rename from java/rocksjni/optimistic_transaction_db.cc rename to java/forstjni/optimistic_transaction_db.cc index 238224f58..0e6fcf1c7 100644 --- a/java/rocksjni/optimistic_transaction_db.cc +++ b/java/forstjni/optimistic_transaction_db.cc @@ -10,18 +10,18 @@ #include -#include "include/org_rocksdb_OptimisticTransactionDB.h" +#include "include/org_forstdb_OptimisticTransactionDB.h" #include "rocksdb/options.h" #include "rocksdb/utilities/transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: open * Signature: (JLjava/lang/String;)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2( +jlong Java_org_forstdb_OptimisticTransactionDB_open__JLjava_lang_String_2( JNIEnv* env, jclass, jlong joptions_handle, jstring jdb_path) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { @@ -46,12 +46,12 @@ jlong Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: open * Signature: (JLjava/lang/String;[[B[J)[J */ jlongArray -Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J( +Java_org_forstdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jdb_options_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options_handles) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); @@ -141,11 +141,11 @@ Java_org_rocksdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_OptimisticTransactionDB_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_OptimisticTransactionDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); @@ -154,11 +154,11 @@ void Java_org_rocksdb_OptimisticTransactionDB_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: closeDatabase * Signature: (J)V */ -void Java_org_rocksdb_OptimisticTransactionDB_closeDatabase(JNIEnv* env, jclass, +void Java_org_forstdb_OptimisticTransactionDB_closeDatabase(JNIEnv* env, jclass, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); @@ -168,11 +168,11 @@ void Java_org_rocksdb_OptimisticTransactionDB_closeDatabase(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: beginTransaction * Signature: (JJ)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJ( +jlong Java_org_forstdb_OptimisticTransactionDB_beginTransaction__JJ( JNIEnv*, jobject, jlong jhandle, jlong 
jwrite_options_handle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); @@ -184,11 +184,11 @@ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJ( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: beginTransaction * Signature: (JJJ)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJJ( +jlong Java_org_forstdb_OptimisticTransactionDB_beginTransaction__JJJ( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jwrite_options_handle, jlong joptimistic_txn_options_handle) { auto* optimistic_txn_db = @@ -204,11 +204,11 @@ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction__JJJ( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: beginTransaction_withOld * Signature: (JJJ)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ( +jlong Java_org_forstdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jold_txn_handle) { auto* optimistic_txn_db = @@ -230,11 +230,11 @@ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: beginTransaction_withOld * Signature: (JJJJ)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ( +jlong Java_org_forstdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong joptimistic_txn_options_handle, jlong jold_txn_handle) { auto* optimistic_txn_db = @@ -258,11 +258,11 @@ jlong Java_org_rocksdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ( } /* - * Class: org_rocksdb_OptimisticTransactionDB + * Class: org_forstdb_OptimisticTransactionDB * Method: getBaseDB * Signature: (J)J */ -jlong Java_org_rocksdb_OptimisticTransactionDB_getBaseDB(JNIEnv*, jobject, +jlong Java_org_forstdb_OptimisticTransactionDB_getBaseDB(JNIEnv*, jobject, jlong jhandle) { auto* optimistic_txn_db = reinterpret_cast(jhandle); diff --git a/java/rocksjni/optimistic_transaction_options.cc b/java/forstjni/optimistic_transaction_options.cc similarity index 72% rename from java/rocksjni/optimistic_transaction_options.cc rename to java/forstjni/optimistic_transaction_options.cc index 501c6c4fb..feb5e0238 100644 --- a/java/rocksjni/optimistic_transaction_options.cc +++ b/java/forstjni/optimistic_transaction_options.cc @@ -8,17 +8,17 @@ #include -#include "include/org_rocksdb_OptimisticTransactionOptions.h" +#include "include/org_forstdb_OptimisticTransactionOptions.h" #include "rocksdb/comparator.h" #include "rocksdb/utilities/optimistic_transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_OptimisticTransactionOptions + * Class: org_forstdb_OptimisticTransactionOptions * Method: newOptimisticTransactionOptions * Signature: ()J */ -jlong Java_org_rocksdb_OptimisticTransactionOptions_newOptimisticTransactionOptions( +jlong Java_org_forstdb_OptimisticTransactionOptions_newOptimisticTransactionOptions( JNIEnv* /*env*/, jclass /*jcls*/) { ROCKSDB_NAMESPACE::OptimisticTransactionOptions* opts = new ROCKSDB_NAMESPACE::OptimisticTransactionOptions(); @@ -26,11 +26,11 @@ jlong Java_org_rocksdb_OptimisticTransactionOptions_newOptimisticTransactionOpti } /* - * Class: org_rocksdb_OptimisticTransactionOptions + * Class: 
org_forstdb_OptimisticTransactionOptions * Method: isSetSnapshot * Signature: (J)Z */ -jboolean Java_org_rocksdb_OptimisticTransactionOptions_isSetSnapshot( +jboolean Java_org_forstdb_OptimisticTransactionOptions_isSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast( @@ -39,11 +39,11 @@ jboolean Java_org_rocksdb_OptimisticTransactionOptions_isSetSnapshot( } /* - * Class: org_rocksdb_OptimisticTransactionOptions + * Class: org_forstdb_OptimisticTransactionOptions * Method: setSetSnapshot * Signature: (JZ)V */ -void Java_org_rocksdb_OptimisticTransactionOptions_setSetSnapshot( +void Java_org_forstdb_OptimisticTransactionOptions_setSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { auto* opts = reinterpret_cast( @@ -52,11 +52,11 @@ void Java_org_rocksdb_OptimisticTransactionOptions_setSetSnapshot( } /* - * Class: org_rocksdb_OptimisticTransactionOptions + * Class: org_forstdb_OptimisticTransactionOptions * Method: setComparator * Signature: (JJ)V */ -void Java_org_rocksdb_OptimisticTransactionOptions_setComparator( +void Java_org_forstdb_OptimisticTransactionOptions_setComparator( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcomparator_handle) { auto* opts = @@ -67,11 +67,11 @@ void Java_org_rocksdb_OptimisticTransactionOptions_setComparator( } /* - * Class: org_rocksdb_OptimisticTransactionOptions + * Class: org_forstdb_OptimisticTransactionOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_OptimisticTransactionOptions_disposeInternal( +void Java_org_forstdb_OptimisticTransactionOptions_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast( jhandle); diff --git a/java/rocksjni/options.cc b/java/forstjni/options.cc similarity index 76% rename from java/rocksjni/options.cc rename to java/forstjni/options.cc index 0d84901c9..bc61f470d 100644 --- a/java/rocksjni/options.cc +++ b/java/forstjni/options.cc @@ -15,13 +15,13 @@ #include #include -#include "include/org_rocksdb_ColumnFamilyOptions.h" -#include "include/org_rocksdb_ComparatorOptions.h" -#include "include/org_rocksdb_DBOptions.h" -#include "include/org_rocksdb_FlushOptions.h" -#include "include/org_rocksdb_Options.h" -#include "include/org_rocksdb_ReadOptions.h" -#include "include/org_rocksdb_WriteOptions.h" +#include "include/org_forstdb_ColumnFamilyOptions.h" +#include "include/org_forstdb_ComparatorOptions.h" +#include "include/org_forstdb_DBOptions.h" +#include "include/org_forstdb_FlushOptions.h" +#include "include/org_forstdb_Options.h" +#include "include/org_forstdb_ReadOptions.h" +#include "include/org_forstdb_WriteOptions.h" #include "rocksdb/comparator.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" @@ -32,29 +32,29 @@ #include "rocksdb/sst_partitioner.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/statisticsjni.h" -#include "rocksjni/table_filter_jnicallback.h" +#include "forstjni/comparatorjnicallback.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" +#include "forstjni/statisticsjni.h" +#include "forstjni/table_filter_jnicallback.h" #include "utilities/merge_operators.h" /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: newOptions * Signature: ()J */ -jlong Java_org_rocksdb_Options_newOptions__(JNIEnv*, jclass) { +jlong 
Java_org_forstdb_Options_newOptions__(JNIEnv*, jclass) { auto* op = new ROCKSDB_NAMESPACE::Options(); return GET_CPLUSPLUS_POINTER(op); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: newOptions * Signature: (JJ)J */ -jlong Java_org_rocksdb_Options_newOptions__JJ(JNIEnv*, jclass, jlong jdboptions, +jlong Java_org_forstdb_Options_newOptions__JJ(JNIEnv*, jclass, jlong jdboptions, jlong jcfoptions) { auto* dbOpt = reinterpret_cast(jdboptions); @@ -65,33 +65,33 @@ jlong Java_org_rocksdb_Options_newOptions__JJ(JNIEnv*, jclass, jlong jdboptions, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: copyOptions * Signature: (J)J */ -jlong Java_org_rocksdb_Options_copyOptions(JNIEnv*, jclass, jlong jhandle) { +jlong Java_org_forstdb_Options_copyOptions(JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::Options( *(reinterpret_cast(jhandle))); return GET_CPLUSPLUS_POINTER(new_opt); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_Options_disposeInternal(JNIEnv*, jobject, jlong handle) { +void Java_org_forstdb_Options_disposeInternal(JNIEnv*, jobject, jlong handle) { auto* op = reinterpret_cast(handle); assert(op != nullptr); delete op; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setIncreaseParallelism * Signature: (JI)V */ -void Java_org_rocksdb_Options_setIncreaseParallelism(JNIEnv*, jobject, +void Java_org_forstdb_Options_setIncreaseParallelism(JNIEnv*, jobject, jlong jhandle, jint totalThreads) { reinterpret_cast(jhandle)->IncreaseParallelism( @@ -99,33 +99,33 @@ void Java_org_rocksdb_Options_setIncreaseParallelism(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCreateIfMissing * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setCreateIfMissing(JNIEnv*, jobject, +void Java_org_forstdb_Options_setCreateIfMissing(JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle)->create_if_missing = flag; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: createIfMissing * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_createIfMissing(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_createIfMissing(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_if_missing; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCreateMissingColumnFamilies * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setCreateMissingColumnFamilies(JNIEnv*, jobject, +void Java_org_forstdb_Options_setCreateMissingColumnFamilies(JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle) @@ -133,22 +133,22 @@ void Java_org_rocksdb_Options_setCreateMissingColumnFamilies(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: createMissingColumnFamilies * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_createMissingColumnFamilies(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_createMissingColumnFamilies(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_missing_column_families; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setComparatorHandle * Signature: (JI)V */ -void Java_org_rocksdb_Options_setComparatorHandle__JI(JNIEnv*, jobject, +void Java_org_forstdb_Options_setComparatorHandle__JI(JNIEnv*, jobject, jlong jhandle, jint builtinComparator) { switch 
(builtinComparator) { @@ -164,11 +164,11 @@ void Java_org_rocksdb_Options_setComparatorHandle__JI(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setComparatorHandle * Signature: (JJB)V */ -void Java_org_rocksdb_Options_setComparatorHandle__JJB(JNIEnv*, jobject, +void Java_org_forstdb_Options_setComparatorHandle__JJB(JNIEnv*, jobject, jlong jopt_handle, jlong jcomparator_handle, jbyte jcomparator_type) { @@ -191,11 +191,11 @@ void Java_org_rocksdb_Options_setComparatorHandle__JJB(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMergeOperatorName * Signature: (JJjava/lang/String)V */ -void Java_org_rocksdb_Options_setMergeOperatorName(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setMergeOperatorName(JNIEnv* env, jobject, jlong jhandle, jstring jop_name) { const char* op_name = env->GetStringUTFChars(jop_name, nullptr); @@ -212,11 +212,11 @@ void Java_org_rocksdb_Options_setMergeOperatorName(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMergeOperator * Signature: (JJjava/lang/String)V */ -void Java_org_rocksdb_Options_setMergeOperator(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setMergeOperator(JNIEnv*, jobject, jlong jhandle, jlong mergeOperatorHandle) { reinterpret_cast(jhandle)->merge_operator = *(reinterpret_cast*>( @@ -224,11 +224,11 @@ void Java_org_rocksdb_Options_setMergeOperator(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionFilterHandle * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompactionFilterHandle( +void Java_org_forstdb_Options_setCompactionFilterHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { reinterpret_cast(jopt_handle) ->compaction_filter = @@ -237,11 +237,11 @@ void Java_org_rocksdb_Options_setCompactionFilterHandle( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionFilterFactoryHandle * Signature: (JJ)V */ -void JNICALL Java_org_rocksdb_Options_setCompactionFilterFactoryHandle( +void JNICALL Java_org_forstdb_Options_setCompactionFilterFactoryHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilterfactory_handle) { auto* cff_factory = reinterpret_cast< @@ -252,11 +252,11 @@ void JNICALL Java_org_rocksdb_Options_setCompactionFilterFactoryHandle( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWriteBufferSize * Signature: (JJ)I */ -void Java_org_rocksdb_Options_setWriteBufferSize(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setWriteBufferSize(JNIEnv* env, jobject, jlong jhandle, jlong jwrite_buffer_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( @@ -270,11 +270,11 @@ void Java_org_rocksdb_Options_setWriteBufferSize(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWriteBufferManager * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWriteBufferManager( +void Java_org_forstdb_Options_setWriteBufferManager( JNIEnv*, jobject, jlong joptions_handle, jlong jwrite_buffer_manager_handle) { auto* write_buffer_manager = @@ -285,33 +285,33 @@ void Java_org_rocksdb_Options_setWriteBufferManager( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: writeBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_writeBufferSize(JNIEnv*, jobject, +jlong 
Java_org_forstdb_Options_writeBufferSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_buffer_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxWriteBufferNumber * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxWriteBufferNumber( +void Java_org_forstdb_Options_setMaxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number) { reinterpret_cast(jhandle) ->max_write_buffer_number = jmax_write_buffer_number; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setStatistics * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setStatistics(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setStatistics(JNIEnv*, jobject, jlong jhandle, jlong jstatistics_handle) { auto* opt = reinterpret_cast(jhandle); auto* pSptr = @@ -321,11 +321,11 @@ void Java_org_rocksdb_Options_setStatistics(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: statistics * Signature: (J)J */ -jlong Java_org_rocksdb_Options_statistics(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_statistics(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); std::shared_ptr sptr = opt->statistics; if (sptr == nullptr) { @@ -338,77 +338,77 @@ jlong Java_org_rocksdb_Options_statistics(JNIEnv*, jobject, jlong jhandle) { } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxWriteBufferNumber * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxWriteBufferNumber(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxWriteBufferNumber(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: errorIfExists * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_errorIfExists(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_errorIfExists(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->error_if_exists; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setErrorIfExists * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setErrorIfExists(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setErrorIfExists(JNIEnv*, jobject, jlong jhandle, jboolean error_if_exists) { reinterpret_cast(jhandle)->error_if_exists = static_cast(error_if_exists); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: paranoidChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_paranoidChecks(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_paranoidChecks(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_checks; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setParanoidChecks * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setParanoidChecks(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setParanoidChecks(JNIEnv*, jobject, jlong jhandle, jboolean paranoid_checks) { reinterpret_cast(jhandle)->paranoid_checks = static_cast(paranoid_checks); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnv * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setEnv(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setEnv(JNIEnv*, jobject, jlong jhandle, jlong jenv) { reinterpret_cast(jhandle)->env = reinterpret_cast(jenv); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * 
Method: setMaxTotalWalSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxTotalWalSize(JNIEnv*, jobject, +void Java_org_forstdb_Options_setMaxTotalWalSize(JNIEnv*, jobject, jlong jhandle, jlong jmax_total_wal_size) { reinterpret_cast(jhandle)->max_total_wal_size = @@ -416,84 +416,84 @@ void Java_org_rocksdb_Options_setMaxTotalWalSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxTotalWalSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxTotalWalSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxTotalWalSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_total_wal_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxOpenFiles * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { +jint Java_org_forstdb_Options_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->max_open_files; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxOpenFiles * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, jint max_open_files) { reinterpret_cast(jhandle)->max_open_files = static_cast(max_open_files); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxFileOpeningThreads * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxFileOpeningThreads( +void Java_org_forstdb_Options_setMaxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { reinterpret_cast(jhandle) ->max_file_opening_threads = static_cast(jmax_file_opening_threads); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxFileOpeningThreads * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxFileOpeningThreads(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxFileOpeningThreads(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_file_opening_threads); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: useFsync * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_useFsync(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_Options_useFsync(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->use_fsync; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setUseFsync * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setUseFsync(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setUseFsync(JNIEnv*, jobject, jlong jhandle, jboolean use_fsync) { reinterpret_cast(jhandle)->use_fsync = static_cast(use_fsync); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDbPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_Options_setDbPaths(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_Options_setDbPaths(JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { std::vector db_paths; @@ -535,21 +535,21 @@ void Java_org_rocksdb_Options_setDbPaths(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: dbPathsLen * Signature: (J)J */ -jlong Java_org_rocksdb_Options_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { auto* opt = 
reinterpret_cast(jhandle); return static_cast(opt->db_paths.size()); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: dbPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_Options_dbPaths(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_Options_dbPaths(JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { jboolean is_copy; @@ -586,22 +586,22 @@ void Java_org_rocksdb_Options_dbPaths(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: dbLogDir * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_Options_dbLogDir(JNIEnv* env, jobject, jlong jhandle) { +jstring Java_org_forstdb_Options_dbLogDir(JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) ->db_log_dir.c_str()); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDbLogDir * Signature: (JLjava/lang/String)V */ -void Java_org_rocksdb_Options_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_Options_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, jstring jdb_log_dir) { const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); if (log_dir == nullptr) { @@ -614,21 +614,21 @@ void Java_org_rocksdb_Options_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: walDir * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_Options_walDir(JNIEnv* env, jobject, jlong jhandle) { +jstring Java_org_forstdb_Options_walDir(JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle)->wal_dir.c_str()); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalDir * Signature: (JLjava/lang/String)V */ -void Java_org_rocksdb_Options_setWalDir(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_Options_setWalDir(JNIEnv* env, jobject, jlong jhandle, jstring jwal_dir) { const char* wal_dir = env->GetStringUTFChars(jwal_dir, nullptr); if (wal_dir == nullptr) { @@ -641,22 +641,22 @@ void Java_org_rocksdb_Options_setWalDir(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: deleteObsoleteFilesPeriodMicros * Signature: (J)J */ -jlong Java_org_rocksdb_Options_deleteObsoleteFilesPeriodMicros(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_deleteObsoleteFilesPeriodMicros(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDeleteObsoleteFilesPeriodMicros * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setDeleteObsoleteFilesPeriodMicros(JNIEnv*, +void Java_org_forstdb_Options_setDeleteObsoleteFilesPeriodMicros(JNIEnv*, jobject, jlong jhandle, jlong micros) { @@ -665,22 +665,22 @@ void Java_org_rocksdb_Options_setDeleteObsoleteFilesPeriodMicros(JNIEnv*, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBackgroundCompactions * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxBackgroundCompactions(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxBackgroundCompactions(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_compactions; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBackgroundCompactions * Signature: (JI)V */ -void 
Java_org_rocksdb_Options_setMaxBackgroundCompactions(JNIEnv*, jobject, +void Java_org_forstdb_Options_setMaxBackgroundCompactions(JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) @@ -688,66 +688,66 @@ void Java_org_rocksdb_Options_setMaxBackgroundCompactions(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxSubcompactions * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxSubcompactions(JNIEnv*, jobject, +void Java_org_forstdb_Options_setMaxSubcompactions(JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle)->max_subcompactions = static_cast(max); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxSubcompactions * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxSubcompactions(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxSubcompactions(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_subcompactions; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBackgroundFlushes * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxBackgroundFlushes(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxBackgroundFlushes(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_flushes; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBackgroundFlushes * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxBackgroundFlushes( +void Java_org_forstdb_Options_setMaxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { reinterpret_cast(jhandle) ->max_background_flushes = static_cast(max_background_flushes); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBackgroundJobs * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxBackgroundJobs(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxBackgroundJobs(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_jobs; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBackgroundJobs * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxBackgroundJobs(JNIEnv*, jobject, +void Java_org_forstdb_Options_setMaxBackgroundJobs(JNIEnv*, jobject, jlong jhandle, jint max_background_jobs) { reinterpret_cast(jhandle)->max_background_jobs = @@ -755,21 +755,21 @@ void Java_org_rocksdb_Options_setMaxBackgroundJobs(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxLogFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxLogFileSize(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_maxLogFileSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_log_file_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxLogFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxLogFileSize(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setMaxLogFileSize(JNIEnv* env, jobject, jlong jhandle, jlong max_log_file_size) { auto s = @@ -783,22 +783,22 @@ void Java_org_rocksdb_Options_setMaxLogFileSize(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: logFileTimeToRoll * Signature: (J)J */ -jlong Java_org_rocksdb_Options_logFileTimeToRoll(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_logFileTimeToRoll(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->log_file_time_to_roll; } /* - * Class: 
org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLogFileTimeToRoll * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setLogFileTimeToRoll( +void Java_org_forstdb_Options_setLogFileTimeToRoll( JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( log_file_time_to_roll); @@ -811,21 +811,21 @@ void Java_org_rocksdb_Options_setLogFileTimeToRoll( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: keepLogFileNum * Signature: (J)J */ -jlong Java_org_rocksdb_Options_keepLogFileNum(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_keepLogFileNum(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->keep_log_file_num; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setKeepLogFileNum * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setKeepLogFileNum(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setKeepLogFileNum(JNIEnv* env, jobject, jlong jhandle, jlong keep_log_file_num) { auto s = @@ -839,22 +839,22 @@ void Java_org_rocksdb_Options_setKeepLogFileNum(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: recycleLogFileNum * Signature: (J)J */ -jlong Java_org_rocksdb_Options_recycleLogFileNum(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_recycleLogFileNum(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->recycle_log_file_num; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setRecycleLogFileNum * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setRecycleLogFileNum(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setRecycleLogFileNum(JNIEnv* env, jobject, jlong jhandle, jlong recycle_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( @@ -868,11 +868,11 @@ void Java_org_rocksdb_Options_setRecycleLogFileNum(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxManifestFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxManifestFileSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxManifestFileSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_manifest_file_size; @@ -882,7 +882,7 @@ jlong Java_org_rocksdb_Options_maxManifestFileSize(JNIEnv*, jobject, * Method: memTableFactoryName * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_Options_memTableFactoryName(JNIEnv* env, jobject, +jstring Java_org_forstdb_Options_memTableFactoryName(JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::MemTableRepFactory* tf = opt->memtable_factory.get(); @@ -950,11 +950,11 @@ rocksdb_convert_cf_paths_from_java_helper(JNIEnv* env, jobjectArray path_array, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCfPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_Options_setCfPaths(JNIEnv* env, jclass, jlong jhandle, +void Java_org_forstdb_Options_setCfPaths(JNIEnv* env, jclass, jlong jhandle, jobjectArray path_array, jlongArray size_array) { auto* options = reinterpret_cast(jhandle); @@ -968,11 +968,11 @@ void Java_org_rocksdb_Options_setCfPaths(JNIEnv* env, jclass, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: cfPathsLen * Signature: (J)J */ -jlong Java_org_rocksdb_Options_cfPathsLen(JNIEnv*, jclass, jlong jhandle) { +jlong 
Java_org_forstdb_Options_cfPathsLen(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->cf_paths.size()); } @@ -1017,11 +1017,11 @@ static void rocksdb_convert_cf_paths_to_java_helper(JNIEnv* env, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: cfPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_Options_cfPaths(JNIEnv* env, jclass, jlong jhandle, +void Java_org_forstdb_Options_cfPaths(JNIEnv* env, jclass, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { rocksdb_convert_cf_paths_to_java_helper( @@ -1029,11 +1029,11 @@ void Java_org_rocksdb_Options_cfPaths(JNIEnv* env, jclass, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxManifestFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxManifestFileSize( +void Java_org_forstdb_Options_setMaxManifestFileSize( JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { reinterpret_cast(jhandle) ->max_manifest_file_size = static_cast(max_manifest_file_size); @@ -1043,7 +1043,7 @@ void Java_org_rocksdb_Options_setMaxManifestFileSize( * Method: setMemTableFactory * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMemTableFactory(JNIEnv*, jobject, +void Java_org_forstdb_Options_setMemTableFactory(JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) @@ -1053,11 +1053,11 @@ void Java_org_rocksdb_Options_setMemTableFactory(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setRateLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setRateLimiter(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setRateLimiter(JNIEnv*, jobject, jlong jhandle, jlong jrate_limiter_handle) { std::shared_ptr* pRateLimiter = reinterpret_cast*>( @@ -1067,11 +1067,11 @@ void Java_org_rocksdb_Options_setRateLimiter(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setSstFileManager * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setSstFileManager( +void Java_org_forstdb_Options_setSstFileManager( JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -1081,11 +1081,11 @@ void Java_org_rocksdb_Options_setSstFileManager( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLogger * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setLogger(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setLogger(JNIEnv*, jobject, jlong jhandle, jlong jlogger_handle) { std::shared_ptr* pLogger = reinterpret_cast*>( @@ -1094,43 +1094,43 @@ void Java_org_rocksdb_Options_setLogger(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setInfoLogLevel * Signature: (JB)V */ -void Java_org_rocksdb_Options_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, jbyte jlog_level) { reinterpret_cast(jhandle)->info_log_level = static_cast(jlog_level); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: infoLogLevel * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { +jbyte Java_org_forstdb_Options_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->info_log_level); } /* - * Class: org_rocksdb_Options + 
* Class: org_forstdb_Options * Method: tableCacheNumshardbits * Signature: (J)I */ -jint Java_org_rocksdb_Options_tableCacheNumshardbits(JNIEnv*, jobject, +jint Java_org_forstdb_Options_tableCacheNumshardbits(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->table_cache_numshardbits; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setTableCacheNumshardbits * Signature: (JI)V */ -void Java_org_rocksdb_Options_setTableCacheNumshardbits( +void Java_org_forstdb_Options_setTableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { reinterpret_cast(jhandle) ->table_cache_numshardbits = static_cast(table_cache_numshardbits); @@ -1140,7 +1140,7 @@ void Java_org_rocksdb_Options_setTableCacheNumshardbits( * Method: useFixedLengthPrefixExtractor * Signature: (JI)V */ -void Java_org_rocksdb_Options_useFixedLengthPrefixExtractor( +void Java_org_forstdb_Options_useFixedLengthPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( @@ -1151,7 +1151,7 @@ void Java_org_rocksdb_Options_useFixedLengthPrefixExtractor( * Method: useCappedPrefixExtractor * Signature: (JI)V */ -void Java_org_rocksdb_Options_useCappedPrefixExtractor(JNIEnv*, jobject, +void Java_org_forstdb_Options_useCappedPrefixExtractor(JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) @@ -1160,53 +1160,53 @@ void Java_org_rocksdb_Options_useCappedPrefixExtractor(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: walTtlSeconds * Signature: (J)J */ -jlong Java_org_rocksdb_Options_walTtlSeconds(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_walTtlSeconds(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_ttl_seconds; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalTtlSeconds * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWalTtlSeconds(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setWalTtlSeconds(JNIEnv*, jobject, jlong jhandle, jlong WAL_ttl_seconds) { reinterpret_cast(jhandle)->WAL_ttl_seconds = static_cast(WAL_ttl_seconds); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: walTtlSeconds * Signature: (J)J */ -jlong Java_org_rocksdb_Options_walSizeLimitMB(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_walSizeLimitMB(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_size_limit_MB; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalSizeLimitMB * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWalSizeLimitMB(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setWalSizeLimitMB(JNIEnv*, jobject, jlong jhandle, jlong WAL_size_limit_MB) { reinterpret_cast(jhandle)->WAL_size_limit_MB = static_cast(WAL_size_limit_MB); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxWriteBatchGroupSizeBytes * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxWriteBatchGroupSizeBytes( +void Java_org_forstdb_Options_setMaxWriteBatchGroupSizeBytes( JNIEnv*, jclass, jlong jhandle, jlong jmax_write_batch_group_size_bytes) { auto* opt = reinterpret_cast(jhandle); opt->max_write_batch_group_size_bytes = @@ -1214,33 +1214,33 @@ void Java_org_rocksdb_Options_setMaxWriteBatchGroupSizeBytes( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options 
* Method: maxWriteBatchGroupSizeBytes * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, +jlong Java_org_forstdb_Options_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_write_batch_group_size_bytes); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: manifestPreallocationSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_manifestPreallocationSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_manifestPreallocationSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->manifest_preallocation_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setManifestPreallocationSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setManifestPreallocationSize( +void Java_org_forstdb_Options_setManifestPreallocationSize( JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( preallocation_size); @@ -1256,7 +1256,7 @@ void Java_org_rocksdb_Options_setManifestPreallocationSize( * Method: setTableFactory * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setTableFactory(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setTableFactory(JNIEnv*, jobject, jlong jhandle, jlong jtable_factory_handle) { auto* options = reinterpret_cast(jhandle); auto* table_factory = @@ -1268,7 +1268,7 @@ void Java_org_rocksdb_Options_setTableFactory(JNIEnv*, jobject, jlong jhandle, * Method: setSstPartitionerFactory * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setSstPartitionerFactory(JNIEnv*, jobject, +void Java_org_forstdb_Options_setSstPartitionerFactory(JNIEnv*, jobject, jlong jhandle, jlong factory_handle) { auto* options = reinterpret_cast(jhandle); @@ -1279,11 +1279,11 @@ void Java_org_rocksdb_Options_setSstPartitionerFactory(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionThreadLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompactionThreadLimiter( +void Java_org_forstdb_Options_setCompactionThreadLimiter( JNIEnv*, jclass, jlong jhandle, jlong jlimiter_handle) { auto* options = reinterpret_cast(jhandle); auto* limiter = reinterpret_cast< @@ -1293,44 +1293,44 @@ void Java_org_rocksdb_Options_setCompactionThreadLimiter( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allowMmapReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allowMmapReads(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_allowMmapReads(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_reads; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllowMmapReads * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAllowMmapReads(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setAllowMmapReads(JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_reads) { reinterpret_cast(jhandle)->allow_mmap_reads = static_cast(allow_mmap_reads); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allowMmapWrites * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allowMmapWrites(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_allowMmapWrites(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_writes; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllowMmapWrites * Signature: 
(JZ)V */ -void Java_org_rocksdb_Options_setAllowMmapWrites(JNIEnv*, jobject, +void Java_org_forstdb_Options_setAllowMmapWrites(JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_writes) { reinterpret_cast(jhandle)->allow_mmap_writes = @@ -1338,44 +1338,44 @@ void Java_org_rocksdb_Options_setAllowMmapWrites(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: useDirectReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_useDirectReads(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_useDirectReads(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_reads; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setUseDirectReads * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setUseDirectReads(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setUseDirectReads(JNIEnv*, jobject, jlong jhandle, jboolean use_direct_reads) { reinterpret_cast(jhandle)->use_direct_reads = static_cast(use_direct_reads); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: useDirectIoForFlushAndCompaction * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_useDirectIoForFlushAndCompaction( +jboolean Java_org_forstdb_Options_useDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setUseDirectIoForFlushAndCompaction * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setUseDirectIoForFlushAndCompaction( +void Java_org_forstdb_Options_setUseDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_io_for_flush_and_compaction) { reinterpret_cast(jhandle) @@ -1384,44 +1384,44 @@ void Java_org_rocksdb_Options_setUseDirectIoForFlushAndCompaction( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllowFAllocate * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAllowFAllocate(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setAllowFAllocate(JNIEnv*, jobject, jlong jhandle, jboolean jallow_fallocate) { reinterpret_cast(jhandle)->allow_fallocate = static_cast(jallow_fallocate); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allowFAllocate * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allowFAllocate(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_allowFAllocate(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_fallocate); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: isFdCloseOnExec * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_isFdCloseOnExec(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_isFdCloseOnExec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->is_fd_close_on_exec; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setIsFdCloseOnExec * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setIsFdCloseOnExec(JNIEnv*, jobject, +void Java_org_forstdb_Options_setIsFdCloseOnExec(JNIEnv*, jobject, jlong jhandle, jboolean is_fd_close_on_exec) { reinterpret_cast(jhandle)->is_fd_close_on_exec = @@ -1429,22 +1429,22 @@ void Java_org_rocksdb_Options_setIsFdCloseOnExec(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: statsDumpPeriodSec * Signature: (J)I */ -jint 
Java_org_rocksdb_Options_statsDumpPeriodSec(JNIEnv*, jobject, +jint Java_org_forstdb_Options_statsDumpPeriodSec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_dump_period_sec; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setStatsDumpPeriodSec * Signature: (JI)V */ -void Java_org_rocksdb_Options_setStatsDumpPeriodSec( +void Java_org_forstdb_Options_setStatsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { reinterpret_cast(jhandle) ->stats_dump_period_sec = @@ -1452,22 +1452,22 @@ void Java_org_rocksdb_Options_setStatsDumpPeriodSec( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: statsPersistPeriodSec * Signature: (J)I */ -jint Java_org_rocksdb_Options_statsPersistPeriodSec(JNIEnv*, jobject, +jint Java_org_forstdb_Options_statsPersistPeriodSec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_persist_period_sec; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setStatsPersistPeriodSec * Signature: (JI)V */ -void Java_org_rocksdb_Options_setStatsPersistPeriodSec( +void Java_org_forstdb_Options_setStatsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { reinterpret_cast(jhandle) ->stats_persist_period_sec = @@ -1475,22 +1475,22 @@ void Java_org_rocksdb_Options_setStatsPersistPeriodSec( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: statsHistoryBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_statsHistoryBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_statsHistoryBufferSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_history_buffer_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setStatsHistoryBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setStatsHistoryBufferSize( +void Java_org_forstdb_Options_setStatsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { reinterpret_cast(jhandle) ->stats_history_buffer_size = @@ -1498,55 +1498,55 @@ void Java_org_rocksdb_Options_setStatsHistoryBufferSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: adviseRandomOnOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_adviseRandomOnOpen(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_adviseRandomOnOpen(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->advise_random_on_open; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAdviseRandomOnOpen * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAdviseRandomOnOpen( +void Java_org_forstdb_Options_setAdviseRandomOnOpen( JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { reinterpret_cast(jhandle) ->advise_random_on_open = static_cast(advise_random_on_open); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDbWriteBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setDbWriteBufferSize( +void Java_org_forstdb_Options_setDbWriteBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: dbWriteBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_dbWriteBufferSize(JNIEnv*, jobject, +jlong 
Java_org_forstdb_Options_dbWriteBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_write_buffer_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAccessHintOnCompactionStart * Signature: (JB)V */ -void Java_org_rocksdb_Options_setAccessHintOnCompactionStart( +void Java_org_forstdb_Options_setAccessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { auto* opt = reinterpret_cast(jhandle); opt->access_hint_on_compaction_start = @@ -1554,11 +1554,11 @@ void Java_org_rocksdb_Options_setAccessHintOnCompactionStart( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: accessHintOnCompactionStart * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_accessHintOnCompactionStart(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_accessHintOnCompactionStart(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( @@ -1566,11 +1566,11 @@ jbyte Java_org_rocksdb_Options_accessHintOnCompactionStart(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompactionReadaheadSize( +void Java_org_forstdb_Options_setCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { auto* opt = reinterpret_cast(jhandle); opt->compaction_readahead_size = @@ -1578,22 +1578,22 @@ void Java_org_rocksdb_Options_setCompactionReadaheadSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: compactionReadaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_compactionReadaheadSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_compactionReadaheadSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->compaction_readahead_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setRandomAccessMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setRandomAccessMaxBufferSize( +void Java_org_forstdb_Options_setRandomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->random_access_max_buffer_size = @@ -1601,22 +1601,22 @@ void Java_org_rocksdb_Options_setRandomAccessMaxBufferSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: randomAccessMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_randomAccessMaxBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_randomAccessMaxBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->random_access_max_buffer_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWritableFileMaxBufferSize( +void Java_org_forstdb_Options_setWritableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->writable_file_max_buffer_size = @@ -1624,33 +1624,33 @@ void Java_org_rocksdb_Options_setWritableFileMaxBufferSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: writableFileMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_writableFileMaxBufferSize(JNIEnv*, jobject, +jlong 
Java_org_forstdb_Options_writableFileMaxBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->writable_file_max_buffer_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: useAdaptiveMutex * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_useAdaptiveMutex(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_useAdaptiveMutex(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_adaptive_mutex; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setUseAdaptiveMutex * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setUseAdaptiveMutex(JNIEnv*, jobject, +void Java_org_forstdb_Options_setUseAdaptiveMutex(JNIEnv*, jobject, jlong jhandle, jboolean use_adaptive_mutex) { reinterpret_cast(jhandle)->use_adaptive_mutex = @@ -1658,31 +1658,31 @@ void Java_org_rocksdb_Options_setUseAdaptiveMutex(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: bytesPerSync * Signature: (J)J */ -jlong Java_org_rocksdb_Options_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->bytes_per_sync; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBytesPerSync * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, jlong bytes_per_sync) { reinterpret_cast(jhandle)->bytes_per_sync = static_cast(bytes_per_sync); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalBytesPerSync * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWalBytesPerSync(JNIEnv*, jobject, +void Java_org_forstdb_Options_setWalBytesPerSync(JNIEnv*, jobject, jlong jhandle, jlong jwal_bytes_per_sync) { reinterpret_cast(jhandle)->wal_bytes_per_sync = @@ -1690,33 +1690,33 @@ void Java_org_rocksdb_Options_setWalBytesPerSync(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: walBytesPerSync * Signature: (J)J */ -jlong Java_org_rocksdb_Options_walBytesPerSync(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_walBytesPerSync(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->wal_bytes_per_sync); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setStrictBytesPerSync * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setStrictBytesPerSync( +void Java_org_forstdb_Options_setStrictBytesPerSync( JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { reinterpret_cast(jhandle) ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: strictBytesPerSync * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_strictBytesPerSync(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_strictBytesPerSync(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->strict_bytes_per_sync); @@ -1746,11 +1746,11 @@ static void rocksdb_set_event_listeners_helper( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEventListeners * Signature: (J[J)V */ -void Java_org_rocksdb_Options_setEventListeners(JNIEnv* env, jclass, +void Java_org_forstdb_Options_setEventListeners(JNIEnv* env, jclass, jlong jhandle, jlongArray jlistener_array) { 
auto* opt = reinterpret_cast(jhandle); @@ -1782,44 +1782,44 @@ static jobjectArray rocksdb_get_event_listeners_helper( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: eventListeners * Signature: (J)[Lorg/rocksdb/AbstractEventListener; */ -jobjectArray Java_org_rocksdb_Options_eventListeners(JNIEnv* env, jclass, +jobjectArray Java_org_forstdb_Options_eventListeners(JNIEnv* env, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return rocksdb_get_event_listeners_helper(env, opt->listeners); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnableThreadTracking * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setEnableThreadTracking( +void Java_org_forstdb_Options_setEnableThreadTracking( JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { auto* opt = reinterpret_cast(jhandle); opt->enable_thread_tracking = static_cast(jenable_thread_tracking); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: enableThreadTracking * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_enableThreadTracking(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_enableThreadTracking(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_thread_tracking); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDelayedWriteRate * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setDelayedWriteRate(JNIEnv*, jobject, +void Java_org_forstdb_Options_setDelayedWriteRate(JNIEnv*, jobject, jlong jhandle, jlong jdelayed_write_rate) { auto* opt = reinterpret_cast(jhandle); @@ -1827,66 +1827,66 @@ void Java_org_rocksdb_Options_setDelayedWriteRate(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: delayedWriteRate * Signature: (J)J */ -jlong Java_org_rocksdb_Options_delayedWriteRate(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_delayedWriteRate(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->delayed_write_rate); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnablePipelinedWrite * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setEnablePipelinedWrite( +void Java_org_forstdb_Options_setEnablePipelinedWrite( JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { auto* opt = reinterpret_cast(jhandle); opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: enablePipelinedWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_enablePipelinedWrite(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_enablePipelinedWrite(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_pipelined_write); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setUnorderedWrite * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setUnorderedWrite(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setUnorderedWrite(JNIEnv*, jobject, jlong jhandle, jboolean unordered_write) { reinterpret_cast(jhandle)->unordered_write = static_cast(unordered_write); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: unorderedWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_unorderedWrite(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_unorderedWrite(JNIEnv*, jobject, jlong jhandle) { return 
reinterpret_cast(jhandle) ->unordered_write; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllowConcurrentMemtableWrite * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAllowConcurrentMemtableWrite(JNIEnv*, jobject, +void Java_org_forstdb_Options_setAllowConcurrentMemtableWrite(JNIEnv*, jobject, jlong jhandle, jboolean allow) { reinterpret_cast(jhandle) @@ -1894,44 +1894,44 @@ void Java_org_rocksdb_Options_setAllowConcurrentMemtableWrite(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allowConcurrentMemtableWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allowConcurrentMemtableWrite(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_allowConcurrentMemtableWrite(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_concurrent_memtable_write; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnableWriteThreadAdaptiveYield * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setEnableWriteThreadAdaptiveYield( +void Java_org_forstdb_Options_setEnableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle, jboolean yield) { reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield = static_cast(yield); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: enableWriteThreadAdaptiveYield * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_enableWriteThreadAdaptiveYield( +jboolean Java_org_forstdb_Options_enableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWriteThreadMaxYieldUsec * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, +void Java_org_forstdb_Options_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, jlong jhandle, jlong max) { reinterpret_cast(jhandle) @@ -1939,22 +1939,22 @@ void Java_org_rocksdb_Options_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: writeThreadMaxYieldUsec * Signature: (J)J */ -jlong Java_org_rocksdb_Options_writeThreadMaxYieldUsec(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_writeThreadMaxYieldUsec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_max_yield_usec; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWriteThreadSlowYieldUsec * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, +void Java_org_forstdb_Options_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, jlong jhandle, jlong slow) { reinterpret_cast(jhandle) @@ -1962,22 +1962,22 @@ void Java_org_rocksdb_Options_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: writeThreadSlowYieldUsec * Signature: (J)J */ -jlong Java_org_rocksdb_Options_writeThreadSlowYieldUsec(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_writeThreadSlowYieldUsec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_slow_yield_usec; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setSkipStatsUpdateOnDbOpen * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setSkipStatsUpdateOnDbOpen( +void Java_org_forstdb_Options_setSkipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { auto* opt = 
reinterpret_cast(jhandle); opt->skip_stats_update_on_db_open = @@ -1985,22 +1985,22 @@ void Java_org_rocksdb_Options_setSkipStatsUpdateOnDbOpen( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: skipStatsUpdateOnDbOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_stats_update_on_db_open); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setSkipCheckingSstFileSizesOnDbOpen * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setSkipCheckingSstFileSizesOnDbOpen( +void Java_org_forstdb_Options_setSkipCheckingSstFileSizesOnDbOpen( JNIEnv*, jclass, jlong jhandle, jboolean jskip_checking_sst_file_sizes_on_db_open) { auto* opt = reinterpret_cast(jhandle); @@ -2009,22 +2009,22 @@ void Java_org_rocksdb_Options_setSkipCheckingSstFileSizesOnDbOpen( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: skipCheckingSstFileSizesOnDbOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_skipCheckingSstFileSizesOnDbOpen( +jboolean Java_org_forstdb_Options_skipCheckingSstFileSizesOnDbOpen( JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalRecoveryMode * Signature: (JB)V */ -void Java_org_rocksdb_Options_setWalRecoveryMode( +void Java_org_forstdb_Options_setWalRecoveryMode( JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { auto* opt = reinterpret_cast(jhandle); opt->wal_recovery_mode = @@ -2033,11 +2033,11 @@ void Java_org_rocksdb_Options_setWalRecoveryMode( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: walRecoveryMode * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_walRecoveryMode(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_walRecoveryMode(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( @@ -2045,32 +2045,32 @@ jbyte Java_org_rocksdb_Options_walRecoveryMode(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllow2pc * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAllow2pc(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setAllow2pc(JNIEnv*, jobject, jlong jhandle, jboolean jallow_2pc) { auto* opt = reinterpret_cast(jhandle); opt->allow_2pc = static_cast(jallow_2pc); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allow2pc * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allow2pc(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_Options_allow2pc(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_2pc); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setRowCache * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setRowCache(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setRowCache(JNIEnv*, jobject, jlong jhandle, jlong jrow_cache_handle) { auto* opt = reinterpret_cast(jhandle); auto* row_cache = @@ -2080,11 +2080,11 @@ void Java_org_rocksdb_Options_setRowCache(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWalFilter 
* Signature: (JJ)V */ -void Java_org_rocksdb_Options_setWalFilter(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setWalFilter(JNIEnv*, jobject, jlong jhandle, jlong jwal_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* wal_filter = reinterpret_cast( @@ -2093,11 +2093,11 @@ void Java_org_rocksdb_Options_setWalFilter(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setFailIfOptionsFileError * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setFailIfOptionsFileError( +void Java_org_forstdb_Options_setFailIfOptionsFileError( JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { auto* opt = reinterpret_cast(jhandle); opt->fail_if_options_file_error = @@ -2105,22 +2105,22 @@ void Java_org_rocksdb_Options_setFailIfOptionsFileError( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: failIfOptionsFileError * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_failIfOptionsFileError(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_failIfOptionsFileError(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->fail_if_options_file_error); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDumpMallocStats * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setDumpMallocStats(JNIEnv*, jobject, +void Java_org_forstdb_Options_setDumpMallocStats(JNIEnv*, jobject, jlong jhandle, jboolean jdump_malloc_stats) { auto* opt = reinterpret_cast(jhandle); @@ -2128,22 +2128,22 @@ void Java_org_rocksdb_Options_setDumpMallocStats(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: dumpMallocStats * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_dumpMallocStats(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_dumpMallocStats(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->dump_malloc_stats); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAvoidFlushDuringRecovery * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAvoidFlushDuringRecovery( +void Java_org_forstdb_Options_setAvoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_recovery = @@ -2151,88 +2151,88 @@ void Java_org_rocksdb_Options_setAvoidFlushDuringRecovery( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: avoidFlushDuringRecovery * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_avoidFlushDuringRecovery(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_avoidFlushDuringRecovery(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_recovery); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAvoidUnnecessaryBlockingIO * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAvoidUnnecessaryBlockingIO( +void Java_org_forstdb_Options_setAvoidUnnecessaryBlockingIO( JNIEnv*, jclass, jlong jhandle, jboolean avoid_blocking_io) { auto* opt = reinterpret_cast(jhandle); opt->avoid_unnecessary_blocking_io = static_cast(avoid_blocking_io); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: avoidUnnecessaryBlockingIO * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_avoidUnnecessaryBlockingIO(JNIEnv*, jclass, +jboolean 
Java_org_forstdb_Options_avoidUnnecessaryBlockingIO(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_unnecessary_blocking_io); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setPersistStatsToDisk * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setPersistStatsToDisk( +void Java_org_forstdb_Options_setPersistStatsToDisk( JNIEnv*, jclass, jlong jhandle, jboolean persist_stats_to_disk) { auto* opt = reinterpret_cast(jhandle); opt->persist_stats_to_disk = static_cast(persist_stats_to_disk); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: persistStatsToDisk * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_persistStatsToDisk(JNIEnv*, jclass, +jboolean Java_org_forstdb_Options_persistStatsToDisk(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->persist_stats_to_disk); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setWriteDbidToManifest * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setWriteDbidToManifest( +void Java_org_forstdb_Options_setWriteDbidToManifest( JNIEnv*, jclass, jlong jhandle, jboolean jwrite_dbid_to_manifest) { auto* opt = reinterpret_cast(jhandle); opt->write_dbid_to_manifest = static_cast(jwrite_dbid_to_manifest); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: writeDbidToManifest * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_writeDbidToManifest(JNIEnv*, jclass, +jboolean Java_org_forstdb_Options_writeDbidToManifest(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->write_dbid_to_manifest); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLogReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setLogReadaheadSize(JNIEnv*, jclass, +void Java_org_forstdb_Options_setLogReadaheadSize(JNIEnv*, jclass, jlong jhandle, jlong jlog_readahead_size) { auto* opt = reinterpret_cast(jhandle); @@ -2240,66 +2240,66 @@ void Java_org_rocksdb_Options_setLogReadaheadSize(JNIEnv*, jclass, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: logReasaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_logReadaheadSize(JNIEnv*, jclass, +jlong Java_org_forstdb_Options_logReadaheadSize(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->log_readahead_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBestEffortsRecovery * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setBestEffortsRecovery( +void Java_org_forstdb_Options_setBestEffortsRecovery( JNIEnv*, jclass, jlong jhandle, jboolean jbest_efforts_recovery) { auto* opt = reinterpret_cast(jhandle); opt->best_efforts_recovery = static_cast(jbest_efforts_recovery); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: bestEffortsRecovery * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_bestEffortsRecovery(JNIEnv*, jclass, +jboolean Java_org_forstdb_Options_bestEffortsRecovery(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->best_efforts_recovery); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBgErrorResumeCount * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxBgErrorResumeCount( +void Java_org_forstdb_Options_setMaxBgErrorResumeCount( JNIEnv*, jclass, jlong jhandle, jint 
jmax_bgerror_resume_count) { auto* opt = reinterpret_cast(jhandle); opt->max_bgerror_resume_count = static_cast(jmax_bgerror_resume_count); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBgerrorResumeCount * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxBgerrorResumeCount(JNIEnv*, jclass, +jint Java_org_forstdb_Options_maxBgerrorResumeCount(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_bgerror_resume_count); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBgerrorResumeRetryInterval * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setBgerrorResumeRetryInterval( +void Java_org_forstdb_Options_setBgerrorResumeRetryInterval( JNIEnv*, jclass, jlong jhandle, jlong jbgerror_resume_retry_interval) { auto* opt = reinterpret_cast(jhandle); opt->bgerror_resume_retry_interval = @@ -2307,22 +2307,22 @@ void Java_org_rocksdb_Options_setBgerrorResumeRetryInterval( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: bgerrorResumeRetryInterval * Signature: (J)J */ -jlong Java_org_rocksdb_Options_bgerrorResumeRetryInterval(JNIEnv*, jclass, +jlong Java_org_forstdb_Options_bgerrorResumeRetryInterval(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->bgerror_resume_retry_interval); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAvoidFlushDuringShutdown * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAvoidFlushDuringShutdown( +void Java_org_forstdb_Options_setAvoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_shutdown = @@ -2330,99 +2330,99 @@ void Java_org_rocksdb_Options_setAvoidFlushDuringShutdown( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: avoidFlushDuringShutdown * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_avoidFlushDuringShutdown(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_avoidFlushDuringShutdown(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_shutdown); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAllowIngestBehind * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAllowIngestBehind( +void Java_org_forstdb_Options_setAllowIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { auto* opt = reinterpret_cast(jhandle); opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: allowIngestBehind * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_allowIngestBehind(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_allowIngestBehind(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_ingest_behind); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setTwoWriteQueues * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setTwoWriteQueues(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setTwoWriteQueues(JNIEnv*, jobject, jlong jhandle, jboolean jtwo_write_queues) { auto* opt = reinterpret_cast(jhandle); opt->two_write_queues = jtwo_write_queues == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: twoWriteQueues * Signature: (J)Z */ -jboolean 
Java_org_rocksdb_Options_twoWriteQueues(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_twoWriteQueues(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->two_write_queues); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setManualWalFlush * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setManualWalFlush(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setManualWalFlush(JNIEnv*, jobject, jlong jhandle, jboolean jmanual_wal_flush) { auto* opt = reinterpret_cast(jhandle); opt->manual_wal_flush = jmanual_wal_flush == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: manualWalFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_manualWalFlush(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_manualWalFlush(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->manual_wal_flush); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setAtomicFlush * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, jboolean jatomic_flush) { auto* opt = reinterpret_cast(jhandle); opt->atomic_flush = jatomic_flush == JNI_TRUE; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: atomicFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_atomicFlush(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_Options_atomicFlush(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->atomic_flush); } @@ -2431,7 +2431,7 @@ jboolean Java_org_rocksdb_Options_atomicFlush(JNIEnv*, jobject, jlong jhandle) { * Method: tableFactoryName * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_Options_tableFactoryName(JNIEnv* env, jobject, +jstring Java_org_forstdb_Options_tableFactoryName(JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TableFactory* tf = opt->table_factory.get(); @@ -2444,44 +2444,44 @@ jstring Java_org_rocksdb_Options_tableFactoryName(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: minWriteBufferNumberToMerge * Signature: (J)I */ -jint Java_org_rocksdb_Options_minWriteBufferNumberToMerge(JNIEnv*, jobject, +jint Java_org_forstdb_Options_minWriteBufferNumberToMerge(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMinWriteBufferNumberToMerge * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMinWriteBufferNumberToMerge( +void Java_org_forstdb_Options_setMinWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge = static_cast(jmin_write_buffer_number_to_merge); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxWriteBufferNumberToMaintain * Signature: (J)I */ -jint Java_org_rocksdb_Options_maxWriteBufferNumberToMaintain(JNIEnv*, jobject, +jint Java_org_forstdb_Options_maxWriteBufferNumberToMaintain(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxWriteBufferNumberToMaintain * Signature: 
(JI)V */ -void Java_org_rocksdb_Options_setMaxWriteBufferNumberToMaintain( +void Java_org_forstdb_Options_setMaxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number_to_maintain) { reinterpret_cast(jhandle) @@ -2490,11 +2490,11 @@ void Java_org_rocksdb_Options_setMaxWriteBufferNumberToMaintain( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompressionType * Signature: (JB)V */ -void Java_org_rocksdb_Options_setCompressionType( +void Java_org_forstdb_Options_setCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* opts = reinterpret_cast(jhandle); opts->compression = @@ -2503,11 +2503,11 @@ void Java_org_rocksdb_Options_setCompressionType( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: compressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_compressionType(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_compressionType(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( @@ -2594,11 +2594,11 @@ jbyteArray rocksdb_compression_list_helper( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompressionPerLevel * Signature: (J[B)V */ -void Java_org_rocksdb_Options_setCompressionPerLevel( +void Java_org_forstdb_Options_setCompressionPerLevel( JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { auto uptr_compression_levels = rocksdb_compression_vector_helper(env, jcompressionLevels); @@ -2611,22 +2611,22 @@ void Java_org_rocksdb_Options_setCompressionPerLevel( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: compressionPerLevel * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_Options_compressionPerLevel(JNIEnv* env, jobject, +jbyteArray Java_org_forstdb_Options_compressionPerLevel(JNIEnv* env, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return rocksdb_compression_list_helper(env, options->compression_per_level); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBottommostCompressionType * Signature: (JB)V */ -void Java_org_rocksdb_Options_setBottommostCompressionType( +void Java_org_forstdb_Options_setBottommostCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* options = reinterpret_cast(jhandle); options->bottommost_compression = @@ -2635,11 +2635,11 @@ void Java_org_rocksdb_Options_setBottommostCompressionType( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: bottommostCompressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_bottommostCompressionType(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_bottommostCompressionType(JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( @@ -2647,11 +2647,11 @@ jbyte Java_org_rocksdb_Options_bottommostCompressionType(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBottommostCompressionOptions * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setBottommostCompressionOptions( +void Java_org_forstdb_Options_setBottommostCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jbottommost_compression_options_handle) { auto* options = reinterpret_cast(jhandle); @@ -2662,11 +2662,11 @@ void Java_org_rocksdb_Options_setBottommostCompressionOptions( } /* - * Class: 
org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompressionOptions * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompressionOptions( +void Java_org_forstdb_Options_setCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jcompression_options_handle) { auto* options = reinterpret_cast(jhandle); auto* compression_options = @@ -2676,11 +2676,11 @@ void Java_org_rocksdb_Options_setCompressionOptions( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionStyle * Signature: (JB)V */ -void Java_org_rocksdb_Options_setCompactionStyle(JNIEnv*, jobject, +void Java_org_forstdb_Options_setCompactionStyle(JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_style) { auto* options = reinterpret_cast(jhandle); @@ -2690,11 +2690,11 @@ void Java_org_rocksdb_Options_setCompactionStyle(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: compactionStyle * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_compactionStyle(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_compactionStyle(JNIEnv*, jobject, jlong jhandle) { auto* options = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionStyleJni::toJavaCompactionStyle( @@ -2702,11 +2702,11 @@ jbyte Java_org_rocksdb_Options_compactionStyle(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxTableFilesSizeFIFO * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxTableFilesSizeFIFO( +void Java_org_forstdb_Options_setMaxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size = @@ -2714,42 +2714,42 @@ void Java_org_rocksdb_Options_setMaxTableFilesSizeFIFO( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxTableFilesSizeFIFO * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxTableFilesSizeFIFO(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxTableFilesSizeFIFO(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: numLevels * Signature: (J)I */ -jint Java_org_rocksdb_Options_numLevels(JNIEnv*, jobject, jlong jhandle) { +jint Java_org_forstdb_Options_numLevels(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->num_levels; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setNumLevels * Signature: (JI)V */ -void Java_org_rocksdb_Options_setNumLevels(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setNumLevels(JNIEnv*, jobject, jlong jhandle, jint jnum_levels) { reinterpret_cast(jhandle)->num_levels = static_cast(jnum_levels); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: levelZeroFileNumCompactionTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_levelZeroFileNumCompactionTrigger(JNIEnv*, +jint Java_org_forstdb_Options_levelZeroFileNumCompactionTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) @@ -2757,11 +2757,11 @@ jint Java_org_rocksdb_Options_levelZeroFileNumCompactionTrigger(JNIEnv*, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevelZeroFileNumCompactionTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevelZeroFileNumCompactionTrigger( +void Java_org_forstdb_Options_setLevelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint 
jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = @@ -2769,22 +2769,22 @@ void Java_org_rocksdb_Options_setLevelZeroFileNumCompactionTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: levelZeroSlowdownWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_levelZeroSlowdownWritesTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_Options_levelZeroSlowdownWritesTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevelSlowdownWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevelZeroSlowdownWritesTrigger( +void Java_org_forstdb_Options_setLevelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = @@ -2792,22 +2792,22 @@ void Java_org_rocksdb_Options_setLevelZeroSlowdownWritesTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: levelZeroStopWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_levelZeroStopWritesTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_Options_levelZeroStopWritesTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevelStopWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevelZeroStopWritesTrigger( +void Java_org_forstdb_Options_setLevelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = @@ -2815,44 +2815,44 @@ void Java_org_rocksdb_Options_setLevelZeroStopWritesTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: targetFileSizeBase * Signature: (J)J */ -jlong Java_org_rocksdb_Options_targetFileSizeBase(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_targetFileSizeBase(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_base; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setTargetFileSizeBase * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setTargetFileSizeBase( +void Java_org_forstdb_Options_setTargetFileSizeBase( JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { reinterpret_cast(jhandle) ->target_file_size_base = static_cast(jtarget_file_size_base); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: targetFileSizeMultiplier * Signature: (J)I */ -jint Java_org_rocksdb_Options_targetFileSizeMultiplier(JNIEnv*, jobject, +jint Java_org_forstdb_Options_targetFileSizeMultiplier(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_multiplier; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setTargetFileSizeMultiplier * Signature: (JI)V */ -void Java_org_rocksdb_Options_setTargetFileSizeMultiplier( +void Java_org_forstdb_Options_setTargetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { reinterpret_cast(jhandle) ->target_file_size_multiplier = @@ -2860,22 +2860,22 @@ void Java_org_rocksdb_Options_setTargetFileSizeMultiplier( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBytesForLevelBase * Signature: (J)J */ -jlong 
Java_org_rocksdb_Options_maxBytesForLevelBase(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxBytesForLevelBase(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_base; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBytesForLevelBase * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxBytesForLevelBase( +void Java_org_forstdb_Options_setMaxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { reinterpret_cast(jhandle) ->max_bytes_for_level_base = @@ -2883,44 +2883,44 @@ void Java_org_rocksdb_Options_setMaxBytesForLevelBase( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: levelCompactionDynamicLevelBytes * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_levelCompactionDynamicLevelBytes( +jboolean Java_org_forstdb_Options_levelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevelCompactionDynamicLevelBytes * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setLevelCompactionDynamicLevelBytes( +void Java_org_forstdb_Options_setLevelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBytesForLevelMultiplier * Signature: (J)D */ -jdouble Java_org_rocksdb_Options_maxBytesForLevelMultiplier(JNIEnv*, jobject, +jdouble Java_org_forstdb_Options_maxBytesForLevelMultiplier(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBytesForLevelMultiplier * Signature: (JD)V */ -void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplier( +void Java_org_forstdb_Options_setMaxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier = @@ -2928,11 +2928,11 @@ void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplier( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxCompactionBytes * Signature: (J)I */ -jlong Java_org_rocksdb_Options_maxCompactionBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxCompactionBytes(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) @@ -2940,32 +2940,32 @@ jlong Java_org_rocksdb_Options_maxCompactionBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxCompactionBytes * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMaxCompactionBytes( +void Java_org_forstdb_Options_setMaxCompactionBytes( JNIEnv*, jobject, jlong jhandle, jlong jmax_compaction_bytes) { reinterpret_cast(jhandle)->max_compaction_bytes = static_cast(jmax_compaction_bytes); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: arenaBlockSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_arenaBlockSize(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_arenaBlockSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->arena_block_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setArenaBlockSize * Signature: (JJ)V */ -void 
Java_org_rocksdb_Options_setArenaBlockSize(JNIEnv* env, jobject, +void Java_org_forstdb_Options_setArenaBlockSize(JNIEnv* env, jobject, jlong jhandle, jlong jarena_block_size) { auto s = @@ -2979,44 +2979,44 @@ void Java_org_rocksdb_Options_setArenaBlockSize(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: disableAutoCompactions * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_disableAutoCompactions(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_disableAutoCompactions(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disable_auto_compactions; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setDisableAutoCompactions * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setDisableAutoCompactions( +void Java_org_forstdb_Options_setDisableAutoCompactions( JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { reinterpret_cast(jhandle) ->disable_auto_compactions = static_cast(jdisable_auto_compactions); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxSequentialSkipInIterations * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxSequentialSkipInIterations(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxSequentialSkipInIterations(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxSequentialSkipInIterations * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxSequentialSkipInIterations( +void Java_org_forstdb_Options_setMaxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations = @@ -3024,44 +3024,44 @@ void Java_org_rocksdb_Options_setMaxSequentialSkipInIterations( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: inplaceUpdateSupport * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_inplaceUpdateSupport(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_inplaceUpdateSupport(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_support; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setInplaceUpdateSupport * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setInplaceUpdateSupport( +void Java_org_forstdb_Options_setInplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { reinterpret_cast(jhandle) ->inplace_update_support = static_cast(jinplace_update_support); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: inplaceUpdateNumLocks * Signature: (J)J */ -jlong Java_org_rocksdb_Options_inplaceUpdateNumLocks(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_inplaceUpdateNumLocks(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_num_locks; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setInplaceUpdateNumLocks * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setInplaceUpdateNumLocks( +void Java_org_forstdb_Options_setInplaceUpdateNumLocks( JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jinplace_update_num_locks); @@ -3074,22 +3074,22 @@ void Java_org_rocksdb_Options_setInplaceUpdateNumLocks( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: 
memtablePrefixBloomSizeRatio * Signature: (J)I */ -jdouble Java_org_rocksdb_Options_memtablePrefixBloomSizeRatio(JNIEnv*, jobject, +jdouble Java_org_forstdb_Options_memtablePrefixBloomSizeRatio(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMemtablePrefixBloomSizeRatio * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMemtablePrefixBloomSizeRatio( +void Java_org_forstdb_Options_setMemtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle, jdouble jmemtable_prefix_bloom_size_ratio) { reinterpret_cast(jhandle) @@ -3098,22 +3098,22 @@ void Java_org_rocksdb_Options_setMemtablePrefixBloomSizeRatio( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: experimentalMempurgeThreshold * Signature: (J)I */ -jdouble Java_org_rocksdb_Options_experimentalMempurgeThreshold(JNIEnv*, jobject, +jdouble Java_org_forstdb_Options_experimentalMempurgeThreshold(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->experimental_mempurge_threshold; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setExperimentalMempurgeThreshold * Signature: (JI)V */ -void Java_org_rocksdb_Options_setExperimentalMempurgeThreshold( +void Java_org_forstdb_Options_setExperimentalMempurgeThreshold( JNIEnv*, jobject, jlong jhandle, jdouble jexperimental_mempurge_threshold) { reinterpret_cast(jhandle) ->experimental_mempurge_threshold = @@ -3121,22 +3121,22 @@ void Java_org_rocksdb_Options_setExperimentalMempurgeThreshold( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: memtableWholeKeyFiltering * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_memtableWholeKeyFiltering(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_memtableWholeKeyFiltering(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_whole_key_filtering; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMemtableWholeKeyFiltering * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setMemtableWholeKeyFiltering( +void Java_org_forstdb_Options_setMemtableWholeKeyFiltering( JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_whole_key_filtering) { reinterpret_cast(jhandle) ->memtable_whole_key_filtering = @@ -3144,42 +3144,42 @@ void Java_org_rocksdb_Options_setMemtableWholeKeyFiltering( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: bloomLocality * Signature: (J)I */ -jint Java_org_rocksdb_Options_bloomLocality(JNIEnv*, jobject, jlong jhandle) { +jint Java_org_forstdb_Options_bloomLocality(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->bloom_locality; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBloomLocality * Signature: (JI)V */ -void Java_org_rocksdb_Options_setBloomLocality(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setBloomLocality(JNIEnv*, jobject, jlong jhandle, jint jbloom_locality) { reinterpret_cast(jhandle)->bloom_locality = static_cast(jbloom_locality); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxSuccessiveMerges * Signature: (J)J */ -jlong Java_org_rocksdb_Options_maxSuccessiveMerges(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_maxSuccessiveMerges(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_successive_merges; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: 
setMaxSuccessiveMerges * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMaxSuccessiveMerges( +void Java_org_forstdb_Options_setMaxSuccessiveMerges( JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmax_successive_merges); @@ -3192,22 +3192,22 @@ void Java_org_rocksdb_Options_setMaxSuccessiveMerges( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeFiltersForHits * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_optimizeFiltersForHits(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_optimizeFiltersForHits(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->optimize_filters_for_hits; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setOptimizeFiltersForHits * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setOptimizeFiltersForHits( +void Java_org_forstdb_Options_setOptimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { reinterpret_cast(jhandle) ->optimize_filters_for_hits = @@ -3215,11 +3215,11 @@ void Java_org_rocksdb_Options_setOptimizeFiltersForHits( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: oldDefaults * Signature: (JII)V */ -void Java_org_rocksdb_Options_oldDefaults(JNIEnv*, jclass, jlong jhandle, +void Java_org_forstdb_Options_oldDefaults(JNIEnv*, jclass, jlong jhandle, jint major_version, jint minor_version) { reinterpret_cast(jhandle)->OldDefaults( @@ -3227,21 +3227,21 @@ void Java_org_rocksdb_Options_oldDefaults(JNIEnv*, jclass, jlong jhandle, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeForSmallDb * Signature: (J)V */ -void Java_org_rocksdb_Options_optimizeForSmallDb__J(JNIEnv*, jobject, +void Java_org_forstdb_Options_optimizeForSmallDb__J(JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle)->OptimizeForSmallDb(); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeForSmallDb * Signature: (JJ)V */ -void Java_org_rocksdb_Options_optimizeForSmallDb__JJ(JNIEnv*, jclass, +void Java_org_forstdb_Options_optimizeForSmallDb__JJ(JNIEnv*, jclass, jlong jhandle, jlong cache_handle) { auto* cache_sptr_ptr = @@ -3254,65 +3254,65 @@ void Java_org_rocksdb_Options_optimizeForSmallDb__JJ(JNIEnv*, jclass, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeForPointLookup * Signature: (JJ)V */ -void Java_org_rocksdb_Options_optimizeForPointLookup( +void Java_org_forstdb_Options_optimizeForPointLookup( JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { reinterpret_cast(jhandle) ->OptimizeForPointLookup(block_cache_size_mb); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeLevelStyleCompaction * Signature: (JJ)V */ -void Java_org_rocksdb_Options_optimizeLevelStyleCompaction( +void Java_org_forstdb_Options_optimizeLevelStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeLevelStyleCompaction(memtable_memory_budget); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: optimizeUniversalStyleCompaction * Signature: (JJ)V */ -void Java_org_rocksdb_Options_optimizeUniversalStyleCompaction( +void Java_org_forstdb_Options_optimizeUniversalStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeUniversalStyleCompaction(memtable_memory_budget); } 
/* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: prepareForBulkLoad * Signature: (J)V */ -void Java_org_rocksdb_Options_prepareForBulkLoad(JNIEnv*, jobject, +void Java_org_forstdb_Options_prepareForBulkLoad(JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle)->PrepareForBulkLoad(); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: memtableHugePageSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_memtableHugePageSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_memtableHugePageSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_huge_page_size; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMemtableHugePageSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMemtableHugePageSize( +void Java_org_forstdb_Options_setMemtableHugePageSize( JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmemtable_huge_page_size); @@ -3325,22 +3325,22 @@ void Java_org_rocksdb_Options_setMemtableHugePageSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: softPendingCompactionBytesLimit * Signature: (J)J */ -jlong Java_org_rocksdb_Options_softPendingCompactionBytesLimit(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_softPendingCompactionBytesLimit(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setSoftPendingCompactionBytesLimit * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setSoftPendingCompactionBytesLimit( +void Java_org_forstdb_Options_setSoftPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jsoft_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) @@ -3349,22 +3349,22 @@ void Java_org_rocksdb_Options_setSoftPendingCompactionBytesLimit( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: softHardCompactionBytesLimit * Signature: (J)J */ -jlong Java_org_rocksdb_Options_hardPendingCompactionBytesLimit(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_hardPendingCompactionBytesLimit(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setHardPendingCompactionBytesLimit * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setHardPendingCompactionBytesLimit( +void Java_org_forstdb_Options_setHardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jhard_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) @@ -3373,22 +3373,22 @@ void Java_org_rocksdb_Options_setHardPendingCompactionBytesLimit( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: level0FileNumCompactionTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_level0FileNumCompactionTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_Options_level0FileNumCompactionTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevel0FileNumCompactionTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevel0FileNumCompactionTrigger( +void Java_org_forstdb_Options_setLevel0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { 
reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = @@ -3396,22 +3396,22 @@ void Java_org_rocksdb_Options_setLevel0FileNumCompactionTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: level0SlowdownWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_level0SlowdownWritesTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_Options_level0SlowdownWritesTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevel0SlowdownWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevel0SlowdownWritesTrigger( +void Java_org_forstdb_Options_setLevel0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = @@ -3419,22 +3419,22 @@ void Java_org_rocksdb_Options_setLevel0SlowdownWritesTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: level0StopWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_Options_level0StopWritesTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_Options_level0StopWritesTrigger(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setLevel0StopWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_Options_setLevel0StopWritesTrigger( +void Java_org_forstdb_Options_setLevel0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = @@ -3442,11 +3442,11 @@ void Java_org_rocksdb_Options_setLevel0StopWritesTrigger( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: maxBytesForLevelMultiplierAdditional * Signature: (J)[I */ -jintArray Java_org_rocksdb_Options_maxBytesForLevelMultiplierAdditional( +jintArray Java_org_forstdb_Options_maxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle) { auto mbflma = reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier_additional; @@ -3480,11 +3480,11 @@ jintArray Java_org_rocksdb_Options_maxBytesForLevelMultiplierAdditional( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMaxBytesForLevelMultiplierAdditional * Signature: (J[I)V */ -void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplierAdditional( +void Java_org_forstdb_Options_setMaxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle, jintArray jmax_bytes_for_level_multiplier_additional) { jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); @@ -3507,33 +3507,33 @@ void Java_org_rocksdb_Options_setMaxBytesForLevelMultiplierAdditional( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: paranoidFileChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_paranoidFileChecks(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_paranoidFileChecks(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_file_checks; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setParanoidFileChecks * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setParanoidFileChecks( +void Java_org_forstdb_Options_setParanoidFileChecks( JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { reinterpret_cast(jhandle)->paranoid_file_checks = 
static_cast(jparanoid_file_checks); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionPriority * Signature: (JB)V */ -void Java_org_rocksdb_Options_setCompactionPriority( +void Java_org_forstdb_Options_setCompactionPriority( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { auto* opts = reinterpret_cast(jhandle); opts->compaction_pri = @@ -3542,11 +3542,11 @@ void Java_org_rocksdb_Options_setCompactionPriority( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: compactionPriority * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_compactionPriority(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_compactionPriority(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompactionPriorityJni::toJavaCompactionPriority( @@ -3554,11 +3554,11 @@ jbyte Java_org_rocksdb_Options_compactionPriority(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setReportBgIoStats * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setReportBgIoStats(JNIEnv*, jobject, +void Java_org_forstdb_Options_setReportBgIoStats(JNIEnv*, jobject, jlong jhandle, jboolean jreport_bg_io_stats) { auto* opts = reinterpret_cast(jhandle); @@ -3566,43 +3566,43 @@ void Java_org_rocksdb_Options_setReportBgIoStats(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: reportBgIoStats * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_reportBgIoStats(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_reportBgIoStats(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->report_bg_io_stats); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setTtl * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setTtl(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setTtl(JNIEnv*, jobject, jlong jhandle, jlong jttl) { auto* opts = reinterpret_cast(jhandle); opts->ttl = static_cast(jttl); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: ttl * Signature: (J)J */ -jlong Java_org_rocksdb_Options_ttl(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_ttl(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->ttl); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setPeriodicCompactionSeconds * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setPeriodicCompactionSeconds( +void Java_org_forstdb_Options_setPeriodicCompactionSeconds( JNIEnv*, jobject, jlong jhandle, jlong jperiodicCompactionSeconds) { auto* opts = reinterpret_cast(jhandle); opts->periodic_compaction_seconds = @@ -3610,22 +3610,22 @@ void Java_org_rocksdb_Options_setPeriodicCompactionSeconds( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: periodicCompactionSeconds * Signature: (J)J */ -jlong Java_org_rocksdb_Options_periodicCompactionSeconds(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_periodicCompactionSeconds(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->periodic_compaction_seconds); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionOptionsUniversal * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompactionOptionsUniversal( +void Java_org_forstdb_Options_setCompactionOptionsUniversal( JNIEnv*, jobject, jlong jhandle, jlong 
jcompaction_options_universal_handle) { auto* opts = reinterpret_cast(jhandle); @@ -3636,11 +3636,11 @@ void Java_org_rocksdb_Options_setCompactionOptionsUniversal( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setCompactionOptionsFIFO * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setCompactionOptionsFIFO( +void Java_org_forstdb_Options_setCompactionOptionsFIFO( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { auto* opts = reinterpret_cast(jhandle); auto* opts_fifo = reinterpret_cast( @@ -3649,22 +3649,22 @@ void Java_org_rocksdb_Options_setCompactionOptionsFIFO( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setForceConsistencyChecks * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setForceConsistencyChecks( +void Java_org_forstdb_Options_setForceConsistencyChecks( JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { auto* opts = reinterpret_cast(jhandle); opts->force_consistency_checks = static_cast(jforce_consistency_checks); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: forceConsistencyChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_forceConsistencyChecks(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_forceConsistencyChecks(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->force_consistency_checks); @@ -3673,11 +3673,11 @@ jboolean Java_org_rocksdb_Options_forceConsistencyChecks(JNIEnv*, jobject, /// BLOB options /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnableBlobFiles * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setEnableBlobFiles(JNIEnv*, jobject, +void Java_org_forstdb_Options_setEnableBlobFiles(JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_files) { auto* opts = reinterpret_cast(jhandle); @@ -3685,64 +3685,64 @@ void Java_org_rocksdb_Options_setEnableBlobFiles(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: enableBlobFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_enableBlobFiles(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_enableBlobFiles(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->enable_blob_files); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMinBlobSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setMinBlobSize(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setMinBlobSize(JNIEnv*, jobject, jlong jhandle, jlong jmin_blob_size) { auto* opts = reinterpret_cast(jhandle); opts->min_blob_size = static_cast(jmin_blob_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: minBlobSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_minBlobSize(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_minBlobSize(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->min_blob_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setBlobFileSize(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Options_setBlobFileSize(JNIEnv*, jobject, jlong jhandle, jlong jblob_file_size) { auto* opts = reinterpret_cast(jhandle); opts->blob_file_size = static_cast(jblob_file_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: 
blobFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_blobFileSize(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_Options_blobFileSize(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->blob_file_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobCompressionType * Signature: (JB)V */ -void Java_org_rocksdb_Options_setBlobCompressionType( +void Java_org_forstdb_Options_setBlobCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jblob_compression_type_value) { auto* opts = reinterpret_cast(jhandle); opts->blob_compression_type = @@ -3751,11 +3751,11 @@ void Java_org_rocksdb_Options_setBlobCompressionType( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: blobCompressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_blobCompressionType(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_blobCompressionType(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::CompressionTypeJni::toJavaCompressionType( @@ -3763,11 +3763,11 @@ jbyte Java_org_rocksdb_Options_blobCompressionType(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setEnableBlobGarbageCollection * Signature: (JZ)V */ -void Java_org_rocksdb_Options_setEnableBlobGarbageCollection( +void Java_org_forstdb_Options_setEnableBlobGarbageCollection( JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_garbage_collection) { auto* opts = reinterpret_cast(jhandle); opts->enable_blob_garbage_collection = @@ -3775,22 +3775,22 @@ void Java_org_rocksdb_Options_setEnableBlobGarbageCollection( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: enableBlobGarbageCollection * Signature: (J)Z */ -jboolean Java_org_rocksdb_Options_enableBlobGarbageCollection(JNIEnv*, jobject, +jboolean Java_org_forstdb_Options_enableBlobGarbageCollection(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->enable_blob_garbage_collection); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobGarbageCollectionAgeCutoff * Signature: (JD)V */ -void Java_org_rocksdb_Options_setBlobGarbageCollectionAgeCutoff( +void Java_org_forstdb_Options_setBlobGarbageCollectionAgeCutoff( JNIEnv*, jobject, jlong jhandle, jdouble jblob_garbage_collection_age_cutoff) { auto* opts = reinterpret_cast(jhandle); @@ -3799,11 +3799,11 @@ void Java_org_rocksdb_Options_setBlobGarbageCollectionAgeCutoff( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: blobGarbageCollectionAgeCutoff * Signature: (J)D */ -jdouble Java_org_rocksdb_Options_blobGarbageCollectionAgeCutoff(JNIEnv*, +jdouble Java_org_forstdb_Options_blobGarbageCollectionAgeCutoff(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -3811,11 +3811,11 @@ jdouble Java_org_rocksdb_Options_blobGarbageCollectionAgeCutoff(JNIEnv*, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobGarbageCollectionForceThreshold * Signature: (JD)V */ -void Java_org_rocksdb_Options_setBlobGarbageCollectionForceThreshold( +void Java_org_forstdb_Options_setBlobGarbageCollectionForceThreshold( JNIEnv*, jobject, jlong jhandle, jdouble jblob_garbage_collection_force_threshold) { auto* opts = reinterpret_cast(jhandle); @@ -3824,22 +3824,22 @@ void Java_org_rocksdb_Options_setBlobGarbageCollectionForceThreshold( } /* - * Class: org_rocksdb_Options 
+ * Class: org_forstdb_Options * Method: blobGarbageCollectionForceThreshold * Signature: (J)D */ -jdouble Java_org_rocksdb_Options_blobGarbageCollectionForceThreshold( +jdouble Java_org_forstdb_Options_blobGarbageCollectionForceThreshold( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->blob_garbage_collection_force_threshold); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobCompactionReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_Options_setBlobCompactionReadaheadSize( +void Java_org_forstdb_Options_setBlobCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jblob_compaction_readahead_size) { auto* opts = reinterpret_cast(jhandle); opts->blob_compaction_readahead_size = @@ -3847,44 +3847,44 @@ void Java_org_rocksdb_Options_setBlobCompactionReadaheadSize( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: blobCompactionReadaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_Options_blobCompactionReadaheadSize(JNIEnv*, jobject, +jlong Java_org_forstdb_Options_blobCompactionReadaheadSize(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->blob_compaction_readahead_size); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setBlobFileStartingLevel * Signature: (JI)V */ -void Java_org_rocksdb_Options_setBlobFileStartingLevel( +void Java_org_forstdb_Options_setBlobFileStartingLevel( JNIEnv*, jobject, jlong jhandle, jint jblob_file_starting_level) { auto* opts = reinterpret_cast(jhandle); opts->blob_file_starting_level = jblob_file_starting_level; } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: blobFileStartingLevel * Signature: (J)I */ -jint Java_org_rocksdb_Options_blobFileStartingLevel(JNIEnv*, jobject, +jint Java_org_forstdb_Options_blobFileStartingLevel(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->blob_file_starting_level); } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setPrepopulateBlobCache * Signature: (JB)V */ -void Java_org_rocksdb_Options_setPrepopulateBlobCache( +void Java_org_forstdb_Options_setPrepopulateBlobCache( JNIEnv*, jobject, jlong jhandle, jbyte jprepopulate_blob_cache_value) { auto* opts = reinterpret_cast(jhandle); opts->prepopulate_blob_cache = @@ -3893,11 +3893,11 @@ void Java_org_rocksdb_Options_setPrepopulateBlobCache( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: prepopulateBlobCache * Signature: (J)B */ -jbyte Java_org_rocksdb_Options_prepopulateBlobCache(JNIEnv*, jobject, +jbyte Java_org_forstdb_Options_prepopulateBlobCache(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::PrepopulateBlobCacheJni::toJavaPrepopulateBlobCache( @@ -3905,11 +3905,11 @@ jbyte Java_org_rocksdb_Options_prepopulateBlobCache(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * Method: setMemtableMaxRangeDeletions * Signature: (JI)V */ -void Java_org_rocksdb_Options_setMemtableMaxRangeDeletions( +void Java_org_forstdb_Options_setMemtableMaxRangeDeletions( JNIEnv*, jobject, jlong jhandle, jint jmemtable_max_range_deletions) { auto* opts = reinterpret_cast(jhandle); opts->memtable_max_range_deletions = @@ -3917,11 +3917,11 @@ void Java_org_rocksdb_Options_setMemtableMaxRangeDeletions( } /* - * Class: org_rocksdb_Options + * Class: org_forstdb_Options * 
Method: memtableMaxRangeDeletions * Signature: (J)I */ -jint Java_org_rocksdb_Options_memtableMaxRangeDeletions(JNIEnv*, jobject, +jint Java_org_forstdb_Options_memtableMaxRangeDeletions(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); return static_cast(opts->memtable_max_range_deletions); @@ -3931,22 +3931,22 @@ jint Java_org_rocksdb_Options_memtableMaxRangeDeletions(JNIEnv*, jobject, // ROCKSDB_NAMESPACE::ColumnFamilyOptions /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: newColumnFamilyOptions * Signature: ()J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptions(JNIEnv*, +jlong Java_org_forstdb_ColumnFamilyOptions_newColumnFamilyOptions(JNIEnv*, jclass) { auto* op = new ROCKSDB_NAMESPACE::ColumnFamilyOptions(); return GET_CPLUSPLUS_POINTER(op); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: copyColumnFamilyOptions * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_copyColumnFamilyOptions( +jlong Java_org_forstdb_ColumnFamilyOptions_copyColumnFamilyOptions( JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions( *(reinterpret_cast(jhandle))); @@ -3954,11 +3954,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_copyColumnFamilyOptions( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: newColumnFamilyOptionsFromOptions * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions( +jlong Java_org_forstdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions( JNIEnv*, jclass, jlong joptions_handle) { auto new_opt = new ROCKSDB_NAMESPACE::ColumnFamilyOptions( *reinterpret_cast(joptions_handle)); @@ -3966,11 +3966,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: getColumnFamilyOptionsFromProps * Signature: (JLjava/lang/String;)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLjava_lang_String_2( +jlong Java_org_forstdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLjava_lang_String_2( JNIEnv* env, jclass, jlong cfg_handle, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { @@ -4000,11 +4000,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLja } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: getColumnFamilyOptionsFromProps * Signature: (Ljava/util/String;)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljava_lang_String_2( +jlong Java_org_forstdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljava_lang_String_2( JNIEnv* env, jclass, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { @@ -4036,11 +4036,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljav } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ColumnFamilyOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_ColumnFamilyOptions_disposeInternal(JNIEnv*, jobject, jlong handle) { auto* cfo = reinterpret_cast(handle); assert(cfo != nullptr); @@ -4048,11 +4048,11 @@ 
void Java_org_rocksdb_ColumnFamilyOptions_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: oldDefaults * Signature: (JII)V */ -void Java_org_rocksdb_ColumnFamilyOptions_oldDefaults(JNIEnv*, jclass, +void Java_org_forstdb_ColumnFamilyOptions_oldDefaults(JNIEnv*, jclass, jlong jhandle, jint major_version, jint minor_version) { @@ -4061,11 +4061,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_oldDefaults(JNIEnv*, jclass, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeForSmallDb * Signature: (J)V */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__J(JNIEnv*, +void Java_org_forstdb_ColumnFamilyOptions_optimizeForSmallDb__J(JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle) @@ -4073,11 +4073,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__J(JNIEnv*, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeForSmallDb * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__JJ( +void Java_org_forstdb_ColumnFamilyOptions_optimizeForSmallDb__JJ( JNIEnv*, jclass, jlong jhandle, jlong cache_handle) { auto* cache_sptr_ptr = reinterpret_cast*>( @@ -4087,44 +4087,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_optimizeForSmallDb__JJ( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeForPointLookup * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeForPointLookup( +void Java_org_forstdb_ColumnFamilyOptions_optimizeForPointLookup( JNIEnv*, jobject, jlong jhandle, jlong block_cache_size_mb) { reinterpret_cast(jhandle) ->OptimizeForPointLookup(block_cache_size_mb); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeLevelStyleCompaction * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeLevelStyleCompaction( +void Java_org_forstdb_ColumnFamilyOptions_optimizeLevelStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeLevelStyleCompaction(memtable_memory_budget); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeUniversalStyleCompaction * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_optimizeUniversalStyleCompaction( +void Java_org_forstdb_ColumnFamilyOptions_optimizeUniversalStyleCompaction( JNIEnv*, jobject, jlong jhandle, jlong memtable_memory_budget) { reinterpret_cast(jhandle) ->OptimizeUniversalStyleCompaction(memtable_memory_budget); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setComparatorHandle * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JI( +void Java_org_forstdb_ColumnFamilyOptions_setComparatorHandle__JI( JNIEnv*, jobject, jlong jhandle, jint builtinComparator) { switch (builtinComparator) { case 1: @@ -4139,11 +4139,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JI( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setComparatorHandle * Signature: (JJB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JJB( +void Java_org_forstdb_ColumnFamilyOptions_setComparatorHandle__JJB( JNIEnv*, jobject, jlong jopt_handle, jlong jcomparator_handle, jbyte 
jcomparator_type) { ROCKSDB_NAMESPACE::Comparator* comparator = nullptr; @@ -4166,11 +4166,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setComparatorHandle__JJB( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMergeOperatorName * Signature: (JJjava/lang/String)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperatorName( +void Java_org_forstdb_ColumnFamilyOptions_setMergeOperatorName( JNIEnv* env, jobject, jlong jhandle, jstring jop_name) { auto* options = reinterpret_cast(jhandle); @@ -4186,11 +4186,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperatorName( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMergeOperator * Signature: (JJjava/lang/String)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperator( +void Java_org_forstdb_ColumnFamilyOptions_setMergeOperator( JNIEnv*, jobject, jlong jhandle, jlong mergeOperatorHandle) { reinterpret_cast(jhandle) ->merge_operator = @@ -4199,11 +4199,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMergeOperator( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionFilterHandle * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterHandle( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionFilterHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilter_handle) { reinterpret_cast(jopt_handle) ->compaction_filter = @@ -4212,11 +4212,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterHandle( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionFilterFactoryHandle * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle( JNIEnv*, jobject, jlong jopt_handle, jlong jcompactionfilterfactory_handle) { auto* cff_factory = reinterpret_cast< @@ -4227,11 +4227,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setWriteBufferSize * Signature: (JJ)I */ -void Java_org_rocksdb_ColumnFamilyOptions_setWriteBufferSize( +void Java_org_forstdb_ColumnFamilyOptions_setWriteBufferSize( JNIEnv* env, jobject, jlong jhandle, jlong jwrite_buffer_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jwrite_buffer_size); @@ -4244,33 +4244,33 @@ void Java_org_rocksdb_ColumnFamilyOptions_setWriteBufferSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: writeBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_writeBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_writeBufferSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_buffer_size; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxWriteBufferNumber * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumber( +void Java_org_forstdb_ColumnFamilyOptions_setMaxWriteBufferNumber( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number) { reinterpret_cast(jhandle) ->max_write_buffer_number = jmax_write_buffer_number; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: 
maxWriteBufferNumber * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumber(JNIEnv*, jobject, +jint Java_org_forstdb_ColumnFamilyOptions_maxWriteBufferNumber(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number; @@ -4280,7 +4280,7 @@ jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumber(JNIEnv*, jobject, * Method: setMemTableFactory * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemTableFactory( +void Java_org_forstdb_ColumnFamilyOptions_setMemTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) ->memtable_factory.reset( @@ -4289,11 +4289,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMemTableFactory( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: memTableFactoryName * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_ColumnFamilyOptions_memTableFactoryName( +jstring Java_org_forstdb_ColumnFamilyOptions_memTableFactoryName( JNIEnv* env, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -4315,7 +4315,7 @@ jstring Java_org_rocksdb_ColumnFamilyOptions_memTableFactoryName( * Method: useFixedLengthPrefixExtractor * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor( +void Java_org_forstdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewFixedPrefixTransform( @@ -4326,7 +4326,7 @@ void Java_org_rocksdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor( * Method: useCappedPrefixExtractor * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_useCappedPrefixExtractor( +void Java_org_forstdb_ColumnFamilyOptions_useCappedPrefixExtractor( JNIEnv*, jobject, jlong jhandle, jint jprefix_length) { reinterpret_cast(jhandle) ->prefix_extractor.reset(ROCKSDB_NAMESPACE::NewCappedPrefixTransform( @@ -4337,7 +4337,7 @@ void Java_org_rocksdb_ColumnFamilyOptions_useCappedPrefixExtractor( * Method: setTableFactory * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setTableFactory( +void Java_org_forstdb_ColumnFamilyOptions_setTableFactory( JNIEnv*, jobject, jlong jhandle, jlong jfactory_handle) { reinterpret_cast(jhandle) ->table_factory.reset( @@ -4348,7 +4348,7 @@ void Java_org_rocksdb_ColumnFamilyOptions_setTableFactory( * Method: setSstPartitionerFactory * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setSstPartitionerFactory( +void Java_org_forstdb_ColumnFamilyOptions_setSstPartitionerFactory( JNIEnv*, jobject, jlong jhandle, jlong factory_handle) { auto* options = reinterpret_cast(jhandle); @@ -4359,11 +4359,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setSstPartitionerFactory( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionThreadLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionThreadLimiter( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionThreadLimiter( JNIEnv*, jclass, jlong jhandle, jlong jlimiter_handle) { auto* options = reinterpret_cast(jhandle); @@ -4377,7 +4377,7 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionThreadLimiter( * Method: tableFactoryName * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_ColumnFamilyOptions_tableFactoryName(JNIEnv* env, +jstring Java_org_forstdb_ColumnFamilyOptions_tableFactoryName(JNIEnv* env, 
jobject, jlong jhandle) { auto* opt = @@ -4392,11 +4392,11 @@ jstring Java_org_rocksdb_ColumnFamilyOptions_tableFactoryName(JNIEnv* env, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCfPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCfPaths(JNIEnv* env, jclass, +void Java_org_forstdb_ColumnFamilyOptions_setCfPaths(JNIEnv* env, jclass, jlong jhandle, jobjectArray path_array, jlongArray size_array) { @@ -4412,11 +4412,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCfPaths(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: cfPathsLen * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_cfPathsLen(JNIEnv*, jclass, +jlong Java_org_forstdb_ColumnFamilyOptions_cfPathsLen(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); @@ -4424,11 +4424,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_cfPathsLen(JNIEnv*, jclass, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: cfPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_ColumnFamilyOptions_cfPaths(JNIEnv* env, jclass, +void Java_org_forstdb_ColumnFamilyOptions_cfPaths(JNIEnv* env, jclass, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { @@ -4438,22 +4438,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_cfPaths(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: minWriteBufferNumberToMerge * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_minWriteBufferNumberToMerge( +jint Java_org_forstdb_ColumnFamilyOptions_minWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMinWriteBufferNumberToMerge * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge( +void Java_org_forstdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge( JNIEnv*, jobject, jlong jhandle, jint jmin_write_buffer_number_to_merge) { reinterpret_cast(jhandle) ->min_write_buffer_number_to_merge = @@ -4461,22 +4461,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxWriteBufferNumberToMaintain * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_maxWriteBufferNumberToMaintain( +jint Java_org_forstdb_ColumnFamilyOptions_maxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_write_buffer_number_to_maintain; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxWriteBufferNumberToMaintain * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain( +void Java_org_forstdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain( JNIEnv*, jobject, jlong jhandle, jint jmax_write_buffer_number_to_maintain) { reinterpret_cast(jhandle) @@ -4485,11 +4485,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompressionType * Signature: (JB)V */ -void 
Java_org_rocksdb_ColumnFamilyOptions_setCompressionType( +void Java_org_forstdb_ColumnFamilyOptions_setCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* cf_opts = reinterpret_cast(jhandle); @@ -4499,11 +4499,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionType( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: compressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_compressionType(JNIEnv*, jobject, +jbyte Java_org_forstdb_ColumnFamilyOptions_compressionType(JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -4512,11 +4512,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_compressionType(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompressionPerLevel * Signature: (J[B)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompressionPerLevel( +void Java_org_forstdb_ColumnFamilyOptions_setCompressionPerLevel( JNIEnv* env, jobject, jlong jhandle, jbyteArray jcompressionLevels) { auto* options = reinterpret_cast(jhandle); @@ -4530,11 +4530,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionPerLevel( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: compressionPerLevel * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_ColumnFamilyOptions_compressionPerLevel( +jbyteArray Java_org_forstdb_ColumnFamilyOptions_compressionPerLevel( JNIEnv* env, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); @@ -4543,11 +4543,11 @@ jbyteArray Java_org_rocksdb_ColumnFamilyOptions_compressionPerLevel( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBottommostCompressionType * Signature: (JB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionType( +void Java_org_forstdb_ColumnFamilyOptions_setBottommostCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jcompression_type_value) { auto* cf_options = reinterpret_cast(jhandle); @@ -4557,11 +4557,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionType( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: bottommostCompressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_bottommostCompressionType( +jbyte Java_org_forstdb_ColumnFamilyOptions_bottommostCompressionType( JNIEnv*, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); @@ -4569,11 +4569,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_bottommostCompressionType( cf_options->bottommost_compression); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBottommostCompressionOptions * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionOptions( +void Java_org_forstdb_ColumnFamilyOptions_setBottommostCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong jbottommost_compression_options_handle) { auto* cf_options = @@ -4585,11 +4585,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBottommostCompressionOptions( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompressionOptions * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompressionOptions( +void Java_org_forstdb_ColumnFamilyOptions_setCompressionOptions( JNIEnv*, jobject, jlong jhandle, jlong 
jcompression_options_handle) { auto* cf_options = reinterpret_cast(jhandle); @@ -4600,11 +4600,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompressionOptions( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionStyle * Signature: (JB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionStyle( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionStyle( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_style) { auto* cf_options = reinterpret_cast(jhandle); @@ -4614,11 +4614,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionStyle( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: compactionStyle * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionStyle(JNIEnv*, jobject, +jbyte Java_org_forstdb_ColumnFamilyOptions_compactionStyle(JNIEnv*, jobject, jlong jhandle) { auto* cf_options = reinterpret_cast(jhandle); @@ -4627,11 +4627,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionStyle(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxTableFilesSizeFIFO * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( +void Java_org_forstdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle, jlong jmax_table_files_size) { reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size = @@ -4639,33 +4639,33 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxTableFilesSizeFIFO * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxTableFilesSizeFIFO( +jlong Java_org_forstdb_ColumnFamilyOptions_maxTableFilesSizeFIFO( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->compaction_options_fifo.max_table_files_size; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: numLevels * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_numLevels(JNIEnv*, jobject, +jint Java_org_forstdb_ColumnFamilyOptions_numLevels(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->num_levels; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setNumLevels * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setNumLevels(JNIEnv*, jobject, +void Java_org_forstdb_ColumnFamilyOptions_setNumLevels(JNIEnv*, jobject, jlong jhandle, jint jnum_levels) { reinterpret_cast(jhandle) @@ -4673,22 +4673,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setNumLevels(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: levelZeroFileNumCompactionTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroFileNumCompactionTrigger( +jint Java_org_forstdb_ColumnFamilyOptions_levelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevelZeroFileNumCompactionTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint 
jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = @@ -4696,22 +4696,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: levelZeroSlowdownWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroSlowdownWritesTrigger( +jint Java_org_forstdb_ColumnFamilyOptions_levelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevelSlowdownWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = @@ -4719,22 +4719,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: levelZeroStopWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_levelZeroStopWritesTrigger( +jint Java_org_forstdb_ColumnFamilyOptions_levelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevelStopWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = @@ -4742,44 +4742,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: targetFileSizeBase * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeBase(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_targetFileSizeBase(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_base; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setTargetFileSizeBase * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeBase( +void Java_org_forstdb_ColumnFamilyOptions_setTargetFileSizeBase( JNIEnv*, jobject, jlong jhandle, jlong jtarget_file_size_base) { reinterpret_cast(jhandle) ->target_file_size_base = static_cast(jtarget_file_size_base); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: targetFileSizeMultiplier * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_targetFileSizeMultiplier( +jint Java_org_forstdb_ColumnFamilyOptions_targetFileSizeMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->target_file_size_multiplier; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setTargetFileSizeMultiplier * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeMultiplier( +void Java_org_forstdb_ColumnFamilyOptions_setTargetFileSizeMultiplier( 
JNIEnv*, jobject, jlong jhandle, jint jtarget_file_size_multiplier) { reinterpret_cast(jhandle) ->target_file_size_multiplier = @@ -4787,11 +4787,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setTargetFileSizeMultiplier( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxBytesForLevelBase * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelBase(JNIEnv*, +jlong Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelBase(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) @@ -4799,11 +4799,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelBase(JNIEnv*, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxBytesForLevelBase * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelBase( +void Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelBase( JNIEnv*, jobject, jlong jhandle, jlong jmax_bytes_for_level_base) { reinterpret_cast(jhandle) ->max_bytes_for_level_base = @@ -4811,44 +4811,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelBase( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: levelCompactionDynamicLevelBytes * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_levelCompactionDynamicLevelBytes( +jboolean Java_org_forstdb_ColumnFamilyOptions_levelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevelCompactionDynamicLevelBytes * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevelCompactionDynamicLevelBytes( +void Java_org_forstdb_ColumnFamilyOptions_setLevelCompactionDynamicLevelBytes( JNIEnv*, jobject, jlong jhandle, jboolean jenable_dynamic_level_bytes) { reinterpret_cast(jhandle) ->level_compaction_dynamic_level_bytes = (jenable_dynamic_level_bytes); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxBytesForLevelMultiplier * Signature: (J)D */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplier( +jdouble Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxBytesForLevelMultiplier * Signature: (JD)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier( +void Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier( JNIEnv*, jobject, jlong jhandle, jdouble jmax_bytes_for_level_multiplier) { reinterpret_cast(jhandle) ->max_bytes_for_level_multiplier = @@ -4856,11 +4856,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxCompactionBytes * Signature: (J)I */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxCompactionBytes(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_maxCompactionBytes(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) @@ -4868,33 +4868,33 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_maxCompactionBytes(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: 
org_forstdb_ColumnFamilyOptions * Method: setMaxCompactionBytes * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxCompactionBytes( +void Java_org_forstdb_ColumnFamilyOptions_setMaxCompactionBytes( JNIEnv*, jobject, jlong jhandle, jlong jmax_compaction_bytes) { reinterpret_cast(jhandle) ->max_compaction_bytes = static_cast(jmax_compaction_bytes); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: arenaBlockSize * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_arenaBlockSize(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_arenaBlockSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->arena_block_size; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setArenaBlockSize * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setArenaBlockSize( +void Java_org_forstdb_ColumnFamilyOptions_setArenaBlockSize( JNIEnv* env, jobject, jlong jhandle, jlong jarena_block_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t(jarena_block_size); @@ -4907,44 +4907,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_setArenaBlockSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: disableAutoCompactions * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_disableAutoCompactions( +jboolean Java_org_forstdb_ColumnFamilyOptions_disableAutoCompactions( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disable_auto_compactions; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setDisableAutoCompactions * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setDisableAutoCompactions( +void Java_org_forstdb_ColumnFamilyOptions_setDisableAutoCompactions( JNIEnv*, jobject, jlong jhandle, jboolean jdisable_auto_compactions) { reinterpret_cast(jhandle) ->disable_auto_compactions = static_cast(jdisable_auto_compactions); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxSequentialSkipInIterations * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxSequentialSkipInIterations( +jlong Java_org_forstdb_ColumnFamilyOptions_maxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxSequentialSkipInIterations * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations( +void Java_org_forstdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations( JNIEnv*, jobject, jlong jhandle, jlong jmax_sequential_skip_in_iterations) { reinterpret_cast(jhandle) ->max_sequential_skip_in_iterations = @@ -4952,44 +4952,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: inplaceUpdateSupport * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateSupport( +jboolean Java_org_forstdb_ColumnFamilyOptions_inplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_support; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setInplaceUpdateSupport * Signature: (JZ)V */ -void 
Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateSupport( +void Java_org_forstdb_ColumnFamilyOptions_setInplaceUpdateSupport( JNIEnv*, jobject, jlong jhandle, jboolean jinplace_update_support) { reinterpret_cast(jhandle) ->inplace_update_support = static_cast(jinplace_update_support); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: inplaceUpdateNumLocks * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_inplaceUpdateNumLocks( +jlong Java_org_forstdb_ColumnFamilyOptions_inplaceUpdateNumLocks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->inplace_update_num_locks; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setInplaceUpdateNumLocks * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateNumLocks( +void Java_org_forstdb_ColumnFamilyOptions_setInplaceUpdateNumLocks( JNIEnv* env, jobject, jlong jhandle, jlong jinplace_update_num_locks) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jinplace_update_num_locks); @@ -5002,22 +5002,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setInplaceUpdateNumLocks( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: memtablePrefixBloomSizeRatio * Signature: (J)I */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_memtablePrefixBloomSizeRatio( +jdouble Java_org_forstdb_ColumnFamilyOptions_memtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_prefix_bloom_size_ratio; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMemtablePrefixBloomSizeRatio * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio( +void Java_org_forstdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio( JNIEnv*, jobject, jlong jhandle, jdouble jmemtable_prefix_bloom_size_ratio) { reinterpret_cast(jhandle) @@ -5026,22 +5026,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: experimentalMempurgeThreshold * Signature: (J)I */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_experimentalMempurgeThreshold( +jdouble Java_org_forstdb_ColumnFamilyOptions_experimentalMempurgeThreshold( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->experimental_mempurge_threshold; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setExperimentalMempurgeThreshold * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setExperimentalMempurgeThreshold( +void Java_org_forstdb_ColumnFamilyOptions_setExperimentalMempurgeThreshold( JNIEnv*, jobject, jlong jhandle, jdouble jexperimental_mempurge_threshold) { reinterpret_cast(jhandle) ->experimental_mempurge_threshold = @@ -5049,22 +5049,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setExperimentalMempurgeThreshold( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: memtableWholeKeyFiltering * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_memtableWholeKeyFiltering( +jboolean Java_org_forstdb_ColumnFamilyOptions_memtableWholeKeyFiltering( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_whole_key_filtering; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: 
org_forstdb_ColumnFamilyOptions * Method: setMemtableWholeKeyFiltering * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtableWholeKeyFiltering( +void Java_org_forstdb_ColumnFamilyOptions_setMemtableWholeKeyFiltering( JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_whole_key_filtering) { reinterpret_cast(jhandle) ->memtable_whole_key_filtering = @@ -5072,44 +5072,44 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMemtableWholeKeyFiltering( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: bloomLocality * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_bloomLocality(JNIEnv*, jobject, +jint Java_org_forstdb_ColumnFamilyOptions_bloomLocality(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->bloom_locality; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBloomLocality * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBloomLocality( +void Java_org_forstdb_ColumnFamilyOptions_setBloomLocality( JNIEnv*, jobject, jlong jhandle, jint jbloom_locality) { reinterpret_cast(jhandle) ->bloom_locality = static_cast(jbloom_locality); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxSuccessiveMerges * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_maxSuccessiveMerges(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_maxSuccessiveMerges(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_successive_merges; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxSuccessiveMerges * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxSuccessiveMerges( +void Java_org_forstdb_ColumnFamilyOptions_setMaxSuccessiveMerges( JNIEnv* env, jobject, jlong jhandle, jlong jmax_successive_merges) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmax_successive_merges); @@ -5122,22 +5122,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxSuccessiveMerges( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: optimizeFiltersForHits * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_optimizeFiltersForHits( +jboolean Java_org_forstdb_ColumnFamilyOptions_optimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->optimize_filters_for_hits; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setOptimizeFiltersForHits * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setOptimizeFiltersForHits( +void Java_org_forstdb_ColumnFamilyOptions_setOptimizeFiltersForHits( JNIEnv*, jobject, jlong jhandle, jboolean joptimize_filters_for_hits) { reinterpret_cast(jhandle) ->optimize_filters_for_hits = @@ -5145,11 +5145,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setOptimizeFiltersForHits( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: memtableHugePageSize * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_memtableHugePageSize(JNIEnv*, +jlong Java_org_forstdb_ColumnFamilyOptions_memtableHugePageSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) @@ -5157,11 +5157,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_memtableHugePageSize(JNIEnv*, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: 
org_forstdb_ColumnFamilyOptions * Method: setMemtableHugePageSize * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtableHugePageSize( +void Java_org_forstdb_ColumnFamilyOptions_setMemtableHugePageSize( JNIEnv* env, jobject, jlong jhandle, jlong jmemtable_huge_page_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( jmemtable_huge_page_size); @@ -5174,22 +5174,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMemtableHugePageSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: softPendingCompactionBytesLimit * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_softPendingCompactionBytesLimit( +jlong Java_org_forstdb_ColumnFamilyOptions_softPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->soft_pending_compaction_bytes_limit; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setSoftPendingCompactionBytesLimit * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit( +void Java_org_forstdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jsoft_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) @@ -5198,22 +5198,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: softHardCompactionBytesLimit * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_hardPendingCompactionBytesLimit( +jlong Java_org_forstdb_ColumnFamilyOptions_hardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->hard_pending_compaction_bytes_limit; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setHardPendingCompactionBytesLimit * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit( +void Java_org_forstdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit( JNIEnv*, jobject, jlong jhandle, jlong jhard_pending_compaction_bytes_limit) { reinterpret_cast(jhandle) @@ -5222,22 +5222,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: level0FileNumCompactionTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0FileNumCompactionTrigger( +jint Java_org_forstdb_ColumnFamilyOptions_level0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevel0FileNumCompactionTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_file_num_compaction_trigger) { reinterpret_cast(jhandle) ->level0_file_num_compaction_trigger = @@ -5245,22 +5245,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: level0SlowdownWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0SlowdownWritesTrigger( +jint 
Java_org_forstdb_ColumnFamilyOptions_level0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevel0SlowdownWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_slowdown_writes_trigger) { reinterpret_cast(jhandle) ->level0_slowdown_writes_trigger = @@ -5268,22 +5268,22 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: level0StopWritesTrigger * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_level0StopWritesTrigger( +jint Java_org_forstdb_ColumnFamilyOptions_level0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->level0_stop_writes_trigger; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setLevel0StopWritesTrigger * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setLevel0StopWritesTrigger( +void Java_org_forstdb_ColumnFamilyOptions_setLevel0StopWritesTrigger( JNIEnv*, jobject, jlong jhandle, jint jlevel0_stop_writes_trigger) { reinterpret_cast(jhandle) ->level0_stop_writes_trigger = @@ -5291,12 +5291,12 @@ void Java_org_rocksdb_ColumnFamilyOptions_setLevel0StopWritesTrigger( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: maxBytesForLevelMultiplierAdditional * Signature: (J)[I */ jintArray -Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional( +Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle) { auto mbflma = reinterpret_cast(jhandle) @@ -5330,11 +5330,11 @@ Java_org_rocksdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMaxBytesForLevelMultiplierAdditional * Signature: (J[I)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditional( +void Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditional( JNIEnv* env, jobject, jlong jhandle, jintArray jmax_bytes_for_level_multiplier_additional) { jsize len = env->GetArrayLength(jmax_bytes_for_level_multiplier_additional); @@ -5358,33 +5358,33 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditiona } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: paranoidFileChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_paranoidFileChecks( +jboolean Java_org_forstdb_ColumnFamilyOptions_paranoidFileChecks( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_file_checks; } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setParanoidFileChecks * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setParanoidFileChecks( +void Java_org_forstdb_ColumnFamilyOptions_setParanoidFileChecks( JNIEnv*, jobject, jlong jhandle, jboolean jparanoid_file_checks) { reinterpret_cast(jhandle) ->paranoid_file_checks = static_cast(jparanoid_file_checks); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * 
Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionPriority * Signature: (JB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionPriority( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionPriority( JNIEnv*, jobject, jlong jhandle, jbyte jcompaction_priority_value) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5394,11 +5394,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionPriority( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: compactionPriority * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionPriority(JNIEnv*, jobject, +jbyte Java_org_forstdb_ColumnFamilyOptions_compactionPriority(JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5407,11 +5407,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_compactionPriority(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setReportBgIoStats * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setReportBgIoStats( +void Java_org_forstdb_ColumnFamilyOptions_setReportBgIoStats( JNIEnv*, jobject, jlong jhandle, jboolean jreport_bg_io_stats) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5419,11 +5419,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setReportBgIoStats( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: reportBgIoStats * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_reportBgIoStats(JNIEnv*, jobject, +jboolean Java_org_forstdb_ColumnFamilyOptions_reportBgIoStats(JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5431,11 +5431,11 @@ jboolean Java_org_rocksdb_ColumnFamilyOptions_reportBgIoStats(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setTtl * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setTtl(JNIEnv*, jobject, +void Java_org_forstdb_ColumnFamilyOptions_setTtl(JNIEnv*, jobject, jlong jhandle, jlong jttl) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5443,23 +5443,23 @@ void Java_org_rocksdb_ColumnFamilyOptions_setTtl(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: ttl * Signature: (J)J */ JNIEXPORT jlong JNICALL -Java_org_rocksdb_ColumnFamilyOptions_ttl(JNIEnv*, jobject, jlong jhandle) { +Java_org_forstdb_ColumnFamilyOptions_ttl(JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); return static_cast(cf_opts->ttl); } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setPeriodicCompactionSeconds * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setPeriodicCompactionSeconds( +void Java_org_forstdb_ColumnFamilyOptions_setPeriodicCompactionSeconds( JNIEnv*, jobject, jlong jhandle, jlong jperiodicCompactionSeconds) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5468,12 +5468,12 @@ void Java_org_rocksdb_ColumnFamilyOptions_setPeriodicCompactionSeconds( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: periodicCompactionSeconds * Signature: (J)J */ JNIEXPORT jlong JNICALL -Java_org_rocksdb_ColumnFamilyOptions_periodicCompactionSeconds(JNIEnv*, jobject, +Java_org_forstdb_ColumnFamilyOptions_periodicCompactionSeconds(JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5481,11 +5481,11 @@ 
Java_org_rocksdb_ColumnFamilyOptions_periodicCompactionSeconds(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionOptionsUniversal * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsUniversal( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionOptionsUniversal( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_universal_handle) { auto* cf_opts = @@ -5497,11 +5497,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsUniversal( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setCompactionOptionsFIFO * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsFIFO( +void Java_org_forstdb_ColumnFamilyOptions_setCompactionOptionsFIFO( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_options_fifo_handle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5511,11 +5511,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setCompactionOptionsFIFO( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setForceConsistencyChecks * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setForceConsistencyChecks( +void Java_org_forstdb_ColumnFamilyOptions_setForceConsistencyChecks( JNIEnv*, jobject, jlong jhandle, jboolean jforce_consistency_checks) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5524,11 +5524,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setForceConsistencyChecks( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: forceConsistencyChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_forceConsistencyChecks( +jboolean Java_org_forstdb_ColumnFamilyOptions_forceConsistencyChecks( JNIEnv*, jobject, jlong jhandle) { auto* cf_opts = reinterpret_cast(jhandle); @@ -5538,11 +5538,11 @@ jboolean Java_org_rocksdb_ColumnFamilyOptions_forceConsistencyChecks( /// BLOB options /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setEnableBlobFiles * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobFiles( +void Java_org_forstdb_ColumnFamilyOptions_setEnableBlobFiles( JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_files) { auto* opts = reinterpret_cast(jhandle); @@ -5550,11 +5550,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobFiles( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: enableBlobFiles * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobFiles(JNIEnv*, jobject, +jboolean Java_org_forstdb_ColumnFamilyOptions_enableBlobFiles(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5562,11 +5562,11 @@ jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobFiles(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMinBlobSize * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMinBlobSize(JNIEnv*, jobject, +void Java_org_forstdb_ColumnFamilyOptions_setMinBlobSize(JNIEnv*, jobject, jlong jhandle, jlong jmin_blob_size) { auto* opts = @@ -5575,11 +5575,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMinBlobSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: minBlobSize * Signature: (J)J */ -jlong 
Java_org_rocksdb_ColumnFamilyOptions_minBlobSize(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_minBlobSize(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5587,11 +5587,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_minBlobSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileSize( +void Java_org_forstdb_ColumnFamilyOptions_setBlobFileSize( JNIEnv*, jobject, jlong jhandle, jlong jblob_file_size) { auto* opts = reinterpret_cast(jhandle); @@ -5599,11 +5599,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_blobFileSize(JNIEnv*, jobject, +jlong Java_org_forstdb_ColumnFamilyOptions_blobFileSize(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5611,11 +5611,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_blobFileSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobCompressionType * Signature: (JB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompressionType( +void Java_org_forstdb_ColumnFamilyOptions_setBlobCompressionType( JNIEnv*, jobject, jlong jhandle, jbyte jblob_compression_type_value) { auto* opts = reinterpret_cast(jhandle); @@ -5625,11 +5625,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompressionType( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobCompressionType * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_blobCompressionType(JNIEnv*, jobject, +jbyte Java_org_forstdb_ColumnFamilyOptions_blobCompressionType(JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5638,11 +5638,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_blobCompressionType(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setEnableBlobGarbageCollection * Signature: (JZ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobGarbageCollection( +void Java_org_forstdb_ColumnFamilyOptions_setEnableBlobGarbageCollection( JNIEnv*, jobject, jlong jhandle, jboolean jenable_blob_garbage_collection) { auto* opts = reinterpret_cast(jhandle); @@ -5651,11 +5651,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setEnableBlobGarbageCollection( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: enableBlobGarbageCollection * Signature: (J)Z */ -jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobGarbageCollection( +jboolean Java_org_forstdb_ColumnFamilyOptions_enableBlobGarbageCollection( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5663,11 +5663,11 @@ jboolean Java_org_rocksdb_ColumnFamilyOptions_enableBlobGarbageCollection( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobGarbageCollectionAgeCutoff * Signature: (JD)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionAgeCutoff( +void Java_org_forstdb_ColumnFamilyOptions_setBlobGarbageCollectionAgeCutoff( JNIEnv*, jobject, jlong jhandle, jdouble jblob_garbage_collection_age_cutoff) { auto* opts = @@ -5677,11 +5677,11 @@ void 
Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionAgeCutoff( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobGarbageCollectionAgeCutoff * Signature: (J)D */ -jdouble Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionAgeCutoff( +jdouble Java_org_forstdb_ColumnFamilyOptions_blobGarbageCollectionAgeCutoff( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5689,11 +5689,11 @@ jdouble Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionAgeCutoff( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobGarbageCollectionForceThreshold * Signature: (JD)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionForceThreshold( +void Java_org_forstdb_ColumnFamilyOptions_setBlobGarbageCollectionForceThreshold( JNIEnv*, jobject, jlong jhandle, jdouble jblob_garbage_collection_force_threshold) { auto* opts = @@ -5703,12 +5703,12 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBlobGarbageCollectionForceThreshold } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobGarbageCollectionForceThreshold * Signature: (J)D */ jdouble -Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionForceThreshold( +Java_org_forstdb_ColumnFamilyOptions_blobGarbageCollectionForceThreshold( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5716,11 +5716,11 @@ Java_org_rocksdb_ColumnFamilyOptions_blobGarbageCollectionForceThreshold( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobCompactionReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompactionReadaheadSize( +void Java_org_forstdb_ColumnFamilyOptions_setBlobCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jblob_compaction_readahead_size) { auto* opts = reinterpret_cast(jhandle); @@ -5729,11 +5729,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBlobCompactionReadaheadSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobCompactionReadaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_ColumnFamilyOptions_blobCompactionReadaheadSize( +jlong Java_org_forstdb_ColumnFamilyOptions_blobCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5741,11 +5741,11 @@ jlong Java_org_rocksdb_ColumnFamilyOptions_blobCompactionReadaheadSize( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setBlobFileStartingLevel * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileStartingLevel( +void Java_org_forstdb_ColumnFamilyOptions_setBlobFileStartingLevel( JNIEnv*, jobject, jlong jhandle, jint jblob_file_starting_level) { auto* opts = reinterpret_cast(jhandle); @@ -5753,11 +5753,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setBlobFileStartingLevel( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: blobFileStartingLevel * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_blobFileStartingLevel(JNIEnv*, +jint Java_org_forstdb_ColumnFamilyOptions_blobFileStartingLevel(JNIEnv*, jobject, jlong jhandle) { auto* opts = @@ -5766,11 +5766,11 @@ jint Java_org_rocksdb_ColumnFamilyOptions_blobFileStartingLevel(JNIEnv*, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: 
org_forstdb_ColumnFamilyOptions * Method: setPrepopulateBlobCache * Signature: (JB)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setPrepopulateBlobCache( +void Java_org_forstdb_ColumnFamilyOptions_setPrepopulateBlobCache( JNIEnv*, jobject, jlong jhandle, jbyte jprepopulate_blob_cache_value) { auto* opts = reinterpret_cast(jhandle); @@ -5780,11 +5780,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setPrepopulateBlobCache( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: prepopulateBlobCache * Signature: (J)B */ -jbyte Java_org_rocksdb_ColumnFamilyOptions_prepopulateBlobCache(JNIEnv*, +jbyte Java_org_forstdb_ColumnFamilyOptions_prepopulateBlobCache(JNIEnv*, jobject, jlong jhandle) { auto* opts = @@ -5794,11 +5794,11 @@ jbyte Java_org_rocksdb_ColumnFamilyOptions_prepopulateBlobCache(JNIEnv*, } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: setMemtableMaxRangeDeletions * Signature: (JI)V */ -void Java_org_rocksdb_ColumnFamilyOptions_setMemtableMaxRangeDeletions( +void Java_org_forstdb_ColumnFamilyOptions_setMemtableMaxRangeDeletions( JNIEnv*, jobject, jlong jhandle, jint jmemtable_max_range_deletions) { auto* opts = reinterpret_cast(jhandle); @@ -5806,11 +5806,11 @@ void Java_org_rocksdb_ColumnFamilyOptions_setMemtableMaxRangeDeletions( } /* - * Class: org_rocksdb_ColumnFamilyOptions + * Class: org_forstdb_ColumnFamilyOptions * Method: memtableMaxRangeDeletions * Signature: (J)I */ -jint Java_org_rocksdb_ColumnFamilyOptions_memtableMaxRangeDeletions( +jint Java_org_forstdb_ColumnFamilyOptions_memtableMaxRangeDeletions( JNIEnv*, jobject, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -5821,32 +5821,32 @@ jint Java_org_rocksdb_ColumnFamilyOptions_memtableMaxRangeDeletions( // ROCKSDB_NAMESPACE::DBOptions /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: newDBOptions * Signature: ()J */ -jlong Java_org_rocksdb_DBOptions_newDBOptions(JNIEnv*, jclass) { +jlong Java_org_forstdb_DBOptions_newDBOptions(JNIEnv*, jclass) { auto* dbop = new ROCKSDB_NAMESPACE::DBOptions(); return GET_CPLUSPLUS_POINTER(dbop); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: copyDBOptions * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_copyDBOptions(JNIEnv*, jclass, jlong jhandle) { +jlong Java_org_forstdb_DBOptions_copyDBOptions(JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::DBOptions( *(reinterpret_cast(jhandle))); return GET_CPLUSPLUS_POINTER(new_opt); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: newDBOptionsFromOptions * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_newDBOptionsFromOptions( +jlong Java_org_forstdb_DBOptions_newDBOptionsFromOptions( JNIEnv*, jclass, jlong joptions_handle) { auto new_opt = new ROCKSDB_NAMESPACE::DBOptions( *reinterpret_cast(joptions_handle)); @@ -5854,11 +5854,11 @@ jlong Java_org_rocksdb_DBOptions_newDBOptionsFromOptions( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: getDBOptionsFromProps * Signature: (JLjava/lang/String;)J */ -jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2( +jlong Java_org_forstdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2( JNIEnv* env, jclass, jlong config_handle, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { @@ -5887,11 +5887,11 @@ jlong 
Java_org_rocksdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: getDBOptionsFromProps * Signature: (Ljava/util/String;)J */ -jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2( +jlong Java_org_forstdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2( JNIEnv* env, jclass, jstring jopt_string) { const char* opt_string = env->GetStringUTFChars(jopt_string, nullptr); if (opt_string == nullptr) { @@ -5922,11 +5922,11 @@ jlong Java_org_rocksdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_DBOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_disposeInternal(JNIEnv*, jobject, jlong handle) { auto* dbo = reinterpret_cast(handle); assert(dbo != nullptr); @@ -5934,33 +5934,33 @@ void Java_org_rocksdb_DBOptions_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: optimizeForSmallDb * Signature: (J)V */ -void Java_org_rocksdb_DBOptions_optimizeForSmallDb(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_optimizeForSmallDb(JNIEnv*, jobject, jlong jhandle) { reinterpret_cast(jhandle) ->OptimizeForSmallDb(); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setEnv * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setEnv(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setEnv(JNIEnv*, jobject, jlong jhandle, jlong jenv_handle) { reinterpret_cast(jhandle)->env = reinterpret_cast(jenv_handle); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setIncreaseParallelism * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setIncreaseParallelism(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setIncreaseParallelism(JNIEnv*, jobject, jlong jhandle, jint totalThreads) { reinterpret_cast(jhandle)->IncreaseParallelism( @@ -5968,11 +5968,11 @@ void Java_org_rocksdb_DBOptions_setIncreaseParallelism(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setCreateIfMissing * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setCreateIfMissing(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setCreateIfMissing(JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle)->create_if_missing = @@ -5980,22 +5980,22 @@ void Java_org_rocksdb_DBOptions_setCreateIfMissing(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: createIfMissing * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_createIfMissing(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_createIfMissing(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->create_if_missing; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setCreateMissingColumnFamilies * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setCreateMissingColumnFamilies(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setCreateMissingColumnFamilies(JNIEnv*, jobject, jlong jhandle, jboolean flag) { reinterpret_cast(jhandle) @@ -6003,11 +6003,11 @@ void Java_org_rocksdb_DBOptions_setCreateMissingColumnFamilies(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: createMissingColumnFamilies * Signature: (J)Z */ -jboolean 
Java_org_rocksdb_DBOptions_createMissingColumnFamilies(JNIEnv*, +jboolean Java_org_forstdb_DBOptions_createMissingColumnFamilies(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) @@ -6015,11 +6015,11 @@ jboolean Java_org_rocksdb_DBOptions_createMissingColumnFamilies(JNIEnv*, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setErrorIfExists * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setErrorIfExists(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setErrorIfExists(JNIEnv*, jobject, jlong jhandle, jboolean error_if_exists) { reinterpret_cast(jhandle)->error_if_exists = @@ -6027,22 +6027,22 @@ void Java_org_rocksdb_DBOptions_setErrorIfExists(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: errorIfExists * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_errorIfExists(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_errorIfExists(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->error_if_exists; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setParanoidChecks * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setParanoidChecks(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setParanoidChecks(JNIEnv*, jobject, jlong jhandle, jboolean paranoid_checks) { reinterpret_cast(jhandle)->paranoid_checks = @@ -6050,22 +6050,22 @@ void Java_org_rocksdb_DBOptions_setParanoidChecks(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: paranoidChecks * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_paranoidChecks(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_paranoidChecks(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->paranoid_checks; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setRateLimiter * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setRateLimiter(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setRateLimiter(JNIEnv*, jobject, jlong jhandle, jlong jrate_limiter_handle) { std::shared_ptr* pRateLimiter = reinterpret_cast*>( @@ -6075,11 +6075,11 @@ void Java_org_rocksdb_DBOptions_setRateLimiter(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setSstFileManager * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setSstFileManager( +void Java_org_forstdb_DBOptions_setSstFileManager( JNIEnv*, jobject, jlong jhandle, jlong jsst_file_manager_handle) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -6089,11 +6089,11 @@ void Java_org_rocksdb_DBOptions_setSstFileManager( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setLogger * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setLogger(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setLogger(JNIEnv*, jobject, jlong jhandle, jlong jlogger_handle) { std::shared_ptr* pLogger = reinterpret_cast*>( @@ -6102,32 +6102,32 @@ void Java_org_rocksdb_DBOptions_setLogger(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setInfoLogLevel * Signature: (JB)V */ -void Java_org_rocksdb_DBOptions_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setInfoLogLevel(JNIEnv*, jobject, jlong jhandle, jbyte jlog_level) { reinterpret_cast(jhandle)->info_log_level = static_cast(jlog_level); } /* - * Class: org_rocksdb_DBOptions + * 
Class: org_forstdb_DBOptions * Method: infoLogLevel * Signature: (J)B */ -jbyte Java_org_rocksdb_DBOptions_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { +jbyte Java_org_forstdb_DBOptions_infoLogLevel(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->info_log_level); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxTotalWalSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setMaxTotalWalSize(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setMaxTotalWalSize(JNIEnv*, jobject, jlong jhandle, jlong jmax_total_wal_size) { reinterpret_cast(jhandle)->max_total_wal_size = @@ -6135,65 +6135,65 @@ void Java_org_rocksdb_DBOptions_setMaxTotalWalSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxTotalWalSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_maxTotalWalSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_maxTotalWalSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_total_wal_size; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxOpenFiles * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setMaxOpenFiles(JNIEnv*, jobject, jlong jhandle, jint max_open_files) { reinterpret_cast(jhandle)->max_open_files = static_cast(max_open_files); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxOpenFiles * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { +jint Java_org_forstdb_DBOptions_maxOpenFiles(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_open_files; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxFileOpeningThreads * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxFileOpeningThreads( +void Java_org_forstdb_DBOptions_setMaxFileOpeningThreads( JNIEnv*, jobject, jlong jhandle, jint jmax_file_opening_threads) { reinterpret_cast(jhandle) ->max_file_opening_threads = static_cast(jmax_file_opening_threads); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxFileOpeningThreads * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxFileOpeningThreads(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_maxFileOpeningThreads(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_file_opening_threads); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setStatistics * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setStatistics(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setStatistics(JNIEnv*, jobject, jlong jhandle, jlong jstatistics_handle) { auto* opt = reinterpret_cast(jhandle); auto* pSptr = @@ -6203,11 +6203,11 @@ void Java_org_rocksdb_DBOptions_setStatistics(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: statistics * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_statistics(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_DBOptions_statistics(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); std::shared_ptr sptr = opt->statistics; if (sptr == nullptr) { @@ -6220,31 +6220,31 @@ jlong Java_org_rocksdb_DBOptions_statistics(JNIEnv*, jobject, jlong jhandle) { } /* - * Class: org_rocksdb_DBOptions + * Class: 
org_forstdb_DBOptions * Method: setUseFsync * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setUseFsync(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setUseFsync(JNIEnv*, jobject, jlong jhandle, jboolean use_fsync) { reinterpret_cast(jhandle)->use_fsync = static_cast(use_fsync); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: useFsync * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_useFsync(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_DBOptions_useFsync(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->use_fsync; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setDbPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_DBOptions_setDbPaths(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setDbPaths(JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { std::vector db_paths; @@ -6286,21 +6286,21 @@ void Java_org_rocksdb_DBOptions_setDbPaths(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: dbPathsLen * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_DBOptions_dbPathsLen(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_paths.size()); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: dbPaths * Signature: (J[Ljava/lang/String;[J)V */ -void Java_org_rocksdb_DBOptions_dbPaths(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_dbPaths(JNIEnv* env, jobject, jlong jhandle, jobjectArray jpaths, jlongArray jtarget_sizes) { jboolean is_copy; @@ -6337,11 +6337,11 @@ void Java_org_rocksdb_DBOptions_dbPaths(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setDbLogDir * Signature: (JLjava/lang/String)V */ -void Java_org_rocksdb_DBOptions_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, jstring jdb_log_dir) { const char* log_dir = env->GetStringUTFChars(jdb_log_dir, nullptr); if (log_dir == nullptr) { @@ -6355,11 +6355,11 @@ void Java_org_rocksdb_DBOptions_setDbLogDir(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: dbLogDir * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_DBOptions_dbLogDir(JNIEnv* env, jobject, +jstring Java_org_forstdb_DBOptions_dbLogDir(JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) @@ -6367,11 +6367,11 @@ jstring Java_org_rocksdb_DBOptions_dbLogDir(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalDir * Signature: (JLjava/lang/String)V */ -void Java_org_rocksdb_DBOptions_setWalDir(JNIEnv* env, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setWalDir(JNIEnv* env, jobject, jlong jhandle, jstring jwal_dir) { const char* wal_dir = env->GetStringUTFChars(jwal_dir, 0); reinterpret_cast(jhandle)->wal_dir.assign( @@ -6380,44 +6380,44 @@ void Java_org_rocksdb_DBOptions_setWalDir(JNIEnv* env, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: walDir * Signature: (J)Ljava/lang/String */ -jstring Java_org_rocksdb_DBOptions_walDir(JNIEnv* env, jobject, jlong 
jhandle) { +jstring Java_org_forstdb_DBOptions_walDir(JNIEnv* env, jobject, jlong jhandle) { return env->NewStringUTF( reinterpret_cast(jhandle) ->wal_dir.c_str()); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setDeleteObsoleteFilesPeriodMicros * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setDeleteObsoleteFilesPeriodMicros( +void Java_org_forstdb_DBOptions_setDeleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle, jlong micros) { reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros = static_cast(micros); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: deleteObsoleteFilesPeriodMicros * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_deleteObsoleteFilesPeriodMicros( +jlong Java_org_forstdb_DBOptions_deleteObsoleteFilesPeriodMicros( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->delete_obsolete_files_period_micros; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxBackgroundCompactions * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundCompactions(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setMaxBackgroundCompactions(JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle) @@ -6425,66 +6425,66 @@ void Java_org_rocksdb_DBOptions_setMaxBackgroundCompactions(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxBackgroundCompactions * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxBackgroundCompactions(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_maxBackgroundCompactions(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_compactions; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxSubcompactions * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxSubcompactions(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setMaxSubcompactions(JNIEnv*, jobject, jlong jhandle, jint max) { reinterpret_cast(jhandle)->max_subcompactions = static_cast(max); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxSubcompactions * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxSubcompactions(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_maxSubcompactions(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_subcompactions; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxBackgroundFlushes * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundFlushes( +void Java_org_forstdb_DBOptions_setMaxBackgroundFlushes( JNIEnv*, jobject, jlong jhandle, jint max_background_flushes) { reinterpret_cast(jhandle) ->max_background_flushes = static_cast(max_background_flushes); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxBackgroundFlushes * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxBackgroundFlushes(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_maxBackgroundFlushes(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_flushes; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxBackgroundJobs * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxBackgroundJobs(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setMaxBackgroundJobs(JNIEnv*, jobject, jlong jhandle, jint max_background_jobs) { reinterpret_cast(jhandle) @@ -6492,22 +6492,22 @@ void 
Java_org_rocksdb_DBOptions_setMaxBackgroundJobs(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxBackgroundJobs * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxBackgroundJobs(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_maxBackgroundJobs(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_background_jobs; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxLogFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setMaxLogFileSize(JNIEnv* env, jobject, +void Java_org_forstdb_DBOptions_setMaxLogFileSize(JNIEnv* env, jobject, jlong jhandle, jlong max_log_file_size) { auto s = @@ -6521,22 +6521,22 @@ void Java_org_rocksdb_DBOptions_setMaxLogFileSize(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxLogFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_maxLogFileSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_maxLogFileSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_log_file_size; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setLogFileTimeToRoll * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setLogFileTimeToRoll( +void Java_org_forstdb_DBOptions_setLogFileTimeToRoll( JNIEnv* env, jobject, jlong jhandle, jlong log_file_time_to_roll) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( log_file_time_to_roll); @@ -6549,22 +6549,22 @@ void Java_org_rocksdb_DBOptions_setLogFileTimeToRoll( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: logFileTimeToRoll * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_logFileTimeToRoll(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_logFileTimeToRoll(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->log_file_time_to_roll; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setKeepLogFileNum * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setKeepLogFileNum(JNIEnv* env, jobject, +void Java_org_forstdb_DBOptions_setKeepLogFileNum(JNIEnv* env, jobject, jlong jhandle, jlong keep_log_file_num) { auto s = @@ -6578,22 +6578,22 @@ void Java_org_rocksdb_DBOptions_setKeepLogFileNum(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: keepLogFileNum * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_keepLogFileNum(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_keepLogFileNum(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->keep_log_file_num; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setRecycleLogFileNum * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setRecycleLogFileNum( +void Java_org_forstdb_DBOptions_setRecycleLogFileNum( JNIEnv* env, jobject, jlong jhandle, jlong recycle_log_file_num) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( recycle_log_file_num); @@ -6606,66 +6606,66 @@ void Java_org_rocksdb_DBOptions_setRecycleLogFileNum( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: recycleLogFileNum * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_recycleLogFileNum(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_recycleLogFileNum(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->recycle_log_file_num; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: 
setMaxManifestFileSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setMaxManifestFileSize( +void Java_org_forstdb_DBOptions_setMaxManifestFileSize( JNIEnv*, jobject, jlong jhandle, jlong max_manifest_file_size) { reinterpret_cast(jhandle) ->max_manifest_file_size = static_cast(max_manifest_file_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxManifestFileSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_maxManifestFileSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_maxManifestFileSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->max_manifest_file_size; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setTableCacheNumshardbits * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setTableCacheNumshardbits( +void Java_org_forstdb_DBOptions_setTableCacheNumshardbits( JNIEnv*, jobject, jlong jhandle, jint table_cache_numshardbits) { reinterpret_cast(jhandle) ->table_cache_numshardbits = static_cast(table_cache_numshardbits); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: tableCacheNumshardbits * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_tableCacheNumshardbits(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_tableCacheNumshardbits(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->table_cache_numshardbits; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalTtlSeconds * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWalTtlSeconds(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setWalTtlSeconds(JNIEnv*, jobject, jlong jhandle, jlong WAL_ttl_seconds) { reinterpret_cast(jhandle)->WAL_ttl_seconds = @@ -6673,22 +6673,22 @@ void Java_org_rocksdb_DBOptions_setWalTtlSeconds(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: walTtlSeconds * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_walTtlSeconds(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_walTtlSeconds(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_ttl_seconds; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalSizeLimitMB * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWalSizeLimitMB(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setWalSizeLimitMB(JNIEnv*, jobject, jlong jhandle, jlong WAL_size_limit_MB) { reinterpret_cast(jhandle)->WAL_size_limit_MB = @@ -6696,22 +6696,22 @@ void Java_org_rocksdb_DBOptions_setWalSizeLimitMB(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: walTtlSeconds * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_walSizeLimitMB(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_walSizeLimitMB(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->WAL_size_limit_MB; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxWriteBatchGroupSizeBytes * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setMaxWriteBatchGroupSizeBytes( +void Java_org_forstdb_DBOptions_setMaxWriteBatchGroupSizeBytes( JNIEnv*, jclass, jlong jhandle, jlong jmax_write_batch_group_size_bytes) { auto* opt = reinterpret_cast(jhandle); opt->max_write_batch_group_size_bytes = @@ -6719,22 +6719,22 @@ void Java_org_rocksdb_DBOptions_setMaxWriteBatchGroupSizeBytes( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxWriteBatchGroupSizeBytes * Signature: 
(J)J */ -jlong Java_org_rocksdb_DBOptions_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, +jlong Java_org_forstdb_DBOptions_maxWriteBatchGroupSizeBytes(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_write_batch_group_size_bytes); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setManifestPreallocationSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setManifestPreallocationSize( +void Java_org_forstdb_DBOptions_setManifestPreallocationSize( JNIEnv* env, jobject, jlong jhandle, jlong preallocation_size) { auto s = ROCKSDB_NAMESPACE::JniUtil::check_if_jlong_fits_size_t( preallocation_size); @@ -6747,33 +6747,33 @@ void Java_org_rocksdb_DBOptions_setManifestPreallocationSize( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: manifestPreallocationSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_manifestPreallocationSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_manifestPreallocationSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->manifest_preallocation_size; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: useDirectReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_useDirectReads(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_useDirectReads(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_reads; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setUseDirectReads * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setUseDirectReads(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setUseDirectReads(JNIEnv*, jobject, jlong jhandle, jboolean use_direct_reads) { reinterpret_cast(jhandle)->use_direct_reads = @@ -6781,22 +6781,22 @@ void Java_org_rocksdb_DBOptions_setUseDirectReads(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: useDirectIoForFlushAndCompaction * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_useDirectIoForFlushAndCompaction( +jboolean Java_org_forstdb_DBOptions_useDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_direct_io_for_flush_and_compaction; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setUseDirectReads * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setUseDirectIoForFlushAndCompaction( +void Java_org_forstdb_DBOptions_setUseDirectIoForFlushAndCompaction( JNIEnv*, jobject, jlong jhandle, jboolean use_direct_io_for_flush_and_compaction) { reinterpret_cast(jhandle) @@ -6805,11 +6805,11 @@ void Java_org_rocksdb_DBOptions_setUseDirectIoForFlushAndCompaction( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllowFAllocate * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllowFAllocate(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setAllowFAllocate(JNIEnv*, jobject, jlong jhandle, jboolean jallow_fallocate) { reinterpret_cast(jhandle)->allow_fallocate = @@ -6817,22 +6817,22 @@ void Java_org_rocksdb_DBOptions_setAllowFAllocate(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: allowFAllocate * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allowFAllocate(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_allowFAllocate(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_fallocate); 
} /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllowMmapReads * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllowMmapReads(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setAllowMmapReads(JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_reads) { reinterpret_cast(jhandle)->allow_mmap_reads = @@ -6840,22 +6840,22 @@ void Java_org_rocksdb_DBOptions_setAllowMmapReads(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: allowMmapReads * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allowMmapReads(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_allowMmapReads(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_reads; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllowMmapWrites * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllowMmapWrites(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setAllowMmapWrites(JNIEnv*, jobject, jlong jhandle, jboolean allow_mmap_writes) { reinterpret_cast(jhandle)->allow_mmap_writes = @@ -6863,44 +6863,44 @@ void Java_org_rocksdb_DBOptions_setAllowMmapWrites(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: allowMmapWrites * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allowMmapWrites(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_allowMmapWrites(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_mmap_writes; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setIsFdCloseOnExec * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setIsFdCloseOnExec( +void Java_org_forstdb_DBOptions_setIsFdCloseOnExec( JNIEnv*, jobject, jlong jhandle, jboolean is_fd_close_on_exec) { reinterpret_cast(jhandle) ->is_fd_close_on_exec = static_cast(is_fd_close_on_exec); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: isFdCloseOnExec * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_isFdCloseOnExec(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_isFdCloseOnExec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->is_fd_close_on_exec; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setStatsDumpPeriodSec * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setStatsDumpPeriodSec( +void Java_org_forstdb_DBOptions_setStatsDumpPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_dump_period_sec) { reinterpret_cast(jhandle) ->stats_dump_period_sec = @@ -6908,22 +6908,22 @@ void Java_org_rocksdb_DBOptions_setStatsDumpPeriodSec( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: statsDumpPeriodSec * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_statsDumpPeriodSec(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_statsDumpPeriodSec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_dump_period_sec; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setStatsPersistPeriodSec * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setStatsPersistPeriodSec( +void Java_org_forstdb_DBOptions_setStatsPersistPeriodSec( JNIEnv*, jobject, jlong jhandle, jint jstats_persist_period_sec) { reinterpret_cast(jhandle) ->stats_persist_period_sec = @@ -6931,22 +6931,22 @@ void Java_org_rocksdb_DBOptions_setStatsPersistPeriodSec( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: 
statsPersistPeriodSec * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_statsPersistPeriodSec(JNIEnv*, jobject, +jint Java_org_forstdb_DBOptions_statsPersistPeriodSec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_persist_period_sec; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setStatsHistoryBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setStatsHistoryBufferSize( +void Java_org_forstdb_DBOptions_setStatsHistoryBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jstats_history_buffer_size) { reinterpret_cast(jhandle) ->stats_history_buffer_size = @@ -6954,55 +6954,55 @@ void Java_org_rocksdb_DBOptions_setStatsHistoryBufferSize( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: statsHistoryBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_statsHistoryBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_statsHistoryBufferSize(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->stats_history_buffer_size; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAdviseRandomOnOpen * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAdviseRandomOnOpen( +void Java_org_forstdb_DBOptions_setAdviseRandomOnOpen( JNIEnv*, jobject, jlong jhandle, jboolean advise_random_on_open) { reinterpret_cast(jhandle) ->advise_random_on_open = static_cast(advise_random_on_open); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: adviseRandomOnOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_adviseRandomOnOpen(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_adviseRandomOnOpen(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->advise_random_on_open; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setDbWriteBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setDbWriteBufferSize( +void Java_org_forstdb_DBOptions_setDbWriteBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jdb_write_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->db_write_buffer_size = static_cast(jdb_write_buffer_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWriteBufferManager * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWriteBufferManager( +void Java_org_forstdb_DBOptions_setWriteBufferManager( JNIEnv*, jobject, jlong jdb_options_handle, jlong jwrite_buffer_manager_handle) { auto* write_buffer_manager = @@ -7013,22 +7013,22 @@ void Java_org_rocksdb_DBOptions_setWriteBufferManager( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: dbWriteBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_dbWriteBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_dbWriteBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->db_write_buffer_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAccessHintOnCompactionStart * Signature: (JB)V */ -void Java_org_rocksdb_DBOptions_setAccessHintOnCompactionStart( +void Java_org_forstdb_DBOptions_setAccessHintOnCompactionStart( JNIEnv*, jobject, jlong jhandle, jbyte jaccess_hint_value) { auto* opt = reinterpret_cast(jhandle); opt->access_hint_on_compaction_start = @@ -7036,11 +7036,11 @@ void Java_org_rocksdb_DBOptions_setAccessHintOnCompactionStart( } /* - * Class: org_rocksdb_DBOptions + * Class: 
org_forstdb_DBOptions * Method: accessHintOnCompactionStart * Signature: (J)B */ -jbyte Java_org_rocksdb_DBOptions_accessHintOnCompactionStart(JNIEnv*, jobject, +jbyte Java_org_forstdb_DBOptions_accessHintOnCompactionStart(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::AccessHintJni::toJavaAccessHint( @@ -7048,11 +7048,11 @@ jbyte Java_org_rocksdb_DBOptions_accessHintOnCompactionStart(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setCompactionReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setCompactionReadaheadSize( +void Java_org_forstdb_DBOptions_setCompactionReadaheadSize( JNIEnv*, jobject, jlong jhandle, jlong jcompaction_readahead_size) { auto* opt = reinterpret_cast(jhandle); opt->compaction_readahead_size = @@ -7060,22 +7060,22 @@ void Java_org_rocksdb_DBOptions_setCompactionReadaheadSize( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: compactionReadaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_compactionReadaheadSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_compactionReadaheadSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->compaction_readahead_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setRandomAccessMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setRandomAccessMaxBufferSize( +void Java_org_forstdb_DBOptions_setRandomAccessMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jrandom_access_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->random_access_max_buffer_size = @@ -7083,22 +7083,22 @@ void Java_org_rocksdb_DBOptions_setRandomAccessMaxBufferSize( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: randomAccessMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_randomAccessMaxBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_randomAccessMaxBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->random_access_max_buffer_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWritableFileMaxBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWritableFileMaxBufferSize( +void Java_org_forstdb_DBOptions_setWritableFileMaxBufferSize( JNIEnv*, jobject, jlong jhandle, jlong jwritable_file_max_buffer_size) { auto* opt = reinterpret_cast(jhandle); opt->writable_file_max_buffer_size = @@ -7106,65 +7106,65 @@ void Java_org_rocksdb_DBOptions_setWritableFileMaxBufferSize( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: writableFileMaxBufferSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_writableFileMaxBufferSize(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_writableFileMaxBufferSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->writable_file_max_buffer_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setUseAdaptiveMutex * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setUseAdaptiveMutex( +void Java_org_forstdb_DBOptions_setUseAdaptiveMutex( JNIEnv*, jobject, jlong jhandle, jboolean use_adaptive_mutex) { reinterpret_cast(jhandle)->use_adaptive_mutex = static_cast(use_adaptive_mutex); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * 
Method: useAdaptiveMutex * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_useAdaptiveMutex(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_useAdaptiveMutex(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->use_adaptive_mutex; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setBytesPerSync * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setBytesPerSync(JNIEnv*, jobject, jlong jhandle, jlong bytes_per_sync) { reinterpret_cast(jhandle)->bytes_per_sync = static_cast(bytes_per_sync); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: bytesPerSync * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_DBOptions_bytesPerSync(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->bytes_per_sync; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalBytesPerSync * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWalBytesPerSync(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setWalBytesPerSync(JNIEnv*, jobject, jlong jhandle, jlong jwal_bytes_per_sync) { reinterpret_cast(jhandle)->wal_bytes_per_sync = @@ -7172,33 +7172,33 @@ void Java_org_rocksdb_DBOptions_setWalBytesPerSync(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: walBytesPerSync * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_walBytesPerSync(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_walBytesPerSync(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->wal_bytes_per_sync); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setStrictBytesPerSync * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setStrictBytesPerSync( +void Java_org_forstdb_DBOptions_setStrictBytesPerSync( JNIEnv*, jobject, jlong jhandle, jboolean jstrict_bytes_per_sync) { reinterpret_cast(jhandle) ->strict_bytes_per_sync = jstrict_bytes_per_sync == JNI_TRUE; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: strictBytesPerSync * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_strictBytesPerSync(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_strictBytesPerSync(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle) @@ -7206,11 +7206,11 @@ jboolean Java_org_rocksdb_DBOptions_strictBytesPerSync(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setEventListeners * Signature: (J[J)V */ -void Java_org_rocksdb_DBOptions_setEventListeners(JNIEnv* env, jclass, +void Java_org_forstdb_DBOptions_setEventListeners(JNIEnv* env, jclass, jlong jhandle, jlongArray jlistener_array) { auto* opt = reinterpret_cast(jhandle); @@ -7218,22 +7218,22 @@ void Java_org_rocksdb_DBOptions_setEventListeners(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: eventListeners * Signature: (J)[Lorg/rocksdb/AbstractEventListener; */ -jobjectArray Java_org_rocksdb_DBOptions_eventListeners(JNIEnv* env, jclass, +jobjectArray Java_org_forstdb_DBOptions_eventListeners(JNIEnv* env, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return rocksdb_get_event_listeners_helper(env, opt->listeners); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: 
setDelayedWriteRate * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setDelayedWriteRate(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setDelayedWriteRate(JNIEnv*, jobject, jlong jhandle, jlong jdelayed_write_rate) { auto* opt = reinterpret_cast(jhandle); @@ -7241,44 +7241,44 @@ void Java_org_rocksdb_DBOptions_setDelayedWriteRate(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: delayedWriteRate * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_delayedWriteRate(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_delayedWriteRate(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->delayed_write_rate); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setEnablePipelinedWrite * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setEnablePipelinedWrite( +void Java_org_forstdb_DBOptions_setEnablePipelinedWrite( JNIEnv*, jobject, jlong jhandle, jboolean jenable_pipelined_write) { auto* opt = reinterpret_cast(jhandle); opt->enable_pipelined_write = jenable_pipelined_write == JNI_TRUE; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: enablePipelinedWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_enablePipelinedWrite(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_enablePipelinedWrite(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_pipelined_write); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setUnorderedWrite * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setUnorderedWrite(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setUnorderedWrite(JNIEnv*, jobject, jlong jhandle, jboolean junordered_write) { auto* opt = reinterpret_cast(jhandle); @@ -7286,88 +7286,88 @@ void Java_org_rocksdb_DBOptions_setUnorderedWrite(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: unorderedWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_unorderedWrite(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_unorderedWrite(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->unordered_write); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setEnableThreadTracking * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setEnableThreadTracking( +void Java_org_forstdb_DBOptions_setEnableThreadTracking( JNIEnv*, jobject, jlong jhandle, jboolean jenable_thread_tracking) { auto* opt = reinterpret_cast(jhandle); opt->enable_thread_tracking = jenable_thread_tracking == JNI_TRUE; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: enableThreadTracking * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_enableThreadTracking(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_enableThreadTracking(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->enable_thread_tracking); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllowConcurrentMemtableWrite * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllowConcurrentMemtableWrite( +void Java_org_forstdb_DBOptions_setAllowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle, jboolean allow) { reinterpret_cast(jhandle) ->allow_concurrent_memtable_write = static_cast(allow); } /* - * Class: org_rocksdb_DBOptions + * Class: 
org_forstdb_DBOptions * Method: allowConcurrentMemtableWrite * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allowConcurrentMemtableWrite( +jboolean Java_org_forstdb_DBOptions_allowConcurrentMemtableWrite( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->allow_concurrent_memtable_write; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setEnableWriteThreadAdaptiveYield * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setEnableWriteThreadAdaptiveYield( +void Java_org_forstdb_DBOptions_setEnableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle, jboolean yield) { reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield = static_cast(yield); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: enableWriteThreadAdaptiveYield * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_enableWriteThreadAdaptiveYield( +jboolean Java_org_forstdb_DBOptions_enableWriteThreadAdaptiveYield( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->enable_write_thread_adaptive_yield; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWriteThreadMaxYieldUsec * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, jlong jhandle, jlong max) { reinterpret_cast(jhandle) @@ -7375,22 +7375,22 @@ void Java_org_rocksdb_DBOptions_setWriteThreadMaxYieldUsec(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: writeThreadMaxYieldUsec * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_writeThreadMaxYieldUsec(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_writeThreadMaxYieldUsec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_max_yield_usec; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWriteThreadSlowYieldUsec * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, jlong jhandle, jlong slow) { reinterpret_cast(jhandle) @@ -7398,22 +7398,22 @@ void Java_org_rocksdb_DBOptions_setWriteThreadSlowYieldUsec(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: writeThreadSlowYieldUsec * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_writeThreadSlowYieldUsec(JNIEnv*, jobject, +jlong Java_org_forstdb_DBOptions_writeThreadSlowYieldUsec(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->write_thread_slow_yield_usec; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setSkipStatsUpdateOnDbOpen * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setSkipStatsUpdateOnDbOpen( +void Java_org_forstdb_DBOptions_setSkipStatsUpdateOnDbOpen( JNIEnv*, jobject, jlong jhandle, jboolean jskip_stats_update_on_db_open) { auto* opt = reinterpret_cast(jhandle); opt->skip_stats_update_on_db_open = @@ -7421,22 +7421,22 @@ void Java_org_rocksdb_DBOptions_setSkipStatsUpdateOnDbOpen( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: skipStatsUpdateOnDbOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_skipStatsUpdateOnDbOpen(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return 
static_cast(opt->skip_stats_update_on_db_open); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setSkipCheckingSstFileSizesOnDbOpen * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen( +void Java_org_forstdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen( JNIEnv*, jclass, jlong jhandle, jboolean jskip_checking_sst_file_sizes_on_db_open) { auto* opt = reinterpret_cast(jhandle); @@ -7445,22 +7445,22 @@ void Java_org_rocksdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: skipCheckingSstFileSizesOnDbOpen * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_skipCheckingSstFileSizesOnDbOpen( +jboolean Java_org_forstdb_DBOptions_skipCheckingSstFileSizesOnDbOpen( JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->skip_checking_sst_file_sizes_on_db_open); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalRecoveryMode * Signature: (JB)V */ -void Java_org_rocksdb_DBOptions_setWalRecoveryMode( +void Java_org_forstdb_DBOptions_setWalRecoveryMode( JNIEnv*, jobject, jlong jhandle, jbyte jwal_recovery_mode_value) { auto* opt = reinterpret_cast(jhandle); opt->wal_recovery_mode = @@ -7469,11 +7469,11 @@ void Java_org_rocksdb_DBOptions_setWalRecoveryMode( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: walRecoveryMode * Signature: (J)B */ -jbyte Java_org_rocksdb_DBOptions_walRecoveryMode(JNIEnv*, jobject, +jbyte Java_org_forstdb_DBOptions_walRecoveryMode(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return ROCKSDB_NAMESPACE::WALRecoveryModeJni::toJavaWALRecoveryMode( @@ -7481,32 +7481,32 @@ jbyte Java_org_rocksdb_DBOptions_walRecoveryMode(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllow2pc * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllow2pc(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setAllow2pc(JNIEnv*, jobject, jlong jhandle, jboolean jallow_2pc) { auto* opt = reinterpret_cast(jhandle); opt->allow_2pc = static_cast(jallow_2pc); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: allow2pc * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allow2pc(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_DBOptions_allow2pc(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_2pc); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setRowCache * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setRowCache(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setRowCache(JNIEnv*, jobject, jlong jhandle, jlong jrow_cache_handle) { auto* opt = reinterpret_cast(jhandle); auto* row_cache = @@ -7516,11 +7516,11 @@ void Java_org_rocksdb_DBOptions_setRowCache(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWalFilter * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setWalFilter(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setWalFilter(JNIEnv*, jobject, jlong jhandle, jlong jwal_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* wal_filter = reinterpret_cast( @@ -7529,11 +7529,11 @@ void Java_org_rocksdb_DBOptions_setWalFilter(JNIEnv*, jobject, jlong jhandle, } /* - * Class: 
org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setFailIfOptionsFileError * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setFailIfOptionsFileError( +void Java_org_forstdb_DBOptions_setFailIfOptionsFileError( JNIEnv*, jobject, jlong jhandle, jboolean jfail_if_options_file_error) { auto* opt = reinterpret_cast(jhandle); opt->fail_if_options_file_error = @@ -7541,44 +7541,44 @@ void Java_org_rocksdb_DBOptions_setFailIfOptionsFileError( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: failIfOptionsFileError * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_failIfOptionsFileError(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_failIfOptionsFileError(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->fail_if_options_file_error); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setDumpMallocStats * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setDumpMallocStats( +void Java_org_forstdb_DBOptions_setDumpMallocStats( JNIEnv*, jobject, jlong jhandle, jboolean jdump_malloc_stats) { auto* opt = reinterpret_cast(jhandle); opt->dump_malloc_stats = static_cast(jdump_malloc_stats); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: dumpMallocStats * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_dumpMallocStats(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_dumpMallocStats(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->dump_malloc_stats); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAvoidFlushDuringRecovery * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAvoidFlushDuringRecovery( +void Java_org_forstdb_DBOptions_setAvoidFlushDuringRecovery( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_recovery) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_recovery = @@ -7586,44 +7586,44 @@ void Java_org_rocksdb_DBOptions_setAvoidFlushDuringRecovery( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: avoidFlushDuringRecovery * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringRecovery(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_avoidFlushDuringRecovery(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_recovery); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAllowIngestBehind * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAllowIngestBehind( +void Java_org_forstdb_DBOptions_setAllowIngestBehind( JNIEnv*, jobject, jlong jhandle, jboolean jallow_ingest_behind) { auto* opt = reinterpret_cast(jhandle); opt->allow_ingest_behind = jallow_ingest_behind == JNI_TRUE; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: allowIngestBehind * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_allowIngestBehind(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_allowIngestBehind(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->allow_ingest_behind); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setTwoWriteQueues * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setTwoWriteQueues(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setTwoWriteQueues(JNIEnv*, jobject, jlong jhandle, jboolean 
jtwo_write_queues) { auto* opt = reinterpret_cast(jhandle); @@ -7631,22 +7631,22 @@ void Java_org_rocksdb_DBOptions_setTwoWriteQueues(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: twoWriteQueues * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_twoWriteQueues(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_twoWriteQueues(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->two_write_queues); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setManualWalFlush * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setManualWalFlush(JNIEnv*, jobject, +void Java_org_forstdb_DBOptions_setManualWalFlush(JNIEnv*, jobject, jlong jhandle, jboolean jmanual_wal_flush) { auto* opt = reinterpret_cast(jhandle); @@ -7654,44 +7654,44 @@ void Java_org_rocksdb_DBOptions_setManualWalFlush(JNIEnv*, jobject, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: manualWalFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_manualWalFlush(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_manualWalFlush(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->manual_wal_flush); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAtomicFlush * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_DBOptions_setAtomicFlush(JNIEnv*, jobject, jlong jhandle, jboolean jatomic_flush) { auto* opt = reinterpret_cast(jhandle); opt->atomic_flush = jatomic_flush == JNI_TRUE; } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: atomicFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_atomicFlush(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_atomicFlush(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->atomic_flush); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAvoidFlushDuringShutdown * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAvoidFlushDuringShutdown( +void Java_org_forstdb_DBOptions_setAvoidFlushDuringShutdown( JNIEnv*, jobject, jlong jhandle, jboolean javoid_flush_during_shutdown) { auto* opt = reinterpret_cast(jhandle); opt->avoid_flush_during_shutdown = @@ -7699,88 +7699,88 @@ void Java_org_rocksdb_DBOptions_setAvoidFlushDuringShutdown( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: avoidFlushDuringShutdown * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_avoidFlushDuringShutdown(JNIEnv*, jobject, +jboolean Java_org_forstdb_DBOptions_avoidFlushDuringShutdown(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_flush_during_shutdown); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setAvoidUnnecessaryBlockingIO * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setAvoidUnnecessaryBlockingIO( +void Java_org_forstdb_DBOptions_setAvoidUnnecessaryBlockingIO( JNIEnv*, jclass, jlong jhandle, jboolean avoid_blocking_io) { auto* opt = reinterpret_cast(jhandle); opt->avoid_unnecessary_blocking_io = static_cast(avoid_blocking_io); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: avoidUnnecessaryBlockingIO * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_avoidUnnecessaryBlockingIO(JNIEnv*, 
jclass, +jboolean Java_org_forstdb_DBOptions_avoidUnnecessaryBlockingIO(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->avoid_unnecessary_blocking_io); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setPersistStatsToDisk * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setPersistStatsToDisk( +void Java_org_forstdb_DBOptions_setPersistStatsToDisk( JNIEnv*, jclass, jlong jhandle, jboolean persist_stats_to_disk) { auto* opt = reinterpret_cast(jhandle); opt->persist_stats_to_disk = static_cast(persist_stats_to_disk); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: persistStatsToDisk * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_persistStatsToDisk(JNIEnv*, jclass, +jboolean Java_org_forstdb_DBOptions_persistStatsToDisk(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->persist_stats_to_disk); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setWriteDbidToManifest * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setWriteDbidToManifest( +void Java_org_forstdb_DBOptions_setWriteDbidToManifest( JNIEnv*, jclass, jlong jhandle, jboolean jwrite_dbid_to_manifest) { auto* opt = reinterpret_cast(jhandle); opt->write_dbid_to_manifest = static_cast(jwrite_dbid_to_manifest); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: writeDbidToManifest * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_writeDbidToManifest(JNIEnv*, jclass, +jboolean Java_org_forstdb_DBOptions_writeDbidToManifest(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->write_dbid_to_manifest); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setLogReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setLogReadaheadSize(JNIEnv*, jclass, +void Java_org_forstdb_DBOptions_setLogReadaheadSize(JNIEnv*, jclass, jlong jhandle, jlong jlog_readahead_size) { auto* opt = reinterpret_cast(jhandle); @@ -7788,66 +7788,66 @@ void Java_org_rocksdb_DBOptions_setLogReadaheadSize(JNIEnv*, jclass, } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: logReasaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_logReadaheadSize(JNIEnv*, jclass, +jlong Java_org_forstdb_DBOptions_logReadaheadSize(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->log_readahead_size); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setBestEffortsRecovery * Signature: (JZ)V */ -void Java_org_rocksdb_DBOptions_setBestEffortsRecovery( +void Java_org_forstdb_DBOptions_setBestEffortsRecovery( JNIEnv*, jclass, jlong jhandle, jboolean jbest_efforts_recovery) { auto* opt = reinterpret_cast(jhandle); opt->best_efforts_recovery = static_cast(jbest_efforts_recovery); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: bestEffortsRecovery * Signature: (J)Z */ -jboolean Java_org_rocksdb_DBOptions_bestEffortsRecovery(JNIEnv*, jclass, +jboolean Java_org_forstdb_DBOptions_bestEffortsRecovery(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->best_efforts_recovery); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setMaxBgErrorResumeCount * Signature: (JI)V */ -void Java_org_rocksdb_DBOptions_setMaxBgErrorResumeCount( +void 
Java_org_forstdb_DBOptions_setMaxBgErrorResumeCount( JNIEnv*, jclass, jlong jhandle, jint jmax_bgerror_resume_count) { auto* opt = reinterpret_cast(jhandle); opt->max_bgerror_resume_count = static_cast(jmax_bgerror_resume_count); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: maxBgerrorResumeCount * Signature: (J)I */ -jint Java_org_rocksdb_DBOptions_maxBgerrorResumeCount(JNIEnv*, jclass, +jint Java_org_forstdb_DBOptions_maxBgerrorResumeCount(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_bgerror_resume_count); } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: setBgerrorResumeRetryInterval * Signature: (JJ)V */ -void Java_org_rocksdb_DBOptions_setBgerrorResumeRetryInterval( +void Java_org_forstdb_DBOptions_setBgerrorResumeRetryInterval( JNIEnv*, jclass, jlong jhandle, jlong jbgerror_resume_retry_interval) { auto* opt = reinterpret_cast(jhandle); opt->bgerror_resume_retry_interval = @@ -7855,11 +7855,11 @@ void Java_org_rocksdb_DBOptions_setBgerrorResumeRetryInterval( } /* - * Class: org_rocksdb_DBOptions + * Class: org_forstdb_DBOptions * Method: bgerrorResumeRetryInterval * Signature: (J)J */ -jlong Java_org_rocksdb_DBOptions_bgerrorResumeRetryInterval(JNIEnv*, jclass, +jlong Java_org_forstdb_DBOptions_bgerrorResumeRetryInterval(JNIEnv*, jclass, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->bgerror_resume_retry_interval); @@ -7869,21 +7869,21 @@ jlong Java_org_rocksdb_DBOptions_bgerrorResumeRetryInterval(JNIEnv*, jclass, // ROCKSDB_NAMESPACE::WriteOptions /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: newWriteOptions * Signature: ()J */ -jlong Java_org_rocksdb_WriteOptions_newWriteOptions(JNIEnv*, jclass) { +jlong Java_org_forstdb_WriteOptions_newWriteOptions(JNIEnv*, jclass) { auto* op = new ROCKSDB_NAMESPACE::WriteOptions(); return GET_CPLUSPLUS_POINTER(op); } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: copyWriteOptions * Signature: (J)J */ -jlong Java_org_rocksdb_WriteOptions_copyWriteOptions(JNIEnv*, jclass, +jlong Java_org_forstdb_WriteOptions_copyWriteOptions(JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::WriteOptions( *(reinterpret_cast(jhandle))); @@ -7891,11 +7891,11 @@ jlong Java_org_rocksdb_WriteOptions_copyWriteOptions(JNIEnv*, jclass, } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: disposeInternal * Signature: ()V */ -void Java_org_rocksdb_WriteOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_WriteOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* write_options = reinterpret_cast(jhandle); @@ -7904,30 +7904,30 @@ void Java_org_rocksdb_WriteOptions_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: setSync * Signature: (JZ)V */ -void Java_org_rocksdb_WriteOptions_setSync(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_WriteOptions_setSync(JNIEnv*, jobject, jlong jhandle, jboolean jflag) { reinterpret_cast(jhandle)->sync = jflag; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: sync * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_sync(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_WriteOptions_sync(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->sync; } /* - * Class: org_rocksdb_WriteOptions + * 
Class: org_forstdb_WriteOptions * Method: setDisableWAL * Signature: (JZ)V */ -void Java_org_rocksdb_WriteOptions_setDisableWAL(JNIEnv*, jobject, +void Java_org_forstdb_WriteOptions_setDisableWAL(JNIEnv*, jobject, jlong jhandle, jboolean jflag) { reinterpret_cast(jhandle)->disableWAL = @@ -7935,22 +7935,22 @@ void Java_org_rocksdb_WriteOptions_setDisableWAL(JNIEnv*, jobject, } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: disableWAL * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_disableWAL(JNIEnv*, jobject, +jboolean Java_org_forstdb_WriteOptions_disableWAL(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->disableWAL; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: setIgnoreMissingColumnFamilies * Signature: (JZ)V */ -void Java_org_rocksdb_WriteOptions_setIgnoreMissingColumnFamilies( +void Java_org_forstdb_WriteOptions_setIgnoreMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle, jboolean jignore_missing_column_families) { reinterpret_cast(jhandle) ->ignore_missing_column_families = @@ -7958,22 +7958,22 @@ void Java_org_rocksdb_WriteOptions_setIgnoreMissingColumnFamilies( } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: ignoreMissingColumnFamilies * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_ignoreMissingColumnFamilies( +jboolean Java_org_forstdb_WriteOptions_ignoreMissingColumnFamilies( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->ignore_missing_column_families; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: setNoSlowdown * Signature: (JZ)V */ -void Java_org_rocksdb_WriteOptions_setNoSlowdown(JNIEnv*, jobject, +void Java_org_forstdb_WriteOptions_setNoSlowdown(JNIEnv*, jobject, jlong jhandle, jboolean jno_slowdown) { reinterpret_cast(jhandle)->no_slowdown = @@ -7981,53 +7981,53 @@ void Java_org_rocksdb_WriteOptions_setNoSlowdown(JNIEnv*, jobject, } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: noSlowdown * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_noSlowdown(JNIEnv*, jobject, +jboolean Java_org_forstdb_WriteOptions_noSlowdown(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->no_slowdown; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: setLowPri * Signature: (JZ)V */ -void Java_org_rocksdb_WriteOptions_setLowPri(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_WriteOptions_setLowPri(JNIEnv*, jobject, jlong jhandle, jboolean jlow_pri) { reinterpret_cast(jhandle)->low_pri = static_cast(jlow_pri); } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: lowPri * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_lowPri(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_WriteOptions_lowPri(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->low_pri; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: memtableInsertHintPerBatch * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteOptions_memtableInsertHintPerBatch( +jboolean Java_org_forstdb_WriteOptions_memtableInsertHintPerBatch( JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->memtable_insert_hint_per_batch; } /* - * Class: org_rocksdb_WriteOptions + * Class: org_forstdb_WriteOptions * Method: setMemtableInsertHintPerBatch * Signature: (JZ)V */ -void 
Java_org_rocksdb_WriteOptions_setMemtableInsertHintPerBatch( +void Java_org_forstdb_WriteOptions_setMemtableInsertHintPerBatch( JNIEnv*, jobject, jlong jhandle, jboolean jmemtable_insert_hint_per_batch) { reinterpret_cast(jhandle) ->memtable_insert_hint_per_batch = @@ -8038,21 +8038,21 @@ void Java_org_rocksdb_WriteOptions_setMemtableInsertHintPerBatch( // ROCKSDB_NAMESPACE::ReadOptions /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: newReadOptions * Signature: ()J */ -jlong Java_org_rocksdb_ReadOptions_newReadOptions__(JNIEnv*, jclass) { +jlong Java_org_forstdb_ReadOptions_newReadOptions__(JNIEnv*, jclass) { auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions(); return GET_CPLUSPLUS_POINTER(read_options); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: newReadOptions * Signature: (ZZ)J */ -jlong Java_org_rocksdb_ReadOptions_newReadOptions__ZZ( +jlong Java_org_forstdb_ReadOptions_newReadOptions__ZZ( JNIEnv*, jclass, jboolean jverify_checksums, jboolean jfill_cache) { auto* read_options = new ROCKSDB_NAMESPACE::ReadOptions( static_cast(jverify_checksums), static_cast(jfill_cache)); @@ -8060,11 +8060,11 @@ jlong Java_org_rocksdb_ReadOptions_newReadOptions__ZZ( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: copyReadOptions * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_copyReadOptions(JNIEnv*, jclass, +jlong Java_org_forstdb_ReadOptions_copyReadOptions(JNIEnv*, jclass, jlong jhandle) { auto new_opt = new ROCKSDB_NAMESPACE::ReadOptions( *(reinterpret_cast(jhandle))); @@ -8072,11 +8072,11 @@ jlong Java_org_rocksdb_ReadOptions_copyReadOptions(JNIEnv*, jclass, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ReadOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_ReadOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* read_options = reinterpret_cast(jhandle); @@ -8085,169 +8085,169 @@ void Java_org_rocksdb_ReadOptions_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setVerifyChecksums * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setVerifyChecksums( +void Java_org_forstdb_ReadOptions_setVerifyChecksums( JNIEnv*, jobject, jlong jhandle, jboolean jverify_checksums) { reinterpret_cast(jhandle)->verify_checksums = static_cast(jverify_checksums); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: verifyChecksums * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_verifyChecksums(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_verifyChecksums(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->verify_checksums; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setFillCache * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setFillCache(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setFillCache(JNIEnv*, jobject, jlong jhandle, jboolean jfill_cache) { reinterpret_cast(jhandle)->fill_cache = static_cast(jfill_cache); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: fillCache * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_fillCache(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_fillCache(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->fill_cache; } /* - * Class: 
org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setTailing * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setTailing(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setTailing(JNIEnv*, jobject, jlong jhandle, jboolean jtailing) { reinterpret_cast(jhandle)->tailing = static_cast(jtailing); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: tailing * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_tailing(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_ReadOptions_tailing(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->tailing; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: managed * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_managed(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_ReadOptions_managed(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->managed; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setManaged * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setManaged(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setManaged(JNIEnv*, jobject, jlong jhandle, jboolean jmanaged) { reinterpret_cast(jhandle)->managed = static_cast(jmanaged); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: totalOrderSeek * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_totalOrderSeek(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_totalOrderSeek(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->total_order_seek; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setTotalOrderSeek * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setTotalOrderSeek( +void Java_org_forstdb_ReadOptions_setTotalOrderSeek( JNIEnv*, jobject, jlong jhandle, jboolean jtotal_order_seek) { reinterpret_cast(jhandle)->total_order_seek = static_cast(jtotal_order_seek); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: prefixSameAsStart * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_prefixSameAsStart(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_prefixSameAsStart(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle) ->prefix_same_as_start; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setPrefixSameAsStart * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setPrefixSameAsStart( +void Java_org_forstdb_ReadOptions_setPrefixSameAsStart( JNIEnv*, jobject, jlong jhandle, jboolean jprefix_same_as_start) { reinterpret_cast(jhandle) ->prefix_same_as_start = static_cast(jprefix_same_as_start); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: pinData * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_pinData(JNIEnv*, jobject, jlong jhandle) { +jboolean Java_org_forstdb_ReadOptions_pinData(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->pin_data; } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setPinData * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setPinData(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setPinData(JNIEnv*, jobject, jlong jhandle, jboolean jpin_data) { reinterpret_cast(jhandle)->pin_data = static_cast(jpin_data); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * 
Method: backgroundPurgeOnIteratorCleanup * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_backgroundPurgeOnIteratorCleanup( +jboolean Java_org_forstdb_ReadOptions_backgroundPurgeOnIteratorCleanup( JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->background_purge_on_iterator_cleanup); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setBackgroundPurgeOnIteratorCleanup * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup( +void Java_org_forstdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup( JNIEnv*, jobject, jlong jhandle, jboolean jbackground_purge_on_iterator_cleanup) { auto* opt = reinterpret_cast(jhandle); @@ -8256,22 +8256,22 @@ void Java_org_rocksdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: readaheadSize * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_readaheadSize(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_readaheadSize(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->readahead_size); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setReadaheadSize * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setReadaheadSize(JNIEnv*, jobject, +void Java_org_forstdb_ReadOptions_setReadaheadSize(JNIEnv*, jobject, jlong jhandle, jlong jreadahead_size) { auto* opt = reinterpret_cast(jhandle); @@ -8279,22 +8279,22 @@ void Java_org_rocksdb_ReadOptions_setReadaheadSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: maxSkippableInternalKeys * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_maxSkippableInternalKeys(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_maxSkippableInternalKeys(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->max_skippable_internal_keys); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setMaxSkippableInternalKeys * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setMaxSkippableInternalKeys( +void Java_org_forstdb_ReadOptions_setMaxSkippableInternalKeys( JNIEnv*, jobject, jlong jhandle, jlong jmax_skippable_internal_keys) { auto* opt = reinterpret_cast(jhandle); opt->max_skippable_internal_keys = @@ -8302,76 +8302,76 @@ void Java_org_rocksdb_ReadOptions_setMaxSkippableInternalKeys( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: ignoreRangeDeletions * Signature: (J)Z */ -jboolean Java_org_rocksdb_ReadOptions_ignoreRangeDeletions(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_ignoreRangeDeletions(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->ignore_range_deletions); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setIgnoreRangeDeletions * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setIgnoreRangeDeletions( +void Java_org_forstdb_ReadOptions_setIgnoreRangeDeletions( JNIEnv*, jobject, jlong jhandle, jboolean jignore_range_deletions) { auto* opt = reinterpret_cast(jhandle); opt->ignore_range_deletions = static_cast(jignore_range_deletions); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setSnapshot * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setSnapshot(JNIEnv*, jobject, jlong jhandle, +void 
Java_org_forstdb_ReadOptions_setSnapshot(JNIEnv*, jobject, jlong jhandle, jlong jsnapshot) { reinterpret_cast(jhandle)->snapshot = reinterpret_cast(jsnapshot); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: snapshot * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_snapshot(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_ReadOptions_snapshot(JNIEnv*, jobject, jlong jhandle) { auto& snapshot = reinterpret_cast(jhandle)->snapshot; return GET_CPLUSPLUS_POINTER(snapshot); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: readTier * Signature: (J)B */ -jbyte Java_org_rocksdb_ReadOptions_readTier(JNIEnv*, jobject, jlong jhandle) { +jbyte Java_org_forstdb_ReadOptions_readTier(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast(jhandle)->read_tier); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setReadTier * Signature: (JB)V */ -void Java_org_rocksdb_ReadOptions_setReadTier(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setReadTier(JNIEnv*, jobject, jlong jhandle, jbyte jread_tier) { reinterpret_cast(jhandle)->read_tier = static_cast(jread_tier); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setIterateUpperBound * Signature: (JJ)I */ -void Java_org_rocksdb_ReadOptions_setIterateUpperBound( +void Java_org_forstdb_ReadOptions_setIterateUpperBound( JNIEnv*, jobject, jlong jhandle, jlong jupper_bound_slice_handle) { reinterpret_cast(jhandle) ->iterate_upper_bound = @@ -8379,11 +8379,11 @@ void Java_org_rocksdb_ReadOptions_setIterateUpperBound( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: iterateUpperBound * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_iterateUpperBound(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_iterateUpperBound(JNIEnv*, jobject, jlong jhandle) { auto& upper_bound_slice_handle = reinterpret_cast(jhandle) @@ -8392,11 +8392,11 @@ jlong Java_org_rocksdb_ReadOptions_iterateUpperBound(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setIterateLowerBound * Signature: (JJ)I */ -void Java_org_rocksdb_ReadOptions_setIterateLowerBound( +void Java_org_forstdb_ReadOptions_setIterateLowerBound( JNIEnv*, jobject, jlong jhandle, jlong jlower_bound_slice_handle) { reinterpret_cast(jhandle) ->iterate_lower_bound = @@ -8404,11 +8404,11 @@ void Java_org_rocksdb_ReadOptions_setIterateLowerBound( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: iterateLowerBound * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_iterateLowerBound(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_iterateLowerBound(JNIEnv*, jobject, jlong jhandle) { auto& lower_bound_slice_handle = reinterpret_cast(jhandle) @@ -8417,11 +8417,11 @@ jlong Java_org_rocksdb_ReadOptions_iterateLowerBound(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setTableFilter * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setTableFilter( +void Java_org_forstdb_ReadOptions_setTableFilter( JNIEnv*, jobject, jlong jhandle, jlong jjni_table_filter_handle) { auto* opt = reinterpret_cast(jhandle); auto* jni_table_filter = @@ -8431,44 +8431,44 @@ void Java_org_rocksdb_ReadOptions_setTableFilter( } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: autoPrefixMode * Signature: (J)Z */ -jboolean 
Java_org_rocksdb_ReadOptions_autoPrefixMode(JNIEnv*, jobject, +jboolean Java_org_forstdb_ReadOptions_autoPrefixMode(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->auto_prefix_mode); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setAutoPrefixMode * Signature: (JZ)V */ -void Java_org_rocksdb_ReadOptions_setAutoPrefixMode( +void Java_org_forstdb_ReadOptions_setAutoPrefixMode( JNIEnv*, jobject, jlong jhandle, jboolean jauto_prefix_mode) { auto* opt = reinterpret_cast(jhandle); opt->auto_prefix_mode = static_cast(jauto_prefix_mode); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: timestamp * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_timestamp(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_ReadOptions_timestamp(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); auto& timestamp_slice_handle = opt->timestamp; return reinterpret_cast(timestamp_slice_handle); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setTimestamp * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setTimestamp(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setTimestamp(JNIEnv*, jobject, jlong jhandle, jlong jtimestamp_slice_handle) { auto* opt = reinterpret_cast(jhandle); opt->timestamp = @@ -8476,11 +8476,11 @@ void Java_org_rocksdb_ReadOptions_setTimestamp(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: iterStartTs * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_iterStartTs(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_iterStartTs(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); auto& iter_start_ts_handle = opt->iter_start_ts; @@ -8488,11 +8488,11 @@ jlong Java_org_rocksdb_ReadOptions_iterStartTs(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setIterStartTs * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setIterStartTs(JNIEnv*, jobject, +void Java_org_forstdb_ReadOptions_setIterStartTs(JNIEnv*, jobject, jlong jhandle, jlong jiter_start_ts_handle) { auto* opt = reinterpret_cast(jhandle); @@ -8501,42 +8501,42 @@ void Java_org_rocksdb_ReadOptions_setIterStartTs(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: deadline * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_deadline(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_ReadOptions_deadline(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->deadline.count()); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setDeadline * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setDeadline(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setDeadline(JNIEnv*, jobject, jlong jhandle, jlong jdeadline) { auto* opt = reinterpret_cast(jhandle); opt->deadline = std::chrono::microseconds(static_cast(jdeadline)); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: ioTimeout * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_ioTimeout(JNIEnv*, jobject, jlong jhandle) { +jlong Java_org_forstdb_ReadOptions_ioTimeout(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->io_timeout.count()); } /* - * Class: org_rocksdb_ReadOptions + 
* Class: org_forstdb_ReadOptions * Method: setIoTimeout * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setIoTimeout(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_ReadOptions_setIoTimeout(JNIEnv*, jobject, jlong jhandle, jlong jio_timeout) { auto* opt = reinterpret_cast(jhandle); opt->io_timeout = @@ -8544,22 +8544,22 @@ void Java_org_rocksdb_ReadOptions_setIoTimeout(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: valueSizeSofLimit * Signature: (J)J */ -jlong Java_org_rocksdb_ReadOptions_valueSizeSoftLimit(JNIEnv*, jobject, +jlong Java_org_forstdb_ReadOptions_valueSizeSoftLimit(JNIEnv*, jobject, jlong jhandle) { auto* opt = reinterpret_cast(jhandle); return static_cast(opt->value_size_soft_limit); } /* - * Class: org_rocksdb_ReadOptions + * Class: org_forstdb_ReadOptions * Method: setValueSizeSofLimit * Signature: (JJ)V */ -void Java_org_rocksdb_ReadOptions_setValueSizeSoftLimit( +void Java_org_forstdb_ReadOptions_setValueSizeSoftLimit( JNIEnv*, jobject, jlong jhandle, jlong jvalue_size_soft_limit) { auto* opt = reinterpret_cast(jhandle); opt->value_size_soft_limit = static_cast(jvalue_size_soft_limit); @@ -8569,21 +8569,21 @@ void Java_org_rocksdb_ReadOptions_setValueSizeSoftLimit( // ROCKSDB_NAMESPACE::ComparatorOptions /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: newComparatorOptions * Signature: ()J */ -jlong Java_org_rocksdb_ComparatorOptions_newComparatorOptions(JNIEnv*, jclass) { +jlong Java_org_forstdb_ComparatorOptions_newComparatorOptions(JNIEnv*, jclass) { auto* comparator_opt = new ROCKSDB_NAMESPACE::ComparatorJniCallbackOptions(); return GET_CPLUSPLUS_POINTER(comparator_opt); } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: reusedSynchronisationType * Signature: (J)B */ -jbyte Java_org_rocksdb_ComparatorOptions_reusedSynchronisationType( +jbyte Java_org_forstdb_ComparatorOptions_reusedSynchronisationType( JNIEnv*, jobject, jlong jhandle) { auto* comparator_opt = reinterpret_cast( @@ -8594,11 +8594,11 @@ jbyte Java_org_rocksdb_ComparatorOptions_reusedSynchronisationType( } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: setReusedSynchronisationType * Signature: (JB)V */ -void Java_org_rocksdb_ComparatorOptions_setReusedSynchronisationType( +void Java_org_forstdb_ComparatorOptions_setReusedSynchronisationType( JNIEnv*, jobject, jlong jhandle, jbyte jreused_synhcronisation_type) { auto* comparator_opt = reinterpret_cast( @@ -8609,11 +8609,11 @@ void Java_org_rocksdb_ComparatorOptions_setReusedSynchronisationType( } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: useDirectBuffer * Signature: (J)Z */ -jboolean Java_org_rocksdb_ComparatorOptions_useDirectBuffer(JNIEnv*, jobject, +jboolean Java_org_forstdb_ComparatorOptions_useDirectBuffer(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast( @@ -8622,22 +8622,22 @@ jboolean Java_org_rocksdb_ComparatorOptions_useDirectBuffer(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: setUseDirectBuffer * Signature: (JZ)V */ -void Java_org_rocksdb_ComparatorOptions_setUseDirectBuffer( +void Java_org_forstdb_ComparatorOptions_setUseDirectBuffer( JNIEnv*, jobject, jlong jhandle, jboolean jdirect_buffer) { reinterpret_cast(jhandle) ->direct_buffer = jdirect_buffer == JNI_TRUE; } /* - * 
Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: maxReusedBufferSize * Signature: (J)I */ -jint Java_org_rocksdb_ComparatorOptions_maxReusedBufferSize(JNIEnv*, jobject, +jint Java_org_forstdb_ComparatorOptions_maxReusedBufferSize(JNIEnv*, jobject, jlong jhandle) { return static_cast( reinterpret_cast( @@ -8646,22 +8646,22 @@ jint Java_org_rocksdb_ComparatorOptions_maxReusedBufferSize(JNIEnv*, jobject, } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: setMaxReusedBufferSize * Signature: (JI)V */ -void Java_org_rocksdb_ComparatorOptions_setMaxReusedBufferSize( +void Java_org_forstdb_ComparatorOptions_setMaxReusedBufferSize( JNIEnv*, jobject, jlong jhandle, jint jmax_reused_buffer_size) { reinterpret_cast(jhandle) ->max_reused_buffer_size = static_cast(jmax_reused_buffer_size); } /* - * Class: org_rocksdb_ComparatorOptions + * Class: org_forstdb_ComparatorOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_ComparatorOptions_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_ComparatorOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* comparator_opt = reinterpret_cast( @@ -8674,21 +8674,21 @@ void Java_org_rocksdb_ComparatorOptions_disposeInternal(JNIEnv*, jobject, // ROCKSDB_NAMESPACE::FlushOptions /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: newFlushOptions * Signature: ()J */ -jlong Java_org_rocksdb_FlushOptions_newFlushOptions(JNIEnv*, jclass) { +jlong Java_org_forstdb_FlushOptions_newFlushOptions(JNIEnv*, jclass) { auto* flush_opt = new ROCKSDB_NAMESPACE::FlushOptions(); return GET_CPLUSPLUS_POINTER(flush_opt); } /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: setWaitForFlush * Signature: (JZ)V */ -void Java_org_rocksdb_FlushOptions_setWaitForFlush(JNIEnv*, jobject, +void Java_org_forstdb_FlushOptions_setWaitForFlush(JNIEnv*, jobject, jlong jhandle, jboolean jwait) { reinterpret_cast(jhandle)->wait = @@ -8696,21 +8696,21 @@ void Java_org_rocksdb_FlushOptions_setWaitForFlush(JNIEnv*, jobject, } /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: waitForFlush * Signature: (J)Z */ -jboolean Java_org_rocksdb_FlushOptions_waitForFlush(JNIEnv*, jobject, +jboolean Java_org_forstdb_FlushOptions_waitForFlush(JNIEnv*, jobject, jlong jhandle) { return reinterpret_cast(jhandle)->wait; } /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: setAllowWriteStall * Signature: (JZ)V */ -void Java_org_rocksdb_FlushOptions_setAllowWriteStall( +void Java_org_forstdb_FlushOptions_setAllowWriteStall( JNIEnv*, jobject, jlong jhandle, jboolean jallow_write_stall) { auto* flush_options = reinterpret_cast(jhandle); @@ -8718,11 +8718,11 @@ void Java_org_rocksdb_FlushOptions_setAllowWriteStall( } /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: allowWriteStall * Signature: (J)Z */ -jboolean Java_org_rocksdb_FlushOptions_allowWriteStall(JNIEnv*, jobject, +jboolean Java_org_forstdb_FlushOptions_allowWriteStall(JNIEnv*, jobject, jlong jhandle) { auto* flush_options = reinterpret_cast(jhandle); @@ -8730,11 +8730,11 @@ jboolean Java_org_rocksdb_FlushOptions_allowWriteStall(JNIEnv*, jobject, } /* - * Class: org_rocksdb_FlushOptions + * Class: org_forstdb_FlushOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_FlushOptions_disposeInternal(JNIEnv*, jobject, +void 
Java_org_forstdb_FlushOptions_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* flush_opt = reinterpret_cast(jhandle); assert(flush_opt != nullptr); diff --git a/java/rocksjni/options_util.cc b/java/forstjni/options_util.cc similarity index 93% rename from java/rocksjni/options_util.cc rename to java/forstjni/options_util.cc index 5ebdbba92..99c8328a1 100644 --- a/java/rocksjni/options_util.cc +++ b/java/forstjni/options_util.cc @@ -12,10 +12,10 @@ #include -#include "include/org_rocksdb_OptionsUtil.h" +#include "include/org_forstdb_OptionsUtil.h" #include "rocksdb/db.h" #include "rocksdb/env.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" void build_column_family_descriptor_list( JNIEnv* env, jobject jcfds, @@ -52,11 +52,11 @@ void build_column_family_descriptor_list( } /* - * Class: org_rocksdb_OptionsUtil + * Class: org_forstdb_OptionsUtil * Method: loadLatestOptions * Signature: (JLjava/lang/String;JLjava/util/List;)V */ -void Java_org_rocksdb_OptionsUtil_loadLatestOptions( +void Java_org_forstdb_OptionsUtil_loadLatestOptions( JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jdbpath, jlong jdb_opts_handle, jobject jcfds) { jboolean has_exception = JNI_FALSE; @@ -82,11 +82,11 @@ void Java_org_rocksdb_OptionsUtil_loadLatestOptions( } /* - * Class: org_rocksdb_OptionsUtil + * Class: org_forstdb_OptionsUtil * Method: loadOptionsFromFile * Signature: (JLjava/lang/String;JLjava/util/List;)V */ -void Java_org_rocksdb_OptionsUtil_loadOptionsFromFile( +void Java_org_forstdb_OptionsUtil_loadOptionsFromFile( JNIEnv* env, jclass /*jcls*/, jlong cfg_handle, jstring jopts_file_name, jlong jdb_opts_handle, jobject jcfds) { jboolean has_exception = JNI_FALSE; @@ -112,11 +112,11 @@ void Java_org_rocksdb_OptionsUtil_loadOptionsFromFile( } /* - * Class: org_rocksdb_OptionsUtil + * Class: org_forstdb_OptionsUtil * Method: getLatestOptionsFileName * Signature: (Ljava/lang/String;J)Ljava/lang/String; */ -jstring Java_org_rocksdb_OptionsUtil_getLatestOptionsFileName( +jstring Java_org_forstdb_OptionsUtil_getLatestOptionsFileName( JNIEnv* env, jclass /*jcls*/, jstring jdbpath, jlong jenv_handle) { jboolean has_exception = JNI_FALSE; auto db_path = @@ -139,11 +139,11 @@ jstring Java_org_rocksdb_OptionsUtil_getLatestOptionsFileName( } /* - * Class: org_rocksdb_OptionsUtil + * Class: org_forstdb_OptionsUtil * Method: readTableFormatConfig * Signature: (J)Lorg/rocksdb/TableFormatConfig; */ -jobject Java_org_rocksdb_OptionsUtil_readTableFormatConfig(JNIEnv* env, jclass, +jobject Java_org_forstdb_OptionsUtil_readTableFormatConfig(JNIEnv* env, jclass, jlong jcf_options) { if (jcf_options == 0) { env->ThrowNew( diff --git a/java/rocksjni/persistent_cache.cc b/java/forstjni/persistent_cache.cc similarity index 85% rename from java/rocksjni/persistent_cache.cc rename to java/forstjni/persistent_cache.cc index 295d91798..f9a650751 100644 --- a/java/rocksjni/persistent_cache.cc +++ b/java/forstjni/persistent_cache.cc @@ -12,17 +12,17 @@ #include -#include "include/org_rocksdb_PersistentCache.h" +#include "include/org_forstdb_PersistentCache.h" #include "loggerjnicallback.h" #include "portal.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_PersistentCache + * Class: org_forstdb_PersistentCache * Method: newPersistentCache * Signature: (JLjava/lang/String;JJZ)J */ -jlong Java_org_rocksdb_PersistentCache_newPersistentCache( +jlong Java_org_forstdb_PersistentCache_newPersistentCache( JNIEnv* env, jclass, jlong 
jenv_handle, jstring jpath, jlong jsz, jlong jlogger_handle, jboolean joptimized_for_nvm) { auto* rocks_env = reinterpret_cast(jenv_handle); @@ -47,11 +47,11 @@ jlong Java_org_rocksdb_PersistentCache_newPersistentCache( } /* - * Class: org_rocksdb_PersistentCache + * Class: org_forstdb_PersistentCache * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_PersistentCache_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_PersistentCache_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* cache = reinterpret_cast*>( diff --git a/java/rocksjni/portal.h b/java/forstjni/portal.h similarity index 99% rename from java/rocksjni/portal.h rename to java/forstjni/portal.h index c13b8a666..1edb9a0f3 100644 --- a/java/rocksjni/portal.h +++ b/java/forstjni/portal.h @@ -35,16 +35,16 @@ #include "rocksdb/utilities/memory_util.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" -#include "rocksjni/compaction_filter_factory_jnicallback.h" -#include "rocksjni/comparatorjnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/event_listener_jnicallback.h" -#include "rocksjni/loggerjnicallback.h" -#include "rocksjni/table_filter_jnicallback.h" -#include "rocksjni/trace_writer_jnicallback.h" -#include "rocksjni/transaction_notifier_jnicallback.h" -#include "rocksjni/wal_filter_jnicallback.h" -#include "rocksjni/writebatchhandlerjnicallback.h" +#include "forstjni/compaction_filter_factory_jnicallback.h" +#include "forstjni/comparatorjnicallback.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/event_listener_jnicallback.h" +#include "forstjni/loggerjnicallback.h" +#include "forstjni/table_filter_jnicallback.h" +#include "forstjni/trace_writer_jnicallback.h" +#include "forstjni/transaction_notifier_jnicallback.h" +#include "forstjni/wal_filter_jnicallback.h" +#include "forstjni/writebatchhandlerjnicallback.h" // Remove macro on windows #ifdef DELETE diff --git a/java/rocksjni/ratelimiterjni.cc b/java/forstjni/ratelimiterjni.cc similarity index 79% rename from java/rocksjni/ratelimiterjni.cc rename to java/forstjni/ratelimiterjni.cc index 7a17f367e..83aead43c 100644 --- a/java/rocksjni/ratelimiterjni.cc +++ b/java/forstjni/ratelimiterjni.cc @@ -5,17 +5,17 @@ // // This file implements the "bridge" between Java and C++ for RateLimiter. 
-#include "include/org_rocksdb_RateLimiter.h" +#include "include/org_forstdb_RateLimiter.h" #include "rocksdb/rate_limiter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: newRateLimiterHandle * Signature: (JJIBZ)J */ -jlong Java_org_rocksdb_RateLimiter_newRateLimiterHandle( +jlong Java_org_forstdb_RateLimiter_newRateLimiterHandle( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jrate_bytes_per_second, jlong jrefill_period_micros, jint jfairness, jbyte jrate_limiter_mode, jboolean jauto_tune) { @@ -32,11 +32,11 @@ jlong Java_org_rocksdb_RateLimiter_newRateLimiterHandle( } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RateLimiter_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_RateLimiter_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* handle = @@ -46,11 +46,11 @@ void Java_org_rocksdb_RateLimiter_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: setBytesPerSecond * Signature: (JJ)V */ -void Java_org_rocksdb_RateLimiter_setBytesPerSecond(JNIEnv* /*env*/, +void Java_org_forstdb_RateLimiter_setBytesPerSecond(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong jbytes_per_second) { @@ -60,11 +60,11 @@ void Java_org_rocksdb_RateLimiter_setBytesPerSecond(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: getBytesPerSecond * Signature: (J)J */ -jlong Java_org_rocksdb_RateLimiter_getBytesPerSecond(JNIEnv* /*env*/, +jlong Java_org_forstdb_RateLimiter_getBytesPerSecond(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( @@ -74,11 +74,11 @@ jlong Java_org_rocksdb_RateLimiter_getBytesPerSecond(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: request * Signature: (JJ)V */ -void Java_org_rocksdb_RateLimiter_request(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_RateLimiter_request(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong jbytes) { reinterpret_cast*>(handle) ->get() @@ -86,11 +86,11 @@ void Java_org_rocksdb_RateLimiter_request(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: getSingleBurstBytes * Signature: (J)J */ -jlong Java_org_rocksdb_RateLimiter_getSingleBurstBytes(JNIEnv* /*env*/, +jlong Java_org_forstdb_RateLimiter_getSingleBurstBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( @@ -100,11 +100,11 @@ jlong Java_org_rocksdb_RateLimiter_getSingleBurstBytes(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: getTotalBytesThrough * Signature: (J)J */ -jlong Java_org_rocksdb_RateLimiter_getTotalBytesThrough(JNIEnv* /*env*/, +jlong Java_org_forstdb_RateLimiter_getTotalBytesThrough(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast*>( @@ -114,11 +114,11 @@ jlong Java_org_rocksdb_RateLimiter_getTotalBytesThrough(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_RateLimiter + * Class: org_forstdb_RateLimiter * Method: getTotalRequests * Signature: (J)J */ -jlong Java_org_rocksdb_RateLimiter_getTotalRequests(JNIEnv* /*env*/, +jlong Java_org_forstdb_RateLimiter_getTotalRequests(JNIEnv* /*env*/, jobject 
/*jobj*/, jlong handle) { return reinterpret_cast*>( diff --git a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc b/java/forstjni/remove_emptyvalue_compactionfilterjni.cc similarity index 75% rename from java/rocksjni/remove_emptyvalue_compactionfilterjni.cc rename to java/forstjni/remove_emptyvalue_compactionfilterjni.cc index c0b09e151..2164fc44c 100644 --- a/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc +++ b/java/forstjni/remove_emptyvalue_compactionfilterjni.cc @@ -5,16 +5,16 @@ #include -#include "include/org_rocksdb_RemoveEmptyValueCompactionFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "include/org_forstdb_RemoveEmptyValueCompactionFilter.h" +#include "forstjni/cplusplus_to_java_convert.h" #include "utilities/compaction_filters/remove_emptyvalue_compactionfilter.h" /* - * Class: org_rocksdb_RemoveEmptyValueCompactionFilter + * Class: org_forstdb_RemoveEmptyValueCompactionFilter * Method: createNewRemoveEmptyValueCompactionFilter0 * Signature: ()J */ -jlong Java_org_rocksdb_RemoveEmptyValueCompactionFilter_createNewRemoveEmptyValueCompactionFilter0( +jlong Java_org_forstdb_RemoveEmptyValueCompactionFilter_createNewRemoveEmptyValueCompactionFilter0( JNIEnv* /*env*/, jclass /*jcls*/) { auto* compaction_filter = new ROCKSDB_NAMESPACE::RemoveEmptyValueCompactionFilter(); diff --git a/java/rocksjni/restorejni.cc b/java/forstjni/restorejni.cc similarity index 76% rename from java/rocksjni/restorejni.cc rename to java/forstjni/restorejni.cc index aadc86128..a20c883cc 100644 --- a/java/rocksjni/restorejni.cc +++ b/java/forstjni/restorejni.cc @@ -13,27 +13,27 @@ #include -#include "include/org_rocksdb_RestoreOptions.h" +#include "include/org_forstdb_RestoreOptions.h" #include "rocksdb/utilities/backup_engine.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_RestoreOptions + * Class: org_forstdb_RestoreOptions * Method: newRestoreOptions * Signature: (Z)J */ -jlong Java_org_rocksdb_RestoreOptions_newRestoreOptions( +jlong Java_org_forstdb_RestoreOptions_newRestoreOptions( JNIEnv* /*env*/, jclass /*jcls*/, jboolean keep_log_files) { auto* ropt = new ROCKSDB_NAMESPACE::RestoreOptions(keep_log_files); return GET_CPLUSPLUS_POINTER(ropt); } /* - * Class: org_rocksdb_RestoreOptions + * Class: org_forstdb_RestoreOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RestoreOptions_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_RestoreOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* ropt = reinterpret_cast(jhandle); diff --git a/java/rocksjni/rocks_callback_object.cc b/java/forstjni/rocks_callback_object.cc similarity index 87% rename from java/rocksjni/rocks_callback_object.cc rename to java/forstjni/rocks_callback_object.cc index 35513e151..19a32866a 100644 --- a/java/rocksjni/rocks_callback_object.cc +++ b/java/forstjni/rocks_callback_object.cc @@ -8,15 +8,15 @@ #include -#include "include/org_rocksdb_RocksCallbackObject.h" +#include "include/org_forstdb_RocksCallbackObject.h" #include "jnicallback.h" /* - * Class: org_rocksdb_RocksCallbackObject + * Class: org_forstdb_RocksCallbackObject * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RocksCallbackObject_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_RocksCallbackObject_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { // TODO(AR) is deleting from the super class 
JniCallback OK, or must we delete diff --git a/java/rocksjni/rocksdb_exception_test.cc b/java/forstjni/rocksdb_exception_test.cc similarity index 72% rename from java/rocksjni/rocksdb_exception_test.cc rename to java/forstjni/rocksdb_exception_test.cc index 67e62f726..8150bb1ad 100644 --- a/java/rocksjni/rocksdb_exception_test.cc +++ b/java/forstjni/rocksdb_exception_test.cc @@ -5,50 +5,50 @@ #include -#include "include/org_rocksdb_RocksDBExceptionTest.h" +#include "include/org_forstdb_RocksDBExceptionTest.h" #include "rocksdb/slice.h" #include "rocksdb/status.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseException * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseException(JNIEnv* env, +void Java_org_forstdb_RocksDBExceptionTest_raiseException(JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, std::string("test message")); } /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCode * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCode( +void Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "test message", ROCKSDB_NAMESPACE::Status::NotSupported()); } /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseExceptionNoMsgWithStatusCode * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCode( +void Java_org_forstdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::NotSupported()); } /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCodeSubCode * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode( +void Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, "test message", @@ -57,11 +57,11 @@ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode( } /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseExceptionNoMsgWithStatusCodeSubCode * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubCode( +void Java_org_forstdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubCode( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( env, ROCKSDB_NAMESPACE::Status::TimedOut( @@ -69,11 +69,11 @@ void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubC } /* - * Class: org_rocksdb_RocksDBExceptionTest + * Class: org_forstdb_RocksDBExceptionTest * Method: raiseExceptionWithStatusCodeState * Signature: ()V */ -void Java_org_rocksdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeState( +void Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeState( JNIEnv* env, jobject /*jobj*/) { ROCKSDB_NAMESPACE::Slice state("test state"); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew( diff --git a/java/rocksjni/rocksjni.cc b/java/forstjni/rocksjni.cc similarity index 92% rename from 
java/rocksjni/rocksjni.cc rename to java/forstjni/rocksjni.cc index 8b44a21dc..e9c9b7915 100644 --- a/java/rocksjni/rocksjni.cc +++ b/java/forstjni/rocksjni.cc @@ -17,7 +17,7 @@ #include #include -#include "include/org_rocksdb_RocksDB.h" +#include "include/org_forstdb_RocksDB.h" #include "rocksdb/cache.h" #include "rocksdb/convenience.h" #include "rocksdb/db.h" @@ -25,9 +25,9 @@ #include "rocksdb/perf_context.h" #include "rocksdb/types.h" #include "rocksdb/version.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/kv_helper.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/kv_helper.h" +#include "forstjni/portal.h" #ifdef min #undef min @@ -59,11 +59,11 @@ jlong rocksdb_open_helper(JNIEnv* env, jlong jopt_handle, jstring jdb_path, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: open * Signature: (JLjava/lang/String;)J */ -jlong Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2(JNIEnv* env, jclass, +jlong Java_org_forstdb_RocksDB_open__JLjava_lang_String_2(JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path) { return rocksdb_open_helper(env, jopt_handle, jdb_path, @@ -74,11 +74,11 @@ jlong Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: openROnly * Signature: (JLjava/lang/String;Z)J */ -jlong Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2Z( +jlong Java_org_forstdb_RocksDB_openROnly__JLjava_lang_String_2Z( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jboolean jerror_if_wal_file_exists) { const bool error_if_wal_file_exists = jerror_if_wal_file_exists == JNI_TRUE; @@ -178,11 +178,11 @@ jlongArray rocksdb_open_helper( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: openROnly * Signature: (JLjava/lang/String;[[B[JZ)[J */ -jlongArray Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3JZ( +jlongArray Java_org_forstdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3JZ( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options, jboolean jerror_if_wal_file_exists) { @@ -203,11 +203,11 @@ jlongArray Java_org_rocksdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3JZ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: open * Signature: (JLjava/lang/String;[[B[J)[J */ -jlongArray Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J( +jlongArray Java_org_forstdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options) { return rocksdb_open_helper( @@ -221,11 +221,11 @@ jlongArray Java_org_rocksdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: openAsSecondary * Signature: (JLjava/lang/String;Ljava/lang/String;)J */ -jlong Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2( +jlong Java_org_forstdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jstring jsecondary_db_path) { const char* secondary_db_path = @@ -251,12 +251,12 @@ jlong Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_S } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: openAsSecondary * Signature: (JLjava/lang/String;Ljava/lang/String;[[B[J)[J */ jlongArray 
-Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2_3_3B_3J( +Java_org_forstdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jstring jsecondary_db_path, jobjectArray jcolumn_names, jlongArray jcolumn_options) { @@ -287,22 +287,22 @@ Java_org_rocksdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_ } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { +void Java_org_forstdb_RocksDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* db = reinterpret_cast(jhandle); assert(db != nullptr); delete db; } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: closeDatabase * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_closeDatabase(JNIEnv* env, jclass, +void Java_org_forstdb_RocksDB_closeDatabase(JNIEnv* env, jclass, jlong jhandle) { auto* db = reinterpret_cast(jhandle); assert(db != nullptr); @@ -311,11 +311,11 @@ void Java_org_rocksdb_RocksDB_closeDatabase(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: listColumnFamilies * Signature: (JLjava/lang/String;)[[B */ -jobjectArray Java_org_rocksdb_RocksDB_listColumnFamilies(JNIEnv* env, jclass, +jobjectArray Java_org_forstdb_RocksDB_listColumnFamilies(JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path) { std::vector column_family_names; @@ -338,11 +338,11 @@ jobjectArray Java_org_rocksdb_RocksDB_listColumnFamilies(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: createColumnFamily * Signature: (J[BIJ)J */ -jlong Java_org_rocksdb_RocksDB_createColumnFamily(JNIEnv* env, jobject, +jlong Java_org_forstdb_RocksDB_createColumnFamily(JNIEnv* env, jobject, jlong jhandle, jbyteArray jcf_name, jint jcf_name_len, @@ -374,11 +374,11 @@ jlong Java_org_rocksdb_RocksDB_createColumnFamily(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: createColumnFamilies * Signature: (JJ[[B)[J */ -jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__JJ_3_3B( +jlongArray Java_org_forstdb_RocksDB_createColumnFamilies__JJ_3_3B( JNIEnv* env, jobject, jlong jhandle, jlong jcf_options_handle, jobjectArray jcf_names) { auto* db = reinterpret_cast(jhandle); @@ -415,11 +415,11 @@ jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__JJ_3_3B( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: createColumnFamilies * Signature: (J[J[[B)[J */ -jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__J_3J_3_3B( +jlongArray Java_org_forstdb_RocksDB_createColumnFamilies__J_3J_3_3B( JNIEnv* env, jobject, jlong jhandle, jlongArray jcf_options_handles, jobjectArray jcf_names) { auto* db = reinterpret_cast(jhandle); @@ -492,11 +492,11 @@ jlongArray Java_org_rocksdb_RocksDB_createColumnFamilies__J_3J_3_3B( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: createColumnFamilyWithImport * Signature: (J[BIJJ[J)J */ -jlong Java_org_rocksdb_RocksDB_createColumnFamilyWithImport( +jlong Java_org_forstdb_RocksDB_createColumnFamilyWithImport( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jcf_name, jint jcf_name_len, jlong j_cf_options, jlong j_cf_import_options, jlongArray j_metadata_handle_array) { @@ -549,11 +549,11 @@ jlong Java_org_rocksdb_RocksDB_createColumnFamilyWithImport( } /* - * 
Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: dropColumnFamily * Signature: (JJ)V; */ -void Java_org_rocksdb_RocksDB_dropColumnFamily(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_dropColumnFamily(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); @@ -566,11 +566,11 @@ void Java_org_rocksdb_RocksDB_dropColumnFamily(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: dropColumnFamilies * Signature: (J[J)V */ -void Java_org_rocksdb_RocksDB_dropColumnFamilies( +void Java_org_forstdb_RocksDB_dropColumnFamilies( JNIEnv* env, jobject, jlong jdb_handle, jlongArray jcolumn_family_handles) { auto* db_handle = reinterpret_cast(jdb_handle); @@ -602,11 +602,11 @@ void Java_org_rocksdb_RocksDB_dropColumnFamilies( // ROCKSDB_NAMESPACE::DB::Put /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: put * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, @@ -625,11 +625,11 @@ void Java_org_rocksdb_RocksDB_put__J_3BII_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: put * Signature: (J[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, @@ -659,11 +659,11 @@ void Java_org_rocksdb_RocksDB_put__J_3BII_3BIIJ(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: put * Signature: (JJ[BII[BII)V */ -void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, @@ -684,11 +684,11 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: put * Signature: (JJ[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( +void Java_org_forstdb_RocksDB_put__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -714,11 +714,11 @@ void Java_org_rocksdb_RocksDB_put__JJ_3BII_3BIIJ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: putDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_RocksDB_putDirect( +void Java_org_forstdb_RocksDB_putDirect( JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -784,11 +784,11 @@ bool rocksdb_delete_helper(JNIEnv* env, ROCKSDB_NAMESPACE::DB* db, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: delete * Signature: (J[BII)V */ -void Java_org_rocksdb_RocksDB_delete__J_3BII(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_delete__J_3BII(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { auto* db = reinterpret_cast(jdb_handle); @@ -799,11 +799,11 @@ void 
Java_org_rocksdb_RocksDB_delete__J_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: delete * Signature: (J[BIIJ)V */ -void Java_org_rocksdb_RocksDB_delete__J_3BIIJ(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_delete__J_3BIIJ(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { @@ -823,11 +823,11 @@ void Java_org_rocksdb_RocksDB_delete__J_3BIIJ(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: delete * Signature: (JJ[BII)V */ -void Java_org_rocksdb_RocksDB_delete__JJ_3BII(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_delete__JJ_3BII(JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_off, @@ -840,11 +840,11 @@ void Java_org_rocksdb_RocksDB_delete__JJ_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: delete * Signature: (JJ[BIIJ)V */ -void Java_org_rocksdb_RocksDB_delete__JJ_3BIIJ( +void Java_org_forstdb_RocksDB_delete__JJ_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -901,11 +901,11 @@ bool rocksdb_single_delete_helper( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: singleDelete * Signature: (J[BI)V */ -void Java_org_rocksdb_RocksDB_singleDelete__J_3BI(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_singleDelete__J_3BI(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_len) { @@ -917,11 +917,11 @@ void Java_org_rocksdb_RocksDB_singleDelete__J_3BI(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: singleDelete * Signature: (J[BIJ)V */ -void Java_org_rocksdb_RocksDB_singleDelete__J_3BIJ(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_singleDelete__J_3BIJ(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_len, @@ -942,11 +942,11 @@ void Java_org_rocksdb_RocksDB_singleDelete__J_3BIJ(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: singleDelete * Signature: (JJ[BIJ)V */ -void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BI(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_singleDelete__JJ_3BI(JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, @@ -959,11 +959,11 @@ void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BI(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: singleDelete * Signature: (JJ[BIJ)V */ -void Java_org_rocksdb_RocksDB_singleDelete__JJ_3BIJ( +void Java_org_forstdb_RocksDB_singleDelete__JJ_3BIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -1036,11 +1036,11 @@ bool rocksdb_delete_range_helper( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteRange * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BII( +void Java_org_forstdb_RocksDB_deleteRange__J_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { @@ -1140,11 +1140,11 @@ jint rocksdb_get_helper_direct( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteRange * Signature: 
(J[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BIIJ( +void Java_org_forstdb_RocksDB_deleteRange__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len, jlong jcf_handle) { @@ -1165,11 +1165,11 @@ void Java_org_rocksdb_RocksDB_deleteRange__J_3BII_3BIIJ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteRange * Signature: (JJ[BII[BII)V */ -void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BII( +void Java_org_forstdb_RocksDB_deleteRange__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { @@ -1182,11 +1182,11 @@ void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BII( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteRange * Signature: (JJ[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( +void Java_org_forstdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len, @@ -1208,11 +1208,11 @@ void Java_org_rocksdb_RocksDB_deleteRange__JJ_3BII_3BIIJ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: clipColumnFamily * Signature: (JJ[BII[BII)V */ -void Java_org_rocksdb_RocksDB_clipColumnFamily( +void Java_org_forstdb_RocksDB_clipColumnFamily( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jbyteArray jbegin_key, jint jbegin_key_off, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_off, jint jend_key_len) { @@ -1263,11 +1263,11 @@ void Java_org_rocksdb_RocksDB_clipColumnFamily( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I */ -jint Java_org_rocksdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, +jint Java_org_forstdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jropt_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, @@ -1289,11 +1289,11 @@ jint Java_org_rocksdb_RocksDB_getDirect(JNIEnv* env, jobject /*jdb*/, // ROCKSDB_NAMESPACE::DB::Merge /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: merge * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, @@ -1312,11 +1312,11 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: merge * Signature: (J[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( +void Java_org_forstdb_RocksDB_merge__J_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -1343,11 +1343,11 @@ void Java_org_rocksdb_RocksDB_merge__J_3BII_3BIIJ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: merge * Signature: (JJ[BII[BII)V */ -void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( +void Java_org_forstdb_RocksDB_merge__JJ_3BII_3BII( JNIEnv* env, jobject, jlong jdb_handle, jlong 
jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len) { @@ -1365,11 +1365,11 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BII( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: merge * Signature: (JJ[BII[BIIJ)V */ -void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( +void Java_org_forstdb_RocksDB_merge__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -1396,11 +1396,11 @@ void Java_org_rocksdb_RocksDB_merge__JJ_3BII_3BIIJ( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: mergeDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_RocksDB_mergeDirect( +void Java_org_forstdb_RocksDB_mergeDirect( JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -1429,11 +1429,11 @@ void Java_org_rocksdb_RocksDB_mergeDirect( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteDirect * Signature: (JJLjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_RocksDB_deleteDirect(JNIEnv* env, jobject /*jdb*/, +void Java_org_forstdb_RocksDB_deleteDirect(JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jwrite_options, jobject jkey, jint jkey_offset, jint jkey_len, @@ -1463,11 +1463,11 @@ void Java_org_rocksdb_RocksDB_deleteDirect(JNIEnv* env, jobject /*jdb*/, ////////////////////////////////////////////////////////////////////////////// // ROCKSDB_NAMESPACE::DB::Write /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: write0 * Signature: (JJJ)V */ -void Java_org_rocksdb_RocksDB_write0(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_write0(JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jlong jwb_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -1483,11 +1483,11 @@ void Java_org_rocksdb_RocksDB_write0(JNIEnv* env, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: write1 * Signature: (JJJ)V */ -void Java_org_rocksdb_RocksDB_write1(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_write1(JNIEnv* env, jobject, jlong jdb_handle, jlong jwrite_options_handle, jlong jwbwi_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -1554,11 +1554,11 @@ jbyteArray rocksdb_get_helper( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (J[BII)[B */ -jbyteArray Java_org_rocksdb_RocksDB_get__J_3BII(JNIEnv* env, jobject, +jbyteArray Java_org_forstdb_RocksDB_get__J_3BII(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { @@ -1568,11 +1568,11 @@ jbyteArray Java_org_rocksdb_RocksDB_get__J_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (J[BIIJ)[B */ -jbyteArray Java_org_rocksdb_RocksDB_get__J_3BIIJ(JNIEnv* env, jobject, +jbyteArray Java_org_forstdb_RocksDB_get__J_3BIIJ(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, @@ -1592,11 +1592,11 @@ jbyteArray Java_org_rocksdb_RocksDB_get__J_3BIIJ(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (JJ[BII)[B */ -jbyteArray 
Java_org_rocksdb_RocksDB_get__JJ_3BII(JNIEnv* env, jobject, +jbyteArray Java_org_forstdb_RocksDB_get__JJ_3BII(JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, @@ -1608,11 +1608,11 @@ jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (JJ[BIIJ)[B */ -jbyteArray Java_org_rocksdb_RocksDB_get__JJ_3BIIJ( +jbyteArray Java_org_forstdb_RocksDB_get__JJ_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jlong jcf_handle) { auto* db_handle = reinterpret_cast(jdb_handle); @@ -1697,11 +1697,11 @@ jint rocksdb_get_helper( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (J[BII[BII)I */ -jint Java_org_rocksdb_RocksDB_get__J_3BII_3BII(JNIEnv* env, jobject, +jint Java_org_forstdb_RocksDB_get__J_3BII_3BII(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, @@ -1714,11 +1714,11 @@ jint Java_org_rocksdb_RocksDB_get__J_3BII_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (J[BII[BIIJ)I */ -jint Java_org_rocksdb_RocksDB_get__J_3BII_3BIIJ(JNIEnv* env, jobject, +jint Java_org_forstdb_RocksDB_get__J_3BII_3BIIJ(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, @@ -1742,11 +1742,11 @@ jint Java_org_rocksdb_RocksDB_get__J_3BII_3BIIJ(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (JJ[BII[BII)I */ -jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BII(JNIEnv* env, jobject, +jint Java_org_forstdb_RocksDB_get__JJ_3BII_3BII(JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, @@ -1760,11 +1760,11 @@ jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BII(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: get * Signature: (JJ[BII[BIIJ)I */ -jint Java_org_rocksdb_RocksDB_get__JJ_3BII_3BIIJ( +jint Java_org_forstdb_RocksDB_get__JJ_3BII_3BIIJ( JNIEnv* env, jobject, jlong jdb_handle, jlong jropt_handle, jbyteArray jkey, jint jkey_off, jint jkey_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcf_handle) { @@ -2150,11 +2150,11 @@ void multi_get_helper_direct(JNIEnv* env, jobject, ROCKSDB_NAMESPACE::DB* db, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: multiGet * Signature: (J[[B[I[I)[[B */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I( +jobjectArray Java_org_forstdb_RocksDB_multiGet__J_3_3B_3I_3I( JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens) { return multi_get_helper( @@ -2163,11 +2163,11 @@ jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: multiGet * Signature: (J[[B[I[I[J)[[B */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I_3J( +jobjectArray Java_org_forstdb_RocksDB_multiGet__J_3_3B_3I_3I_3J( JNIEnv* env, jobject jdb, jlong jdb_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens, jlongArray jcolumn_family_handles) { @@ -2178,11 +2178,11 @@ jobjectArray Java_org_rocksdb_RocksDB_multiGet__J_3_3B_3I_3I_3J( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: multiGet * Signature: (JJ[[B[I[I)[[B */ 
-jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I( +jobjectArray Java_org_forstdb_RocksDB_multiGet__JJ_3_3B_3I_3I( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens) { return multi_get_helper( @@ -2192,11 +2192,11 @@ jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: multiGet * Signature: (JJ[[B[I[I[J)[[B */ -jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( +jobjectArray Java_org_forstdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, jobjectArray jkeys, jintArray jkey_offs, jintArray jkey_lens, jlongArray jcolumn_family_handles) { @@ -2207,12 +2207,12 @@ jobjectArray Java_org_rocksdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: multiGet * Signature: * (JJ[J[Ljava/nio/ByteBuffer;[I[I[Ljava/nio/ByteBuffer;[I[Lorg/rocksdb/Status;)V */ -void Java_org_rocksdb_RocksDB_multiGet__JJ_3J_3Ljava_nio_ByteBuffer_2_3I_3I_3Ljava_nio_ByteBuffer_2_3I_3Lorg_rocksdb_Status_2( +void Java_org_forstdb_RocksDB_multiGet__JJ_3J_3Ljava_nio_ByteBuffer_2_3I_3I_3Ljava_nio_ByteBuffer_2_3I_3Lorg_forstdb_Status_2( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, jlongArray jcolumn_family_handles, jobjectArray jkeys, jintArray jkey_offsets, jintArray jkey_lengths, jobjectArray jvalues, @@ -2357,11 +2357,11 @@ jboolean key_exists_helper(JNIEnv* env, jlong jdb_handle, jlong jcf_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: keyExist * Signature: (JJJ[BII)Z */ -jboolean Java_org_rocksdb_RocksDB_keyExists(JNIEnv* env, jobject, +jboolean Java_org_forstdb_RocksDB_keyExists(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, @@ -2387,11 +2387,11 @@ jboolean Java_org_rocksdb_RocksDB_keyExists(JNIEnv* env, jobject, final int keyLength); - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: keyExistDirect * Signature: (JJJLjava/nio/ByteBuffer;II)Z */ -jboolean Java_org_rocksdb_RocksDB_keyExistsDirect( +jboolean Java_org_forstdb_RocksDB_keyExistsDirect( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len) { char* key = reinterpret_cast(env->GetDirectBufferAddress(jkey)); @@ -2414,11 +2414,11 @@ jboolean Java_org_rocksdb_RocksDB_keyExistsDirect( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: keyMayExist * Signature: (JJJ[BII)Z */ -jboolean Java_org_rocksdb_RocksDB_keyMayExist( +jboolean Java_org_forstdb_RocksDB_keyMayExist( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { bool has_exception = false; @@ -2438,11 +2438,11 @@ jboolean Java_org_rocksdb_RocksDB_keyMayExist( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: keyMayExistDirect * Signature: (JJJLjava/nio/ByteBuffer;II)Z */ -jboolean Java_org_rocksdb_RocksDB_keyMayExistDirect( +jboolean Java_org_forstdb_RocksDB_keyMayExistDirect( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len) { bool has_exception = false; @@ -2461,12 +2461,12 @@ jboolean Java_org_rocksdb_RocksDB_keyMayExistDirect( } /* - * Class: org_rocksdb_RocksDB + * Class: 
org_forstdb_RocksDB * Method: keyMayExistDirectFoundValue * Signature: * (JJJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)[J */ -jintArray Java_org_rocksdb_RocksDB_keyMayExistDirectFoundValue( +jintArray Java_org_forstdb_RocksDB_keyMayExistDirectFoundValue( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jobject jkey, jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, jint jval_len) { @@ -2533,11 +2533,11 @@ jintArray Java_org_rocksdb_RocksDB_keyMayExistDirectFoundValue( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: keyMayExistFoundValue * Signature: (JJJ[BII)[[B */ -jobjectArray Java_org_rocksdb_RocksDB_keyMayExistFoundValue( +jobjectArray Java_org_forstdb_RocksDB_keyMayExistFoundValue( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jread_opts_handle, jbyteArray jkey, jint jkey_offset, jint jkey_len) { bool has_exception = false; @@ -2621,11 +2621,11 @@ jobjectArray Java_org_rocksdb_RocksDB_keyMayExistFoundValue( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: iterator * Signature: (JJJ)J */ -jlong Java_org_rocksdb_RocksDB_iterator(JNIEnv*, jobject, jlong db_handle, +jlong Java_org_forstdb_RocksDB_iterator(JNIEnv*, jobject, jlong db_handle, jlong jcf_handle, jlong jread_options_handle) { auto* db = reinterpret_cast(db_handle); @@ -2637,11 +2637,11 @@ jlong Java_org_rocksdb_RocksDB_iterator(JNIEnv*, jobject, jlong db_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: iterators * Signature: (J[JJ)[J */ -jlongArray Java_org_rocksdb_RocksDB_iterators(JNIEnv* env, jobject, +jlongArray Java_org_forstdb_RocksDB_iterators(JNIEnv* env, jobject, jlong db_handle, jlongArray jcolumn_family_handles, jlong jread_options_handle) { @@ -2701,7 +2701,7 @@ jlongArray Java_org_rocksdb_RocksDB_iterators(JNIEnv* env, jobject, * Method: getSnapshot * Signature: (J)J */ -jlong Java_org_rocksdb_RocksDB_getSnapshot(JNIEnv*, jobject, jlong db_handle) { +jlong Java_org_forstdb_RocksDB_getSnapshot(JNIEnv*, jobject, jlong db_handle) { auto* db = reinterpret_cast(db_handle); const ROCKSDB_NAMESPACE::Snapshot* snapshot = db->GetSnapshot(); return GET_CPLUSPLUS_POINTER(snapshot); @@ -2711,7 +2711,7 @@ jlong Java_org_rocksdb_RocksDB_getSnapshot(JNIEnv*, jobject, jlong db_handle) { * Method: releaseSnapshot * Signature: (JJ)V */ -void Java_org_rocksdb_RocksDB_releaseSnapshot(JNIEnv*, jobject, jlong db_handle, +void Java_org_forstdb_RocksDB_releaseSnapshot(JNIEnv*, jobject, jlong db_handle, jlong snapshot_handle) { auto* db = reinterpret_cast(db_handle); auto* snapshot = @@ -2720,11 +2720,11 @@ void Java_org_rocksdb_RocksDB_releaseSnapshot(JNIEnv*, jobject, jlong db_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getProperty * Signature: (JJLjava/lang/String;I)Ljava/lang/String; */ -jstring Java_org_rocksdb_RocksDB_getProperty(JNIEnv* env, jobject, +jstring Java_org_forstdb_RocksDB_getProperty(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, jint jproperty_len) { @@ -2758,11 +2758,11 @@ jstring Java_org_rocksdb_RocksDB_getProperty(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getMapProperty * Signature: (JJLjava/lang/String;I)Ljava/util/Map; */ -jobject Java_org_rocksdb_RocksDB_getMapProperty(JNIEnv* env, jobject, +jobject Java_org_forstdb_RocksDB_getMapProperty(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, @@ -2797,11 
+2797,11 @@ jobject Java_org_rocksdb_RocksDB_getMapProperty(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getLongProperty * Signature: (JJLjava/lang/String;I)J */ -jlong Java_org_rocksdb_RocksDB_getLongProperty(JNIEnv* env, jobject, +jlong Java_org_forstdb_RocksDB_getLongProperty(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jstring jproperty, @@ -2836,21 +2836,21 @@ jlong Java_org_rocksdb_RocksDB_getLongProperty(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: resetStats * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_resetStats(JNIEnv*, jobject, jlong jdb_handle) { +void Java_org_forstdb_RocksDB_resetStats(JNIEnv*, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); db->ResetStats(); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getAggregatedLongProperty * Signature: (JLjava/lang/String;I)J */ -jlong Java_org_rocksdb_RocksDB_getAggregatedLongProperty(JNIEnv* env, jobject, +jlong Java_org_forstdb_RocksDB_getAggregatedLongProperty(JNIEnv* env, jobject, jlong db_handle, jstring jproperty, jint jproperty_len) { @@ -2874,11 +2874,11 @@ jlong Java_org_rocksdb_RocksDB_getAggregatedLongProperty(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getApproximateSizes * Signature: (JJ[JB)[J */ -jlongArray Java_org_rocksdb_RocksDB_getApproximateSizes( +jlongArray Java_org_forstdb_RocksDB_getApproximateSizes( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlongArray jrange_slice_handles, jbyte jinclude_flags) { const jsize jlen = env->GetArrayLength(jrange_slice_handles); @@ -2953,11 +2953,11 @@ jlongArray Java_org_rocksdb_RocksDB_getApproximateSizes( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getApproximateMemTableStats * Signature: (JJJJ)[J */ -jlongArray Java_org_rocksdb_RocksDB_getApproximateMemTableStats( +jlongArray Java_org_forstdb_RocksDB_getApproximateMemTableStats( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlong jstartHandle, jlong jlimitHandle) { auto* start = reinterpret_cast(jstartHandle); @@ -2997,11 +2997,11 @@ jlongArray Java_org_rocksdb_RocksDB_getApproximateMemTableStats( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: compactRange * Signature: (J[BI[BIJJ)V */ -void Java_org_rocksdb_RocksDB_compactRange(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_compactRange(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jbegin, jint jbegin_len, jbyteArray jend, jint jend_len, @@ -3073,11 +3073,11 @@ void Java_org_rocksdb_RocksDB_compactRange(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: setOptions * Signature: (JJ[Ljava/lang/String;[Ljava/lang/String;)V */ -void Java_org_rocksdb_RocksDB_setOptions(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_setOptions(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jobjectArray jkeys, jobjectArray jvalues) { const jsize len = env->GetArrayLength(jkeys); @@ -3136,11 +3136,11 @@ void Java_org_rocksdb_RocksDB_setOptions(JNIEnv* env, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: setDBOptions * Signature: (J[Ljava/lang/String;[Ljava/lang/String;)V */ -void Java_org_rocksdb_RocksDB_setDBOptions(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_setDBOptions(JNIEnv* env, jobject, jlong jdb_handle, jobjectArray 
jkeys, jobjectArray jvalues) { const jsize len = env->GetArrayLength(jkeys); @@ -3194,11 +3194,11 @@ void Java_org_rocksdb_RocksDB_setDBOptions(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getOptions * Signature: (JJ)Ljava/lang/String; */ -jstring Java_org_rocksdb_RocksDB_getOptions(JNIEnv* env, jobject, +jstring Java_org_forstdb_RocksDB_getOptions(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3223,11 +3223,11 @@ jstring Java_org_rocksdb_RocksDB_getOptions(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getDBOptions * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_RocksDB_getDBOptions(JNIEnv* env, jobject, +jstring Java_org_forstdb_RocksDB_getDBOptions(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3243,42 +3243,42 @@ jstring Java_org_rocksdb_RocksDB_getDBOptions(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: setPerfLevel * Signature: (JB)V */ -void Java_org_rocksdb_RocksDB_setPerfLevel(JNIEnv*, jobject, +void Java_org_forstdb_RocksDB_setPerfLevel(JNIEnv*, jobject, jbyte jperf_level) { - rocksdb::SetPerfLevel( + ROCKSDB_NAMESPACE::SetPerfLevel( ROCKSDB_NAMESPACE::PerfLevelTypeJni::toCppPerfLevelType(jperf_level)); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getPerfLevel * Signature: (J)B */ -jbyte Java_org_rocksdb_RocksDB_getPerfLevelNative(JNIEnv*, jobject) { +jbyte Java_org_forstdb_RocksDB_getPerfLevelNative(JNIEnv*, jobject) { return ROCKSDB_NAMESPACE::PerfLevelTypeJni::toJavaPerfLevelType( - rocksdb::GetPerfLevel()); + ROCKSDB_NAMESPACE::GetPerfLevel()); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getPerfContextNative * Signature: ()J */ -jlong Java_org_rocksdb_RocksDB_getPerfContextNative(JNIEnv*, jobject) { - ROCKSDB_NAMESPACE::PerfContext* perf_context = rocksdb::get_perf_context(); +jlong Java_org_forstdb_RocksDB_getPerfContextNative(JNIEnv*, jobject) { + ROCKSDB_NAMESPACE::PerfContext* perf_context = ROCKSDB_NAMESPACE::get_perf_context(); return reinterpret_cast(perf_context); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: compactFiles * Signature: (JJJ[Ljava/lang/String;IIJ)[Ljava/lang/String; */ -jobjectArray Java_org_rocksdb_RocksDB_compactFiles( +jobjectArray Java_org_forstdb_RocksDB_compactFiles( JNIEnv* env, jobject, jlong jdb_handle, jlong jcompaction_opts_handle, jlong jcf_handle, jobjectArray jinput_file_names, jint joutput_level, jint joutput_path_id, jlong jcompaction_job_info_handle) { @@ -3324,11 +3324,11 @@ jobjectArray Java_org_rocksdb_RocksDB_compactFiles( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: cancelAllBackgroundWork * Signature: (JZ)V */ -void Java_org_rocksdb_RocksDB_cancelAllBackgroundWork(JNIEnv*, jobject, +void Java_org_forstdb_RocksDB_cancelAllBackgroundWork(JNIEnv*, jobject, jlong jdb_handle, jboolean jwait) { auto* db = reinterpret_cast(jdb_handle); @@ -3336,11 +3336,11 @@ void Java_org_rocksdb_RocksDB_cancelAllBackgroundWork(JNIEnv*, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: pauseBackgroundWork * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_pauseBackgroundWork(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_pauseBackgroundWork(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = 
reinterpret_cast(jdb_handle); auto s = db->PauseBackgroundWork(); @@ -3350,11 +3350,11 @@ void Java_org_rocksdb_RocksDB_pauseBackgroundWork(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: continueBackgroundWork * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_continueBackgroundWork(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_continueBackgroundWork(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->ContinueBackgroundWork(); @@ -3364,11 +3364,11 @@ void Java_org_rocksdb_RocksDB_continueBackgroundWork(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: enableAutoCompaction * Signature: (J[J)V */ -void Java_org_rocksdb_RocksDB_enableAutoCompaction(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_enableAutoCompaction(JNIEnv* env, jobject, jlong jdb_handle, jlongArray jcf_handles) { auto* db = reinterpret_cast(jdb_handle); @@ -3385,11 +3385,11 @@ void Java_org_rocksdb_RocksDB_enableAutoCompaction(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: numberLevels * Signature: (JJ)I */ -jint Java_org_rocksdb_RocksDB_numberLevels(JNIEnv*, jobject, jlong jdb_handle, +jint Java_org_forstdb_RocksDB_numberLevels(JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; @@ -3403,11 +3403,11 @@ jint Java_org_rocksdb_RocksDB_numberLevels(JNIEnv*, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: maxMemCompactionLevel * Signature: (JJ)I */ -jint Java_org_rocksdb_RocksDB_maxMemCompactionLevel(JNIEnv*, jobject, +jint Java_org_forstdb_RocksDB_maxMemCompactionLevel(JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3422,11 +3422,11 @@ jint Java_org_rocksdb_RocksDB_maxMemCompactionLevel(JNIEnv*, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: level0StopWriteTrigger * Signature: (JJ)I */ -jint Java_org_rocksdb_RocksDB_level0StopWriteTrigger(JNIEnv*, jobject, +jint Java_org_forstdb_RocksDB_level0StopWriteTrigger(JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3441,11 +3441,11 @@ jint Java_org_rocksdb_RocksDB_level0StopWriteTrigger(JNIEnv*, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getName * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_RocksDB_getName(JNIEnv* env, jobject, +jstring Java_org_forstdb_RocksDB_getName(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::string name = db->GetName(); @@ -3453,21 +3453,21 @@ jstring Java_org_rocksdb_RocksDB_getName(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getEnv * Signature: (J)J */ -jlong Java_org_rocksdb_RocksDB_getEnv(JNIEnv*, jobject, jlong jdb_handle) { +jlong Java_org_forstdb_RocksDB_getEnv(JNIEnv*, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); return GET_CPLUSPLUS_POINTER(db->GetEnv()); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: flush * Signature: (JJ[J)V */ -void Java_org_rocksdb_RocksDB_flush(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_flush(JNIEnv* env, jobject, jlong jdb_handle, jlong jflush_opts_handle, jlongArray jcf_handles) { auto* db = 
reinterpret_cast(jdb_handle); @@ -3493,11 +3493,11 @@ void Java_org_rocksdb_RocksDB_flush(JNIEnv* env, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: flushWal * Signature: (JZ)V */ -void Java_org_rocksdb_RocksDB_flushWal(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_flushWal(JNIEnv* env, jobject, jlong jdb_handle, jboolean jsync) { auto* db = reinterpret_cast(jdb_handle); auto s = db->FlushWAL(jsync == JNI_TRUE); @@ -3507,11 +3507,11 @@ void Java_org_rocksdb_RocksDB_flushWal(JNIEnv* env, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: syncWal * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_syncWal(JNIEnv* env, jobject, jlong jdb_handle) { +void Java_org_forstdb_RocksDB_syncWal(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->SyncWAL(); if (!s.ok()) { @@ -3520,22 +3520,22 @@ void Java_org_rocksdb_RocksDB_syncWal(JNIEnv* env, jobject, jlong jdb_handle) { } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getLatestSequenceNumber * Signature: (J)V */ -jlong Java_org_rocksdb_RocksDB_getLatestSequenceNumber(JNIEnv*, jobject, +jlong Java_org_forstdb_RocksDB_getLatestSequenceNumber(JNIEnv*, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); return db->GetLatestSequenceNumber(); } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: disableFileDeletions * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_disableFileDeletions(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_disableFileDeletions(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::Status s = db->DisableFileDeletions(); @@ -3545,11 +3545,11 @@ void Java_org_rocksdb_RocksDB_disableFileDeletions(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: enableFileDeletions * Signature: (JZ)V */ -void Java_org_rocksdb_RocksDB_enableFileDeletions(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_enableFileDeletions(JNIEnv* env, jobject, jlong jdb_handle, jboolean jforce) { auto* db = reinterpret_cast(jdb_handle); @@ -3560,11 +3560,11 @@ void Java_org_rocksdb_RocksDB_enableFileDeletions(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getLiveFiles * Signature: (JZ)[Ljava/lang/String; */ -jobjectArray Java_org_rocksdb_RocksDB_getLiveFiles(JNIEnv* env, jobject, +jobjectArray Java_org_forstdb_RocksDB_getLiveFiles(JNIEnv* env, jobject, jlong jdb_handle, jboolean jflush_memtable) { auto* db = reinterpret_cast(jdb_handle); @@ -3585,11 +3585,11 @@ jobjectArray Java_org_rocksdb_RocksDB_getLiveFiles(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getSortedWalFiles * Signature: (J)[Lorg/rocksdb/LogFile; */ -jobjectArray Java_org_rocksdb_RocksDB_getSortedWalFiles(JNIEnv* env, jobject, +jobjectArray Java_org_forstdb_RocksDB_getSortedWalFiles(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::vector> sorted_wal_files; @@ -3633,11 +3633,11 @@ jobjectArray Java_org_rocksdb_RocksDB_getSortedWalFiles(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getUpdatesSince * Signature: (JJ)J */ -jlong Java_org_rocksdb_RocksDB_getUpdatesSince(JNIEnv* env, jobject, +jlong Java_org_forstdb_RocksDB_getUpdatesSince(JNIEnv* env, jobject, 
jlong jdb_handle, jlong jsequence_number) { auto* db = reinterpret_cast(jdb_handle); @@ -3654,11 +3654,11 @@ jlong Java_org_rocksdb_RocksDB_getUpdatesSince(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteFile * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_RocksDB_deleteFile(JNIEnv* env, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_deleteFile(JNIEnv* env, jobject, jlong jdb_handle, jstring jname) { auto* db = reinterpret_cast(jdb_handle); jboolean has_exception = JNI_FALSE; @@ -3672,11 +3672,11 @@ void Java_org_rocksdb_RocksDB_deleteFile(JNIEnv* env, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getLiveFilesMetaData * Signature: (J)[Lorg/rocksdb/LiveFileMetaData; */ -jobjectArray Java_org_rocksdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, +jobjectArray Java_org_forstdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); std::vector live_files_meta_data; @@ -3719,11 +3719,11 @@ jobjectArray Java_org_rocksdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getColumnFamilyMetaData * Signature: (JJ)Lorg/rocksdb/ColumnFamilyMetaData; */ -jobject Java_org_rocksdb_RocksDB_getColumnFamilyMetaData(JNIEnv* env, jobject, +jobject Java_org_forstdb_RocksDB_getColumnFamilyMetaData(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3741,11 +3741,11 @@ jobject Java_org_rocksdb_RocksDB_getColumnFamilyMetaData(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: ingestExternalFile * Signature: (JJ[Ljava/lang/String;IJ)V */ -void Java_org_rocksdb_RocksDB_ingestExternalFile( +void Java_org_forstdb_RocksDB_ingestExternalFile( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jobjectArray jfile_path_list, jint jfile_path_list_len, jlong jingest_external_file_options_handle) { @@ -3771,11 +3771,11 @@ void Java_org_rocksdb_RocksDB_ingestExternalFile( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: verifyChecksum * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_verifyChecksum(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_verifyChecksum(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->VerifyChecksum(); @@ -3785,11 +3785,11 @@ void Java_org_rocksdb_RocksDB_verifyChecksum(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getDefaultColumnFamily * Signature: (J)J */ -jlong Java_org_rocksdb_RocksDB_getDefaultColumnFamily(JNIEnv*, jobject, +jlong Java_org_forstdb_RocksDB_getDefaultColumnFamily(JNIEnv*, jobject, jlong jdb_handle) { auto* db_handle = reinterpret_cast(jdb_handle); auto* cf_handle = db_handle->DefaultColumnFamily(); @@ -3797,11 +3797,11 @@ jlong Java_org_rocksdb_RocksDB_getDefaultColumnFamily(JNIEnv*, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getPropertiesOfAllTables * Signature: (JJ)Ljava/util/Map; */ -jobject Java_org_rocksdb_RocksDB_getPropertiesOfAllTables(JNIEnv* env, jobject, +jobject Java_org_forstdb_RocksDB_getPropertiesOfAllTables(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3869,11 +3869,11 @@ jobject Java_org_rocksdb_RocksDB_getPropertiesOfAllTables(JNIEnv* env, jobject, } 
/* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: getPropertiesOfTablesInRange * Signature: (JJ[J)Ljava/util/Map; */ -jobject Java_org_rocksdb_RocksDB_getPropertiesOfTablesInRange( +jobject Java_org_forstdb_RocksDB_getPropertiesOfTablesInRange( JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle, jlongArray jrange_slice_handles) { auto* db = reinterpret_cast(jdb_handle); @@ -3922,11 +3922,11 @@ jobject Java_org_rocksdb_RocksDB_getPropertiesOfTablesInRange( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: suggestCompactRange * Signature: (JJ)[J */ -jlongArray Java_org_rocksdb_RocksDB_suggestCompactRange(JNIEnv* env, jobject, +jlongArray Java_org_forstdb_RocksDB_suggestCompactRange(JNIEnv* env, jobject, jlong jdb_handle, jlong jcf_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -3972,11 +3972,11 @@ jlongArray Java_org_rocksdb_RocksDB_suggestCompactRange(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: promoteL0 * Signature: (JJI)V */ -void Java_org_rocksdb_RocksDB_promoteL0(JNIEnv*, jobject, jlong jdb_handle, +void Java_org_forstdb_RocksDB_promoteL0(JNIEnv*, jobject, jlong jdb_handle, jlong jcf_handle, jint jtarget_level) { auto* db = reinterpret_cast(jdb_handle); ROCKSDB_NAMESPACE::ColumnFamilyHandle* cf_handle; @@ -3990,11 +3990,11 @@ void Java_org_rocksdb_RocksDB_promoteL0(JNIEnv*, jobject, jlong jdb_handle, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: startTrace * Signature: (JJJ)V */ -void Java_org_rocksdb_RocksDB_startTrace( +void Java_org_forstdb_RocksDB_startTrace( JNIEnv* env, jobject, jlong jdb_handle, jlong jmax_trace_file_size, jlong jtrace_writer_jnicallback_handle) { auto* db = reinterpret_cast(jdb_handle); @@ -4013,11 +4013,11 @@ void Java_org_rocksdb_RocksDB_startTrace( } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: endTrace * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_endTrace(JNIEnv* env, jobject, jlong jdb_handle) { +void Java_org_forstdb_RocksDB_endTrace(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->EndTrace(); if (!s.ok()) { @@ -4026,11 +4026,11 @@ void Java_org_rocksdb_RocksDB_endTrace(JNIEnv* env, jobject, jlong jdb_handle) { } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: tryCatchUpWithPrimary * Signature: (J)V */ -void Java_org_rocksdb_RocksDB_tryCatchUpWithPrimary(JNIEnv* env, jobject, +void Java_org_forstdb_RocksDB_tryCatchUpWithPrimary(JNIEnv* env, jobject, jlong jdb_handle) { auto* db = reinterpret_cast(jdb_handle); auto s = db->TryCatchUpWithPrimary(); @@ -4040,11 +4040,11 @@ void Java_org_rocksdb_RocksDB_tryCatchUpWithPrimary(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: destroyDB * Signature: (Ljava/lang/String;J)V */ -void Java_org_rocksdb_RocksDB_destroyDB(JNIEnv* env, jclass, jstring jdb_path, +void Java_org_forstdb_RocksDB_destroyDB(JNIEnv* env, jclass, jstring jdb_path, jlong joptions_handle) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); if (db_path == nullptr) { @@ -4095,11 +4095,11 @@ bool get_slice_helper(JNIEnv* env, jobjectArray ranges, jsize index, return true; } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: deleteFilesInRanges * Signature: (JJLjava/util/List;Z)V */ -void Java_org_rocksdb_RocksDB_deleteFilesInRanges(JNIEnv* env, jobject /*jdb*/, +void 
Java_org_forstdb_RocksDB_deleteFilesInRanges(JNIEnv* env, jobject /*jdb*/, jlong jdb_handle, jlong jcf_handle, jobjectArray ranges, @@ -4140,11 +4140,11 @@ void Java_org_rocksdb_RocksDB_deleteFilesInRanges(JNIEnv* env, jobject /*jdb*/, } /* - * Class: org_rocksdb_RocksDB + * Class: org_forstdb_RocksDB * Method: version * Signature: ()I */ -jint Java_org_rocksdb_RocksDB_version(JNIEnv*, jclass) { +jint Java_org_forstdb_RocksDB_version(JNIEnv*, jclass) { uint32_t encodedVersion = (ROCKSDB_MAJOR & 0xff) << 16; encodedVersion |= (ROCKSDB_MINOR & 0xff) << 8; encodedVersion |= (ROCKSDB_PATCH & 0xff); diff --git a/java/rocksjni/slice.cc b/java/forstjni/slice.cc similarity index 79% rename from java/rocksjni/slice.cc rename to java/forstjni/slice.cc index 63c6b1b9f..3d447562c 100644 --- a/java/rocksjni/slice.cc +++ b/java/forstjni/slice.cc @@ -14,20 +14,20 @@ #include -#include "include/org_rocksdb_AbstractSlice.h" -#include "include/org_rocksdb_DirectSlice.h" -#include "include/org_rocksdb_Slice.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_AbstractSlice.h" +#include "include/org_forstdb_DirectSlice.h" +#include "include/org_forstdb_Slice.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" // /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: createNewSlice0 * Signature: ([BI)J */ -jlong Java_org_rocksdb_Slice_createNewSlice0(JNIEnv* env, jclass /*jcls*/, +jlong Java_org_forstdb_Slice_createNewSlice0(JNIEnv* env, jclass /*jcls*/, jbyteArray data, jint offset) { const jsize dataSize = env->GetArrayLength(data); const int len = dataSize - offset; - // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf + // NOTE: buf will be deleted in the Java_org_forstdb_Slice_disposeInternalBuf // method jbyte* buf = new jbyte[len]; env->GetByteArrayRegion(data, offset, len, buf); @@ -151,11 +151,11 @@ jlong Java_org_rocksdb_Slice_createNewSlice0(JNIEnv* env, jclass /*jcls*/, } /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: createNewSlice1 * Signature: ([B)J */ -jlong Java_org_rocksdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, +jlong Java_org_forstdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, jbyteArray data) { jbyte* ptrData = env->GetByteArrayElements(data, nullptr); if (ptrData == nullptr) { @@ -164,7 +164,7 @@ jlong Java_org_rocksdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, } const int len = env->GetArrayLength(data) + 1; - // NOTE: buf will be deleted in the Java_org_rocksdb_Slice_disposeInternalBuf + // NOTE: buf will be deleted in the Java_org_forstdb_Slice_disposeInternalBuf // method char* buf = new char[len]; memcpy(buf, ptrData, len - 1); @@ -178,11 +178,11 @@ jlong Java_org_rocksdb_Slice_createNewSlice1(JNIEnv* env, jclass /*jcls*/, } /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: data0 * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_Slice_data0(JNIEnv* env, jobject /*jobj*/, +jbyteArray Java_org_forstdb_Slice_data0(JNIEnv* env, jobject /*jobj*/, jlong handle) { const auto* slice = reinterpret_cast(handle); const jsize len = static_cast(slice->size()); @@ -205,11 +205,11 @@ jbyteArray Java_org_rocksdb_Slice_data0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: clear0 * Signature: (JZJ)V */ -void Java_org_rocksdb_Slice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Slice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, 
jlong handle, jboolean shouldRelease, jlong internalBufferOffset) { auto* slice = reinterpret_cast(handle); @@ -221,33 +221,33 @@ void Java_org_rocksdb_Slice_clear0(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: removePrefix0 * Signature: (JI)V */ -void Java_org_rocksdb_Slice_removePrefix0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Slice_removePrefix0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jint length) { auto* slice = reinterpret_cast(handle); slice->remove_prefix(length); } /* - * Class: org_rocksdb_DirectSlice + * Class: org_forstdb_DirectSlice * Method: setLength0 * Signature: (JI)V */ -void Java_org_rocksdb_DirectSlice_setLength0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_DirectSlice_setLength0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jint length) { auto* slice = reinterpret_cast(handle); slice->size_ = length; } /* - * Class: org_rocksdb_Slice + * Class: org_forstdb_Slice * Method: disposeInternalBuf * Signature: (JJ)V */ -void Java_org_rocksdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, +void Java_org_forstdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle, jlong internalBufferOffset) { const auto* slice = reinterpret_cast(handle); @@ -260,11 +260,11 @@ void Java_org_rocksdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, // -#include "include/org_rocksdb_SstFileManager.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_SstFileManager.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: newSstFileManager * Signature: (JJJDJ)J */ -jlong Java_org_rocksdb_SstFileManager_newSstFileManager( +jlong Java_org_forstdb_SstFileManager_newSstFileManager( JNIEnv* jnienv, jclass /*jcls*/, jlong jenv_handle, jlong jlogger_handle, jlong jrate_bytes, jdouble jmax_trash_db_ratio, jlong jmax_delete_chunk_bytes) { @@ -56,11 +56,11 @@ jlong Java_org_rocksdb_SstFileManager_newSstFileManager( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: setMaxAllowedSpaceUsage * Signature: (JJ)V */ -void Java_org_rocksdb_SstFileManager_setMaxAllowedSpaceUsage( +void Java_org_forstdb_SstFileManager_setMaxAllowedSpaceUsage( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_allowed_space) { auto* sptr_sst_file_manager = @@ -70,11 +70,11 @@ void Java_org_rocksdb_SstFileManager_setMaxAllowedSpaceUsage( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: setCompactionBufferSize * Signature: (JJ)V */ -void Java_org_rocksdb_SstFileManager_setCompactionBufferSize( +void Java_org_forstdb_SstFileManager_setCompactionBufferSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jcompaction_buffer_size) { auto* sptr_sst_file_manager = @@ -85,11 +85,11 @@ void Java_org_rocksdb_SstFileManager_setCompactionBufferSize( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: isMaxAllowedSpaceReached * Signature: (J)Z */ -jboolean Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReached( +jboolean Java_org_forstdb_SstFileManager_isMaxAllowedSpaceReached( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -98,12 +98,12 @@ jboolean Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReached( } /* - * Class: org_rocksdb_SstFileManager + * Class: 
org_forstdb_SstFileManager * Method: isMaxAllowedSpaceReachedIncludingCompactions * Signature: (J)Z */ jboolean -Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions( +Java_org_forstdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -113,11 +113,11 @@ Java_org_rocksdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: getTotalSize * Signature: (J)J */ -jlong Java_org_rocksdb_SstFileManager_getTotalSize(JNIEnv* /*env*/, +jlong Java_org_forstdb_SstFileManager_getTotalSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = @@ -127,11 +127,11 @@ jlong Java_org_rocksdb_SstFileManager_getTotalSize(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: getTrackedFiles * Signature: (J)Ljava/util/Map; */ -jobject Java_org_rocksdb_SstFileManager_getTrackedFiles(JNIEnv* env, +jobject Java_org_forstdb_SstFileManager_getTrackedFiles(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = @@ -181,11 +181,11 @@ jobject Java_org_rocksdb_SstFileManager_getTrackedFiles(JNIEnv* env, } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: getDeleteRateBytesPerSecond * Signature: (J)J */ -jlong Java_org_rocksdb_SstFileManager_getDeleteRateBytesPerSecond( +jlong Java_org_forstdb_SstFileManager_getDeleteRateBytesPerSecond( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -194,11 +194,11 @@ jlong Java_org_rocksdb_SstFileManager_getDeleteRateBytesPerSecond( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: setDeleteRateBytesPerSecond * Signature: (JJ)V */ -void Java_org_rocksdb_SstFileManager_setDeleteRateBytesPerSecond( +void Java_org_forstdb_SstFileManager_setDeleteRateBytesPerSecond( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdelete_rate) { auto* sptr_sst_file_manager = reinterpret_cast*>( @@ -207,11 +207,11 @@ void Java_org_rocksdb_SstFileManager_setDeleteRateBytesPerSecond( } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: getMaxTrashDBRatio * Signature: (J)D */ -jdouble Java_org_rocksdb_SstFileManager_getMaxTrashDBRatio(JNIEnv* /*env*/, +jdouble Java_org_forstdb_SstFileManager_getMaxTrashDBRatio(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = @@ -221,11 +221,11 @@ jdouble Java_org_rocksdb_SstFileManager_getMaxTrashDBRatio(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: setMaxTrashDBRatio * Signature: (JD)V */ -void Java_org_rocksdb_SstFileManager_setMaxTrashDBRatio(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileManager_setMaxTrashDBRatio(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jdouble jratio) { @@ -236,11 +236,11 @@ void Java_org_rocksdb_SstFileManager_setMaxTrashDBRatio(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_SstFileManager + * Class: org_forstdb_SstFileManager * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_SstFileManager_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileManager_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* sptr_sst_file_manager = diff --git a/java/rocksjni/sst_file_reader_iterator.cc 
b/java/forstjni/sst_file_reader_iterator.cc similarity index 82% rename from java/rocksjni/sst_file_reader_iterator.cc rename to java/forstjni/sst_file_reader_iterator.cc index 68fa4c37c..71e33b781 100644 --- a/java/rocksjni/sst_file_reader_iterator.cc +++ b/java/forstjni/sst_file_reader_iterator.cc @@ -10,16 +10,16 @@ #include #include -#include "include/org_rocksdb_SstFileReaderIterator.h" +#include "include/org_forstdb_SstFileReaderIterator.h" #include "rocksdb/iterator.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileReaderIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -28,66 +28,66 @@ void Java_org_rocksdb_SstFileReaderIterator_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: isValid0 * Signature: (J)Z */ -jboolean Java_org_rocksdb_SstFileReaderIterator_isValid0(JNIEnv* /*env*/, +jboolean Java_org_forstdb_SstFileReaderIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekToFirst0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekToFirst0(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileReaderIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekToLast0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekToLast0(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileReaderIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: next0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_next0(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileReaderIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: prev0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_prev0(JNIEnv* /*env*/, +void Java_org_forstdb_SstFileReaderIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seek0 * Signature: (J[BI)V */ -void Java_org_rocksdb_SstFileReaderIterator_seek0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_SstFileReaderIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { @@ -107,11 +107,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seek0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekForPrev0 * Signature: (J[BI)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekForPrev0(JNIEnv* env, +void Java_org_forstdb_SstFileReaderIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, 
@@ -132,11 +132,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seekForPrev0(JNIEnv* env, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: status0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_status0(JNIEnv* env, +void Java_org_forstdb_SstFileReaderIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -150,11 +150,11 @@ void Java_org_rocksdb_SstFileReaderIterator_status0(JNIEnv* env, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: key0 * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_SstFileReaderIterator_key0(JNIEnv* env, +jbyteArray Java_org_forstdb_SstFileReaderIterator_key0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -172,11 +172,11 @@ jbyteArray Java_org_rocksdb_SstFileReaderIterator_key0(JNIEnv* env, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: value0 * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_SstFileReaderIterator_value0(JNIEnv* env, +jbyteArray Java_org_forstdb_SstFileReaderIterator_value0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -195,11 +195,11 @@ jbyteArray Java_org_rocksdb_SstFileReaderIterator_value0(JNIEnv* env, } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: keyDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ -jint Java_org_rocksdb_SstFileReaderIterator_keyDirect0( +jint Java_org_forstdb_SstFileReaderIterator_keyDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -212,11 +212,11 @@ jint Java_org_rocksdb_SstFileReaderIterator_keyDirect0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. * - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: keyByteArray0 * Signature: (J[BII)I */ -jint Java_org_rocksdb_SstFileReaderIterator_keyByteArray0( +jint Java_org_forstdb_SstFileReaderIterator_keyByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jkey, jint jkey_off, jint jkey_len) { auto* it = reinterpret_cast(handle); @@ -232,11 +232,11 @@ jint Java_org_rocksdb_SstFileReaderIterator_keyByteArray0( } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: valueDirect0 * Signature: (JLjava/nio/ByteBuffer;II)I */ -jint Java_org_rocksdb_SstFileReaderIterator_valueDirect0( +jint Java_org_forstdb_SstFileReaderIterator_valueDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -249,11 +249,11 @@ jint Java_org_rocksdb_SstFileReaderIterator_valueDirect0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. 
* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: valueByteArray0 * Signature: (J[BII)I */ -jint Java_org_rocksdb_SstFileReaderIterator_valueByteArray0( +jint Java_org_forstdb_SstFileReaderIterator_valueByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jvalue_target, jint jvalue_off, jint jvalue_len) { auto* it = reinterpret_cast(handle); @@ -269,11 +269,11 @@ jint Java_org_rocksdb_SstFileReaderIterator_valueByteArray0( } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekDirect0( +void Java_org_forstdb_SstFileReaderIterator_seekDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -285,11 +285,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seekDirect0( } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekForPrevDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekForPrevDirect0( +void Java_org_forstdb_SstFileReaderIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -304,11 +304,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seekForPrevDirect0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. * - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekByteArray0 * Signature: (J[BII)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekByteArray0( +void Java_org_forstdb_SstFileReaderIterator_seekByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { const std::unique_ptr target(new char[jtarget_len]); @@ -331,11 +331,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seekByteArray0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. 
* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: seekForPrevByteArray0 * Signature: (J[BII)V */ -void Java_org_rocksdb_SstFileReaderIterator_seekForPrevByteArray0( +void Java_org_forstdb_SstFileReaderIterator_seekForPrevByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { const std::unique_ptr target(new char[jtarget_len]); @@ -355,11 +355,11 @@ void Java_org_rocksdb_SstFileReaderIterator_seekForPrevByteArray0( } /* - * Class: org_rocksdb_SstFileReaderIterator + * Class: org_forstdb_SstFileReaderIterator * Method: refresh0 * Signature: (J)V */ -void Java_org_rocksdb_SstFileReaderIterator_refresh0(JNIEnv* env, +void Java_org_forstdb_SstFileReaderIterator_refresh0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); diff --git a/java/rocksjni/sst_file_readerjni.cc b/java/forstjni/sst_file_readerjni.cc similarity index 82% rename from java/rocksjni/sst_file_readerjni.cc rename to java/forstjni/sst_file_readerjni.cc index 7ef711842..325ed6251 100644 --- a/java/rocksjni/sst_file_readerjni.cc +++ b/java/forstjni/sst_file_readerjni.cc @@ -11,20 +11,20 @@ #include -#include "include/org_rocksdb_SstFileReader.h" +#include "include/org_forstdb_SstFileReader.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/sst_file_reader.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: newSstFileReader * Signature: (J)J */ -jlong Java_org_rocksdb_SstFileReader_newSstFileReader(JNIEnv * /*env*/, +jlong Java_org_forstdb_SstFileReader_newSstFileReader(JNIEnv * /*env*/, jclass /*jcls*/, jlong joptions) { auto *options = @@ -35,11 +35,11 @@ jlong Java_org_rocksdb_SstFileReader_newSstFileReader(JNIEnv * /*env*/, } /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: open * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_SstFileReader_open(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileReader_open(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jstring jfile_path) { const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); if (file_path == nullptr) { @@ -57,11 +57,11 @@ void Java_org_rocksdb_SstFileReader_open(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: newIterator * Signature: (JJ)J */ -jlong Java_org_rocksdb_SstFileReader_newIterator(JNIEnv * /*env*/, +jlong Java_org_forstdb_SstFileReader_newIterator(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle) { @@ -73,22 +73,22 @@ jlong Java_org_rocksdb_SstFileReader_newIterator(JNIEnv * /*env*/, } /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_SstFileReader_disposeInternal(JNIEnv * /*env*/, +void Java_org_forstdb_SstFileReader_disposeInternal(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); } /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: verifyChecksum * Signature: (J)V */ -void Java_org_rocksdb_SstFileReader_verifyChecksum(JNIEnv *env, +void Java_org_forstdb_SstFileReader_verifyChecksum(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { 
auto *sst_file_reader = @@ -100,11 +100,11 @@ void Java_org_rocksdb_SstFileReader_verifyChecksum(JNIEnv *env, } /* - * Class: org_rocksdb_SstFileReader + * Class: org_forstdb_SstFileReader * Method: getTableProperties * Signature: (J)J */ -jobject Java_org_rocksdb_SstFileReader_getTableProperties(JNIEnv *env, +jobject Java_org_forstdb_SstFileReader_getTableProperties(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { auto *sst_file_reader = diff --git a/java/rocksjni/sst_file_writerjni.cc b/java/forstjni/sst_file_writerjni.cc similarity index 86% rename from java/rocksjni/sst_file_writerjni.cc rename to java/forstjni/sst_file_writerjni.cc index 1898c3cfc..a21742228 100644 --- a/java/rocksjni/sst_file_writerjni.cc +++ b/java/forstjni/sst_file_writerjni.cc @@ -11,20 +11,20 @@ #include -#include "include/org_rocksdb_SstFileWriter.h" +#include "include/org_forstdb_SstFileWriter.h" #include "rocksdb/comparator.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/sst_file_writer.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: newSstFileWriter * Signature: (JJJB)J */ -jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJJB( +jlong Java_org_forstdb_SstFileWriter_newSstFileWriter__JJJB( JNIEnv * /*env*/, jclass /*jcls*/, jlong jenvoptions, jlong joptions, jlong jcomparator_handle, jbyte jcomparator_type) { ROCKSDB_NAMESPACE::Comparator *comparator = nullptr; @@ -51,11 +51,11 @@ jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJJB( } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: newSstFileWriter * Signature: (JJ)J */ -jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJ(JNIEnv * /*env*/, +jlong Java_org_forstdb_SstFileWriter_newSstFileWriter__JJ(JNIEnv * /*env*/, jclass /*jcls*/, jlong jenvoptions, jlong joptions) { @@ -69,11 +69,11 @@ jlong Java_org_rocksdb_SstFileWriter_newSstFileWriter__JJ(JNIEnv * /*env*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: open * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_SstFileWriter_open(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_open(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jstring jfile_path) { const char *file_path = env->GetStringUTFChars(jfile_path, nullptr); if (file_path == nullptr) { @@ -91,11 +91,11 @@ void Java_org_rocksdb_SstFileWriter_open(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: put * Signature: (JJJ)V */ -void Java_org_rocksdb_SstFileWriter_put__JJJ(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_put__JJJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle, jlong jvalue_handle) { auto *key_slice = reinterpret_cast(jkey_handle); @@ -110,11 +110,11 @@ void Java_org_rocksdb_SstFileWriter_put__JJJ(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: put * Signature: (JJJ)V */ -void Java_org_rocksdb_SstFileWriter_put__J_3B_3B(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_put__J_3B_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jbyteArray jval) { jbyte *key = env->GetByteArrayElements(jkey, nullptr); @@ -147,11 +147,11 @@ void Java_org_rocksdb_SstFileWriter_put__J_3B_3B(JNIEnv *env, 
jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_SstFileWriter_putDirect(JNIEnv *env, jobject /*jdb*/, +void Java_org_forstdb_SstFileWriter_putDirect(JNIEnv *env, jobject /*jdb*/, jlong jdb_handle, jobject jkey, jint jkey_off, jint jkey_len, jobject jval, jint jval_off, @@ -171,11 +171,11 @@ void Java_org_rocksdb_SstFileWriter_putDirect(JNIEnv *env, jobject /*jdb*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: fileSize * Signature: (J)J */ -jlong Java_org_rocksdb_SstFileWriter_fileSize(JNIEnv * /*env*/, jobject /*jdb*/, +jlong Java_org_forstdb_SstFileWriter_fileSize(JNIEnv * /*env*/, jobject /*jdb*/, jlong jdb_handle) { auto *writer = reinterpret_cast(jdb_handle); @@ -183,11 +183,11 @@ jlong Java_org_rocksdb_SstFileWriter_fileSize(JNIEnv * /*env*/, jobject /*jdb*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: merge * Signature: (JJJ)V */ -void Java_org_rocksdb_SstFileWriter_merge__JJJ(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_merge__JJJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle, jlong jvalue_handle) { auto *key_slice = reinterpret_cast(jkey_handle); @@ -202,11 +202,11 @@ void Java_org_rocksdb_SstFileWriter_merge__JJJ(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: merge * Signature: (J[B[B)V */ -void Java_org_rocksdb_SstFileWriter_merge__J_3B_3B(JNIEnv *env, +void Java_org_forstdb_SstFileWriter_merge__J_3B_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, @@ -241,11 +241,11 @@ void Java_org_rocksdb_SstFileWriter_merge__J_3B_3B(JNIEnv *env, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: delete * Signature: (JJJ)V */ -void Java_org_rocksdb_SstFileWriter_delete__J_3B(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_delete__J_3B(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey) { jbyte *key = env->GetByteArrayElements(jkey, nullptr); @@ -268,11 +268,11 @@ void Java_org_rocksdb_SstFileWriter_delete__J_3B(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: delete * Signature: (JJJ)V */ -void Java_org_rocksdb_SstFileWriter_delete__JJ(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_delete__JJ(JNIEnv *env, jobject /*jobj*/, jlong jhandle, jlong jkey_handle) { auto *key_slice = reinterpret_cast(jkey_handle); @@ -285,11 +285,11 @@ void Java_org_rocksdb_SstFileWriter_delete__JJ(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: finish * Signature: (J)V */ -void Java_org_rocksdb_SstFileWriter_finish(JNIEnv *env, jobject /*jobj*/, +void Java_org_forstdb_SstFileWriter_finish(JNIEnv *env, jobject /*jobj*/, jlong jhandle) { ROCKSDB_NAMESPACE::Status s = reinterpret_cast(jhandle)->Finish(); @@ -299,11 +299,11 @@ void Java_org_rocksdb_SstFileWriter_finish(JNIEnv *env, jobject /*jobj*/, } /* - * Class: org_rocksdb_SstFileWriter + * Class: org_forstdb_SstFileWriter * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_SstFileWriter_disposeInternal(JNIEnv * /*env*/, +void Java_org_forstdb_SstFileWriter_disposeInternal(JNIEnv * /*env*/, jobject /*jobj*/, jlong jhandle) { delete 
reinterpret_cast(jhandle); diff --git a/java/rocksjni/sst_partitioner.cc b/java/forstjni/sst_partitioner.cc similarity index 74% rename from java/rocksjni/sst_partitioner.cc rename to java/forstjni/sst_partitioner.cc index 1cea3b0cb..f249cb13a 100644 --- a/java/rocksjni/sst_partitioner.cc +++ b/java/forstjni/sst_partitioner.cc @@ -13,17 +13,17 @@ #include -#include "include/org_rocksdb_SstPartitionerFixedPrefixFactory.h" +#include "include/org_forstdb_SstPartitionerFixedPrefixFactory.h" #include "rocksdb/sst_file_manager.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_SstPartitionerFixedPrefixFactory + * Class: org_forstdb_SstPartitionerFixedPrefixFactory * Method: newSstPartitionerFixedPrefixFactory0 * Signature: (J)J */ -jlong Java_org_rocksdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPrefixFactory0( +jlong Java_org_forstdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPrefixFactory0( JNIEnv*, jclass, jlong prefix_len) { auto* ptr = new std::shared_ptr( ROCKSDB_NAMESPACE::NewSstPartitionerFixedPrefixFactory(prefix_len)); @@ -31,11 +31,11 @@ jlong Java_org_rocksdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPr } /* - * Class: org_rocksdb_SstPartitionerFixedPrefixFactory + * Class: org_forstdb_SstPartitionerFixedPrefixFactory * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_SstPartitionerFixedPrefixFactory_disposeInternal( +void Java_org_forstdb_SstPartitionerFixedPrefixFactory_disposeInternal( JNIEnv*, jobject, jlong jhandle) { auto* ptr = reinterpret_cast< std::shared_ptr*>(jhandle); diff --git a/java/rocksjni/statistics.cc b/java/forstjni/statistics.cc similarity index 81% rename from java/rocksjni/statistics.cc rename to java/forstjni/statistics.cc index bd405afa1..c6d0c8257 100644 --- a/java/rocksjni/statistics.cc +++ b/java/forstjni/statistics.cc @@ -13,48 +13,48 @@ #include #include -#include "include/org_rocksdb_Statistics.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/statisticsjni.h" +#include "include/org_forstdb_Statistics.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" +#include "forstjni/statisticsjni.h" /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: newStatistics * Signature: ()J */ -jlong Java_org_rocksdb_Statistics_newStatistics__(JNIEnv* env, jclass jcls) { - return Java_org_rocksdb_Statistics_newStatistics___3BJ(env, jcls, nullptr, 0); +jlong Java_org_forstdb_Statistics_newStatistics__(JNIEnv* env, jclass jcls) { + return Java_org_forstdb_Statistics_newStatistics___3BJ(env, jcls, nullptr, 0); } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: newStatistics * Signature: (J)J */ -jlong Java_org_rocksdb_Statistics_newStatistics__J( +jlong Java_org_forstdb_Statistics_newStatistics__J( JNIEnv* env, jclass jcls, jlong jother_statistics_handle) { - return Java_org_rocksdb_Statistics_newStatistics___3BJ( + return Java_org_forstdb_Statistics_newStatistics___3BJ( env, jcls, nullptr, jother_statistics_handle); } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: newStatistics * Signature: ([B)J */ -jlong Java_org_rocksdb_Statistics_newStatistics___3B(JNIEnv* env, jclass jcls, +jlong Java_org_forstdb_Statistics_newStatistics___3B(JNIEnv* env, jclass jcls, jbyteArray jhistograms) { - return 
Java_org_rocksdb_Statistics_newStatistics___3BJ(env, jcls, jhistograms, + return Java_org_forstdb_Statistics_newStatistics___3BJ(env, jcls, jhistograms, 0); } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: newStatistics * Signature: ([BJ)J */ -jlong Java_org_rocksdb_Statistics_newStatistics___3BJ( +jlong Java_org_forstdb_Statistics_newStatistics___3BJ( JNIEnv* env, jclass, jbyteArray jhistograms, jlong jother_statistics_handle) { std::shared_ptr* pSptr_other_statistics = @@ -100,11 +100,11 @@ jlong Java_org_rocksdb_Statistics_newStatistics___3BJ( } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_Statistics_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_Statistics_disposeInternal(JNIEnv*, jobject, jlong jhandle) { if (jhandle > 0) { auto* pSptr_statistics = @@ -115,11 +115,11 @@ void Java_org_rocksdb_Statistics_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: statsLevel * Signature: (J)B */ -jbyte Java_org_rocksdb_Statistics_statsLevel(JNIEnv*, jobject, jlong jhandle) { +jbyte Java_org_forstdb_Statistics_statsLevel(JNIEnv*, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); @@ -129,11 +129,11 @@ jbyte Java_org_rocksdb_Statistics_statsLevel(JNIEnv*, jobject, jlong jhandle) { } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: setStatsLevel * Signature: (JB)V */ -void Java_org_rocksdb_Statistics_setStatsLevel(JNIEnv*, jobject, jlong jhandle, +void Java_org_forstdb_Statistics_setStatsLevel(JNIEnv*, jobject, jlong jhandle, jbyte jstats_level) { auto* pSptr_statistics = reinterpret_cast*>( @@ -145,11 +145,11 @@ void Java_org_rocksdb_Statistics_setStatsLevel(JNIEnv*, jobject, jlong jhandle, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: getTickerCount * Signature: (JB)J */ -jlong Java_org_rocksdb_Statistics_getTickerCount(JNIEnv*, jobject, +jlong Java_org_forstdb_Statistics_getTickerCount(JNIEnv*, jobject, jlong jhandle, jbyte jticker_type) { auto* pSptr_statistics = @@ -162,11 +162,11 @@ jlong Java_org_rocksdb_Statistics_getTickerCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: getAndResetTickerCount * Signature: (JB)J */ -jlong Java_org_rocksdb_Statistics_getAndResetTickerCount(JNIEnv*, jobject, +jlong Java_org_forstdb_Statistics_getAndResetTickerCount(JNIEnv*, jobject, jlong jhandle, jbyte jticker_type) { auto* pSptr_statistics = @@ -178,11 +178,11 @@ jlong Java_org_rocksdb_Statistics_getAndResetTickerCount(JNIEnv*, jobject, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: getHistogramData * Signature: (JB)Lorg/rocksdb/HistogramData; */ -jobject Java_org_rocksdb_Statistics_getHistogramData(JNIEnv* env, jobject, +jobject Java_org_forstdb_Statistics_getHistogramData(JNIEnv* env, jobject, jlong jhandle, jbyte jhistogram_type) { auto* pSptr_statistics = @@ -219,11 +219,11 @@ jobject Java_org_rocksdb_Statistics_getHistogramData(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: getHistogramString * Signature: (JB)Ljava/lang/String; */ -jstring Java_org_rocksdb_Statistics_getHistogramString(JNIEnv* env, jobject, +jstring Java_org_forstdb_Statistics_getHistogramString(JNIEnv* env, jobject, jlong jhandle, jbyte jhistogram_type) { auto* pSptr_statistics = @@ 
-237,11 +237,11 @@ jstring Java_org_rocksdb_Statistics_getHistogramString(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: reset * Signature: (J)V */ -void Java_org_rocksdb_Statistics_reset(JNIEnv* env, jobject, jlong jhandle) { +void Java_org_forstdb_Statistics_reset(JNIEnv* env, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( jhandle); @@ -253,11 +253,11 @@ void Java_org_rocksdb_Statistics_reset(JNIEnv* env, jobject, jlong jhandle) { } /* - * Class: org_rocksdb_Statistics + * Class: org_forstdb_Statistics * Method: toString * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_Statistics_toString(JNIEnv* env, jobject, +jstring Java_org_forstdb_Statistics_toString(JNIEnv* env, jobject, jlong jhandle) { auto* pSptr_statistics = reinterpret_cast*>( diff --git a/java/rocksjni/statisticsjni.cc b/java/forstjni/statisticsjni.cc similarity index 96% rename from java/rocksjni/statisticsjni.cc rename to java/forstjni/statisticsjni.cc index f46337893..4a053033f 100644 --- a/java/rocksjni/statisticsjni.cc +++ b/java/forstjni/statisticsjni.cc @@ -6,7 +6,7 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Statistics -#include "rocksjni/statisticsjni.h" +#include "forstjni/statisticsjni.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/statisticsjni.h b/java/forstjni/statisticsjni.h similarity index 100% rename from java/rocksjni/statisticsjni.h rename to java/forstjni/statisticsjni.h diff --git a/java/rocksjni/table.cc b/java/forstjni/table.cc similarity index 94% rename from java/rocksjni/table.cc rename to java/forstjni/table.cc index 7f99900e4..b15c9777f 100644 --- a/java/rocksjni/table.cc +++ b/java/forstjni/table.cc @@ -10,19 +10,19 @@ #include -#include "include/org_rocksdb_BlockBasedTableConfig.h" -#include "include/org_rocksdb_PlainTableConfig.h" +#include "include/org_forstdb_BlockBasedTableConfig.h" +#include "include/org_forstdb_PlainTableConfig.h" #include "portal.h" #include "rocksdb/cache.h" #include "rocksdb/filter_policy.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_PlainTableConfig + * Class: org_forstdb_PlainTableConfig * Method: newTableFactoryHandle * Signature: (IIDIIBZZ)J */ -jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( +jlong Java_org_forstdb_PlainTableConfig_newTableFactoryHandle( JNIEnv * /*env*/, jobject /*jobj*/, jint jkey_size, jint jbloom_bits_per_key, jdouble jhash_table_ratio, jint jindex_sparseness, jint jhuge_page_tlb_size, jbyte jencoding_type, jboolean jfull_scan_mode, @@ -43,11 +43,11 @@ jlong Java_org_rocksdb_PlainTableConfig_newTableFactoryHandle( } /* - * Class: org_rocksdb_BlockBasedTableConfig + * Class: org_forstdb_BlockBasedTableConfig * Method: newTableFactoryHandle * Signature: (ZZZZBBDBZJJJJIIIJZZZJZZIIZZBJIJI)J */ -jlong Java_org_rocksdb_BlockBasedTableConfig_newTableFactoryHandle( +jlong Java_org_forstdb_BlockBasedTableConfig_newTableFactoryHandle( JNIEnv *, jobject, jboolean jcache_index_and_filter_blocks, jboolean jcache_index_and_filter_blocks_with_high_priority, jboolean jpin_l0_filter_and_index_blocks_in_cache, diff --git a/java/rocksjni/table_filter.cc b/java/forstjni/table_filter.cc similarity index 72% rename from java/rocksjni/table_filter.cc rename to java/forstjni/table_filter.cc index 1400fa1d9..ac234d889 100644 --- a/java/rocksjni/table_filter.cc +++ b/java/forstjni/table_filter.cc @@ -10,16 +10,16 @@ 
#include -#include "include/org_rocksdb_AbstractTableFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/table_filter_jnicallback.h" +#include "include/org_forstdb_AbstractTableFilter.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/table_filter_jnicallback.h" /* - * Class: org_rocksdb_AbstractTableFilter + * Class: org_forstdb_AbstractTableFilter * Method: createNewTableFilter * Signature: ()J */ -jlong Java_org_rocksdb_AbstractTableFilter_createNewTableFilter( +jlong Java_org_forstdb_AbstractTableFilter_createNewTableFilter( JNIEnv* env, jobject jtable_filter) { auto* table_filter_jnicallback = new ROCKSDB_NAMESPACE::TableFilterJniCallback(env, jtable_filter); diff --git a/java/rocksjni/table_filter_jnicallback.cc b/java/forstjni/table_filter_jnicallback.cc similarity index 96% rename from java/rocksjni/table_filter_jnicallback.cc rename to java/forstjni/table_filter_jnicallback.cc index 5350c5cee..061919f7b 100644 --- a/java/rocksjni/table_filter_jnicallback.cc +++ b/java/forstjni/table_filter_jnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TableFilter. -#include "rocksjni/table_filter_jnicallback.h" +#include "forstjni/table_filter_jnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { TableFilterJniCallback::TableFilterJniCallback(JNIEnv* env, diff --git a/java/rocksjni/table_filter_jnicallback.h b/java/forstjni/table_filter_jnicallback.h similarity index 96% rename from java/rocksjni/table_filter_jnicallback.h rename to java/forstjni/table_filter_jnicallback.h index 0ef404ca2..7fbec5994 100644 --- a/java/rocksjni/table_filter_jnicallback.h +++ b/java/forstjni/table_filter_jnicallback.h @@ -15,7 +15,7 @@ #include #include "rocksdb/table_properties.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/testable_event_listener.cc b/java/forstjni/testable_event_listener.cc similarity index 98% rename from java/rocksjni/testable_event_listener.cc rename to java/forstjni/testable_event_listener.cc index 71188bc3c..a9380d440 100644 --- a/java/rocksjni/testable_event_listener.cc +++ b/java/forstjni/testable_event_listener.cc @@ -7,7 +7,7 @@ #include #include -#include "include/org_rocksdb_test_TestableEventListener.h" +#include "include/org_forstdb_test_TestableEventListener.h" #include "rocksdb/listener.h" #include "rocksdb/status.h" #include "rocksdb/table_properties.h" @@ -73,11 +73,11 @@ static TableProperties newTablePropertiesForTest() { } /* - * Class: org_rocksdb_test_TestableEventListener + * Class: org_forstdb_test_TestableEventListener * Method: invokeAllCallbacks * Signature: (J)V */ -void Java_org_rocksdb_test_TestableEventListener_invokeAllCallbacks( +void Java_org_forstdb_test_TestableEventListener_invokeAllCallbacks( JNIEnv *, jclass, jlong jhandle) { const auto &el = *reinterpret_cast *>( diff --git a/java/rocksjni/thread_status.cc b/java/forstjni/thread_status.cc similarity index 83% rename from java/rocksjni/thread_status.cc rename to java/forstjni/thread_status.cc index c600f6cd5..312e8c590 100644 --- a/java/rocksjni/thread_status.cc +++ b/java/forstjni/thread_status.cc @@ -10,15 +10,15 @@ #include -#include "include/org_rocksdb_ThreadStatus.h" +#include "include/org_forstdb_ThreadStatus.h" #include "portal.h" /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: getThreadTypeName * Signature: 
(B)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_getThreadTypeName( +jstring Java_org_forstdb_ThreadStatus_getThreadTypeName( JNIEnv* env, jclass, jbyte jthread_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetThreadTypeName( ROCKSDB_NAMESPACE::ThreadTypeJni::toCppThreadType(jthread_type_value)); @@ -26,11 +26,11 @@ jstring Java_org_rocksdb_ThreadStatus_getThreadTypeName( } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: getOperationName * Signature: (B)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_getOperationName( +jstring Java_org_forstdb_ThreadStatus_getOperationName( JNIEnv* env, jclass, jbyte joperation_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationName( ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( @@ -39,11 +39,11 @@ jstring Java_org_rocksdb_ThreadStatus_getOperationName( } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: microsToStringNative * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_microsToStringNative(JNIEnv* env, jclass, +jstring Java_org_forstdb_ThreadStatus_microsToStringNative(JNIEnv* env, jclass, jlong jmicros) { auto str = ROCKSDB_NAMESPACE::ThreadStatus::MicrosToString( static_cast(jmicros)); @@ -51,11 +51,11 @@ jstring Java_org_rocksdb_ThreadStatus_microsToStringNative(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: getOperationStageName * Signature: (B)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_getOperationStageName( +jstring Java_org_forstdb_ThreadStatus_getOperationStageName( JNIEnv* env, jclass, jbyte joperation_stage_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationStageName( ROCKSDB_NAMESPACE::OperationStageJni::toCppOperationStage( @@ -64,11 +64,11 @@ jstring Java_org_rocksdb_ThreadStatus_getOperationStageName( } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: getOperationPropertyName * Signature: (BI)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_getOperationPropertyName( +jstring Java_org_forstdb_ThreadStatus_getOperationPropertyName( JNIEnv* env, jclass, jbyte joperation_type_value, jint jindex) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetOperationPropertyName( ROCKSDB_NAMESPACE::OperationTypeJni::toCppOperationType( @@ -78,11 +78,11 @@ jstring Java_org_rocksdb_ThreadStatus_getOperationPropertyName( } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: interpretOperationProperties * Signature: (B[J)Ljava/util/Map; */ -jobject Java_org_rocksdb_ThreadStatus_interpretOperationProperties( +jobject Java_org_forstdb_ThreadStatus_interpretOperationProperties( JNIEnv* env, jclass, jbyte joperation_type_value, jlongArray joperation_properties) { // convert joperation_properties @@ -113,11 +113,11 @@ jobject Java_org_rocksdb_ThreadStatus_interpretOperationProperties( } /* - * Class: org_rocksdb_ThreadStatus + * Class: org_forstdb_ThreadStatus * Method: getStateName * Signature: (B)Ljava/lang/String; */ -jstring Java_org_rocksdb_ThreadStatus_getStateName(JNIEnv* env, jclass, +jstring Java_org_forstdb_ThreadStatus_getStateName(JNIEnv* env, jclass, jbyte jstate_type_value) { auto name = ROCKSDB_NAMESPACE::ThreadStatus::GetStateName( ROCKSDB_NAMESPACE::StateTypeJni::toCppStateType(jstate_type_value)); diff --git a/java/rocksjni/trace_writer.cc b/java/forstjni/trace_writer.cc similarity index 72% rename from 
java/rocksjni/trace_writer.cc rename to java/forstjni/trace_writer.cc index d58276399..8152a0b17 100644 --- a/java/rocksjni/trace_writer.cc +++ b/java/forstjni/trace_writer.cc @@ -8,16 +8,16 @@ #include -#include "include/org_rocksdb_AbstractTraceWriter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/trace_writer_jnicallback.h" +#include "include/org_forstdb_AbstractTraceWriter.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/trace_writer_jnicallback.h" /* - * Class: org_rocksdb_AbstractTraceWriter + * Class: org_forstdb_AbstractTraceWriter * Method: createNewTraceWriter * Signature: ()J */ -jlong Java_org_rocksdb_AbstractTraceWriter_createNewTraceWriter(JNIEnv* env, +jlong Java_org_forstdb_AbstractTraceWriter_createNewTraceWriter(JNIEnv* env, jobject jobj) { auto* trace_writer = new ROCKSDB_NAMESPACE::TraceWriterJniCallback(env, jobj); return GET_CPLUSPLUS_POINTER(trace_writer); diff --git a/java/rocksjni/trace_writer_jnicallback.cc b/java/forstjni/trace_writer_jnicallback.cc similarity index 97% rename from java/rocksjni/trace_writer_jnicallback.cc rename to java/forstjni/trace_writer_jnicallback.cc index d1ed32038..88934f269 100644 --- a/java/rocksjni/trace_writer_jnicallback.cc +++ b/java/forstjni/trace_writer_jnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TraceWriter. -#include "rocksjni/trace_writer_jnicallback.h" +#include "forstjni/trace_writer_jnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { TraceWriterJniCallback::TraceWriterJniCallback(JNIEnv* env, diff --git a/java/rocksjni/trace_writer_jnicallback.h b/java/forstjni/trace_writer_jnicallback.h similarity index 96% rename from java/rocksjni/trace_writer_jnicallback.h rename to java/forstjni/trace_writer_jnicallback.h index c82a3a72c..bb9ee895d 100644 --- a/java/rocksjni/trace_writer_jnicallback.h +++ b/java/forstjni/trace_writer_jnicallback.h @@ -14,7 +14,7 @@ #include #include "rocksdb/trace_reader_writer.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/transaction.cc b/java/forstjni/transaction.cc similarity index 88% rename from java/rocksjni/transaction.cc rename to java/forstjni/transaction.cc index 3e90db8bc..5212d2ad8 100644 --- a/java/rocksjni/transaction.cc +++ b/java/forstjni/transaction.cc @@ -12,10 +12,10 @@ #include -#include "include/org_rocksdb_Transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/kv_helper.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_Transaction.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/kv_helper.h" +#include "forstjni/portal.h" #if defined(_MSC_VER) #pragma warning(push) @@ -24,33 +24,33 @@ #endif /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setSnapshot * Signature: (J)V */ -void Java_org_rocksdb_Transaction_setSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_Transaction_setSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); txn->SetSnapshot(); } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setSnapshotOnNextOperation * Signature: (J)V */ -void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__J( +void Java_org_forstdb_Transaction_setSnapshotOnNextOperation__J( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* 
txn = reinterpret_cast(jhandle); txn->SetSnapshotOnNextOperation(nullptr); } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setSnapshotOnNextOperation * Signature: (JJ)V */ -void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__JJ( +void Java_org_forstdb_Transaction_setSnapshotOnNextOperation__JJ( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jtxn_notifier_handle) { auto* txn = reinterpret_cast(jhandle); @@ -61,11 +61,11 @@ void Java_org_rocksdb_Transaction_setSnapshotOnNextOperation__JJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getSnapshot * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getSnapshot(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -74,11 +74,11 @@ jlong Java_org_rocksdb_Transaction_getSnapshot(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: clearSnapshot * Signature: (J)V */ -void Java_org_rocksdb_Transaction_clearSnapshot(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_clearSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -86,11 +86,11 @@ void Java_org_rocksdb_Transaction_clearSnapshot(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: prepare * Signature: (J)V */ -void Java_org_rocksdb_Transaction_prepare(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_prepare(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Prepare(); @@ -100,11 +100,11 @@ void Java_org_rocksdb_Transaction_prepare(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: commit * Signature: (J)V */ -void Java_org_rocksdb_Transaction_commit(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_commit(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Commit(); @@ -114,11 +114,11 @@ void Java_org_rocksdb_Transaction_commit(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: rollback * Signature: (J)V */ -void Java_org_rocksdb_Transaction_rollback(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_rollback(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Status s = txn->Rollback(); @@ -128,11 +128,11 @@ void Java_org_rocksdb_Transaction_rollback(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setSavePoint * Signature: (J)V */ -void Java_org_rocksdb_Transaction_setSavePoint(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_setSavePoint(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -140,11 +140,11 @@ void Java_org_rocksdb_Transaction_setSavePoint(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: rollbackToSavePoint * Signature: (J)V */ -void Java_org_rocksdb_Transaction_rollbackToSavePoint(JNIEnv* env, +void Java_org_forstdb_Transaction_rollbackToSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -160,11 +160,11 @@ typedef std::function(jhandle); @@ -209,11 
+209,11 @@ jbyteArray Java_org_rocksdb_Transaction_get__JJ_3BII( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: get * Signature: (JJ[BII[BIIJ)I */ -jint Java_org_rocksdb_Transaction_get__JJ_3BII_3BIIJ( +jint Java_org_forstdb_Transaction_get__JJ_3BII_3BIIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_part_len, jlong jcolumn_family_handle) { @@ -237,11 +237,11 @@ jint Java_org_rocksdb_Transaction_get__JJ_3BII_3BIIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getDirect * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I */ -jint Java_org_rocksdb_Transaction_getDirect(JNIEnv* env, jobject, jlong jhandle, +jint Java_org_forstdb_Transaction_getDirect(JNIEnv* env, jobject, jlong jhandle, jlong jread_options_handle, jobject jkey_bb, jint jkey_off, jint jkey_part_len, jobject jval_bb, @@ -268,7 +268,7 @@ jint Java_org_rocksdb_Transaction_getDirect(JNIEnv* env, jobject, jlong jhandle, } } -// TODO(AR) consider refactoring to share this between here and rocksjni.cc +// TODO(AR) consider refactoring to share this between here and forstjni.cc // used by txn_multi_get_helper below std::vector txn_column_families_helper( JNIEnv* env, jlongArray jcolumn_family_handles, bool* has_exception) { @@ -317,7 +317,7 @@ void free_key_values(std::vector& keys_to_free) { } } -// TODO(AR) consider refactoring to share this between here and rocksjni.cc +// TODO(AR) consider refactoring to share this between here and forstjni.cc // cf multi get jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get, const jlong& jread_options_handle, @@ -403,11 +403,11 @@ jobjectArray txn_multi_get_helper(JNIEnv* env, const FnMultiGet& fn_multi_get, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: multiGet * Signature: (JJ[[B[J)[[B */ -jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B_3J( +jobjectArray Java_org_forstdb_Transaction_multiGet__JJ_3_3B_3J( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { bool has_exception = false; @@ -431,11 +431,11 @@ jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B_3J( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: multiGet * Signature: (JJ[[B)[[B */ -jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B( +jobjectArray Java_org_forstdb_Transaction_multiGet__JJ_3_3B( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts) { auto* txn = reinterpret_cast(jhandle); @@ -450,11 +450,11 @@ jobjectArray Java_org_rocksdb_Transaction_multiGet__JJ_3_3B( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getForUpdate * Signature: (JJ[BIIJZZ)[B */ -jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIIJZZ( +jbyteArray Java_org_forstdb_Transaction_getForUpdate__JJ_3BIIJZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jlong jcolumn_family_handle, jboolean jexclusive, jboolean jdo_validate) { @@ -478,11 +478,11 @@ jbyteArray Java_org_rocksdb_Transaction_getForUpdate__JJ_3BIIJZZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getForUpdate * Signature: (JJ[BII[BIIJZZ)I */ -jint 
Java_org_rocksdb_Transaction_getForUpdate__JJ_3BII_3BIIJZZ( +jint Java_org_forstdb_Transaction_getForUpdate__JJ_3BII_3BIIJZZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcolumn_family_handle, @@ -508,11 +508,11 @@ jint Java_org_rocksdb_Transaction_getForUpdate__JJ_3BII_3BIIJZZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getDirectForUpdate * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZZ)I */ -jint Java_org_rocksdb_Transaction_getDirectForUpdate( +jint Java_org_forstdb_Transaction_getDirectForUpdate( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobject jkey_bb, jint jkey_off, jint jkey_part_len, jobject jval_bb, jint jval_off, jint jval_len, jlong jcolumn_family_handle, @@ -540,11 +540,11 @@ jint Java_org_rocksdb_Transaction_getDirectForUpdate( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: multiGetForUpdate * Signature: (JJ[[B[J)[[B */ -jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B_3J( +jobjectArray Java_org_forstdb_Transaction_multiGetForUpdate__JJ_3_3B_3J( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts, jlongArray jcolumn_family_handles) { bool has_exception = false; @@ -569,11 +569,11 @@ jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B_3J( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: multiGetForUpdate * Signature: (JJ[[B)[[B */ -jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B( +jobjectArray Java_org_forstdb_Transaction_multiGetForUpdate__JJ_3_3B( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jobjectArray jkey_parts) { auto* txn = reinterpret_cast(jhandle); @@ -588,11 +588,11 @@ jobjectArray Java_org_rocksdb_Transaction_multiGetForUpdate__JJ_3_3B( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getIterator * Signature: (JJJ)J */ -jlong Java_org_rocksdb_Transaction_getIterator(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getIterator(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jread_options_handle, jlong jcolumn_family_handle) { @@ -607,11 +607,11 @@ jlong Java_org_rocksdb_Transaction_getIterator(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: put * Signature: (J[BII[BIIJZ)V */ -void Java_org_rocksdb_Transaction_put__J_3BII_3BIIJZ( +void Java_org_forstdb_Transaction_put__J_3BII_3BIIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -631,11 +631,11 @@ void Java_org_rocksdb_Transaction_put__J_3BII_3BIIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: put * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_Transaction_put__J_3BII_3BII( +void Java_org_forstdb_Transaction_put__J_3BII_3BII( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len) { @@ -651,11 +651,11 @@ void Java_org_rocksdb_Transaction_put__J_3BII_3BII( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putDirect * Signature: 
(JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V */ -void Java_org_rocksdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( +void Java_org_forstdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -676,11 +676,11 @@ void Java_org_rocksdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( +void Java_org_forstdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len) { auto* txn = reinterpret_cast(jhandle); @@ -699,7 +699,7 @@ typedef std::function FnWriteKVParts; -// TODO(AR) consider refactoring to share this between here and rocksjni.cc +// TODO(AR) consider refactoring to share this between here and forstjni.cc void txn_write_kv_parts_helper(JNIEnv* env, const FnWriteKVParts& fn_write_kv_parts, const jobjectArray& jkey_parts, @@ -799,11 +799,11 @@ void txn_write_kv_parts_helper(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: put * Signature: (J[[BI[[BIJZ)V */ -void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BIJZ( +void Java_org_forstdb_Transaction_put__J_3_3BI_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -823,11 +823,11 @@ void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: put * Signature: (J[[BI[[BI)V */ -void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BI( +void Java_org_forstdb_Transaction_put__J_3_3BI_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { auto* txn = reinterpret_cast(jhandle); @@ -841,11 +841,11 @@ void Java_org_rocksdb_Transaction_put__J_3_3BI_3_3BI( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: merge * Signature: (J[BII[BIIJZ)V */ -void Java_org_rocksdb_Transaction_merge__J_3BII_3BIIJZ( +void Java_org_forstdb_Transaction_merge__J_3BII_3BIIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -865,11 +865,11 @@ void Java_org_rocksdb_Transaction_merge__J_3BII_3BIIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: merge * Signature: (J[BII[BII)V */ -void Java_org_rocksdb_Transaction_merge__J_3BII_3BII( +void Java_org_forstdb_Transaction_merge__J_3BII_3BII( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len) { @@ -885,12 +885,12 @@ void Java_org_rocksdb_Transaction_merge__J_3BII_3BII( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: mergeDirect * Signature: 
(JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V */ JNIEXPORT void JNICALL -Java_org_rocksdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( +Java_org_forstdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ( JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -911,12 +911,12 @@ Java_org_rocksdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_Byt } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: mergeDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V */ JNIEXPORT void JNICALL -Java_org_rocksdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( +Java_org_forstdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II( JNIEnv* env, jobject, jlong jhandle, jobject jkey_bb, jint jkey_off, jint jkey_len, jobject jval_bb, jint jval_off, jint jval_len) { auto* txn = reinterpret_cast(jhandle); @@ -935,7 +935,7 @@ typedef std::function FnWriteK; -// TODO(AR) consider refactoring to share this between here and rocksjni.cc +// TODO(AR) consider refactoring to share this between here and forstjni.cc void txn_write_k_helper(JNIEnv* env, const FnWriteK& fn_write_k, const jbyteArray& jkey, const jint& jkey_part_len) { jbyte* key = env->GetByteArrayElements(jkey, nullptr); @@ -960,11 +960,11 @@ void txn_write_k_helper(JNIEnv* env, const FnWriteK& fn_write_k, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: delete * Signature: (J[BIJZ)V */ -void Java_org_rocksdb_Transaction_delete__J_3BIJZ( +void Java_org_forstdb_Transaction_delete__J_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); @@ -981,11 +981,11 @@ void Java_org_rocksdb_Transaction_delete__J_3BIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: delete * Signature: (J[BI)V */ -void Java_org_rocksdb_Transaction_delete__J_3BI(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_delete__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); @@ -999,7 +999,7 @@ typedef std::function FnWriteKParts; -// TODO(AR) consider refactoring to share this between here and rocksjni.cc +// TODO(AR) consider refactoring to share this between here and forstjni.cc void txn_write_k_parts_helper(JNIEnv* env, const FnWriteKParts& fn_write_k_parts, const jobjectArray& jkey_parts, @@ -1048,11 +1048,11 @@ void txn_write_k_parts_helper(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: delete * Signature: (J[[BIJZ)V */ -void Java_org_rocksdb_Transaction_delete__J_3_3BIJZ( +void Java_org_forstdb_Transaction_delete__J_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -1070,11 +1070,11 @@ void Java_org_rocksdb_Transaction_delete__J_3_3BIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: delete * Signature: (J[[BI)V */ -void Java_org_rocksdb_Transaction_delete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_delete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, jlong 
jhandle, jobjectArray jkey_parts, jint jkey_parts_len) { @@ -1086,11 +1086,11 @@ void Java_org_rocksdb_Transaction_delete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: singleDelete * Signature: (J[BIJZ)V */ -void Java_org_rocksdb_Transaction_singleDelete__J_3BIJZ( +void Java_org_forstdb_Transaction_singleDelete__J_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { auto* txn = reinterpret_cast(jhandle); @@ -1107,11 +1107,11 @@ void Java_org_rocksdb_Transaction_singleDelete__J_3BIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: singleDelete * Signature: (J[BI)V */ -void Java_org_rocksdb_Transaction_singleDelete__J_3BI(JNIEnv* env, +void Java_org_forstdb_Transaction_singleDelete__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, @@ -1125,11 +1125,11 @@ void Java_org_rocksdb_Transaction_singleDelete__J_3BI(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: singleDelete * Signature: (J[[BIJZ)V */ -void Java_org_rocksdb_Transaction_singleDelete__J_3_3BIJZ( +void Java_org_forstdb_Transaction_singleDelete__J_3_3BIJZ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle, jboolean jassume_tracked) { @@ -1148,11 +1148,11 @@ void Java_org_rocksdb_Transaction_singleDelete__J_3_3BIJZ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: singleDelete * Signature: (J[[BI)V */ -void Java_org_rocksdb_Transaction_singleDelete__J_3_3BI(JNIEnv* env, +void Java_org_forstdb_Transaction_singleDelete__J_3_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, @@ -1167,11 +1167,11 @@ void Java_org_rocksdb_Transaction_singleDelete__J_3_3BI(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putUntracked * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BIJ( +void Java_org_forstdb_Transaction_putUntracked__J_3BI_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len, jlong jcolumn_family_handle) { @@ -1191,11 +1191,11 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putUntracked * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BI( +void Java_org_forstdb_Transaction_putUntracked__J_3BI_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jbyteArray jval, jint jval_len) { auto* txn = reinterpret_cast(jhandle); @@ -1210,11 +1210,11 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3BI_3BI( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putUntracked * Signature: (J[[BI[[BIJ)V */ -void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BIJ( +void Java_org_forstdb_Transaction_putUntracked__J_3_3BI_3_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len, jlong jcolumn_family_handle) { @@ -1233,11 +1233,11 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putUntracked * Signature: 
(J[[BI[[BI)V */ -void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BI( +void Java_org_forstdb_Transaction_putUntracked__J_3_3BI_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jobjectArray jvalue_parts, jint jvalue_parts_len) { auto* txn = reinterpret_cast(jhandle); @@ -1251,11 +1251,11 @@ void Java_org_rocksdb_Transaction_putUntracked__J_3_3BI_3_3BI( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: mergeUntracked * Signature: (J[BII[BIIJ)V */ -void Java_org_rocksdb_Transaction_mergeUntracked( +void Java_org_forstdb_Transaction_mergeUntracked( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_off, jint jkey_part_len, jbyteArray jval, jint jval_off, jint jval_len, jlong jcolumn_family_handle) { @@ -1275,11 +1275,11 @@ void Java_org_rocksdb_Transaction_mergeUntracked( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: mergeUntrackedDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_Transaction_mergeUntrackedDirect( +void Java_org_forstdb_Transaction_mergeUntrackedDirect( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobject jkey, jint jkey_off, jint jkey_part_len, jobject jval, jint jval_off, jint jval_len, jlong jcolumn_family_handle) { @@ -1300,11 +1300,11 @@ void Java_org_rocksdb_Transaction_mergeUntrackedDirect( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: deleteUntracked * Signature: (J[BIJ)V */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3BIJ( +void Java_org_forstdb_Transaction_deleteUntracked__J_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); @@ -1320,11 +1320,11 @@ void Java_org_rocksdb_Transaction_deleteUntracked__J_3BIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: deleteUntracked * Signature: (J[BI)V */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3BI(JNIEnv* env, +void Java_org_forstdb_Transaction_deleteUntracked__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, @@ -1338,11 +1338,11 @@ void Java_org_rocksdb_Transaction_deleteUntracked__J_3BI(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: deleteUntracked * Signature: (J[[BIJ)V */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BIJ( +void Java_org_forstdb_Transaction_deleteUntracked__J_3_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); @@ -1360,11 +1360,11 @@ void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: deleteUntracked * Signature: (J[[BI)V */ -void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BI( +void Java_org_forstdb_Transaction_deleteUntracked__J_3_3BI( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jobjectArray jkey_parts, jint jkey_parts_len) { auto* txn = reinterpret_cast(jhandle); @@ -1378,11 +1378,11 @@ void Java_org_rocksdb_Transaction_deleteUntracked__J_3_3BI( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: putLogData * Signature: (J[BI)V */ -void Java_org_rocksdb_Transaction_putLogData(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_putLogData(JNIEnv* env, jobject 
/*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len) { auto* txn = reinterpret_cast(jhandle); @@ -1404,11 +1404,11 @@ void Java_org_rocksdb_Transaction_putLogData(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: disableIndexing * Signature: (J)V */ -void Java_org_rocksdb_Transaction_disableIndexing(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_disableIndexing(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1416,11 +1416,11 @@ void Java_org_rocksdb_Transaction_disableIndexing(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: enableIndexing * Signature: (J)V */ -void Java_org_rocksdb_Transaction_enableIndexing(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_enableIndexing(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1428,33 +1428,33 @@ void Java_org_rocksdb_Transaction_enableIndexing(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getNumKeys * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getNumKeys(JNIEnv* /*env*/, jobject /*jobj*/, +jlong Java_org_forstdb_Transaction_getNumKeys(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumKeys(); } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getNumPuts * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getNumPuts(JNIEnv* /*env*/, jobject /*jobj*/, +jlong Java_org_forstdb_Transaction_getNumPuts(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); return txn->GetNumPuts(); } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getNumDeletes * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getNumDeletes(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getNumDeletes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1462,11 +1462,11 @@ jlong Java_org_rocksdb_Transaction_getNumDeletes(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getNumMerges * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getNumMerges(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getNumMerges(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1474,11 +1474,11 @@ jlong Java_org_rocksdb_Transaction_getNumMerges(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getElapsedTime * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getElapsedTime(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getElapsedTime(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1486,11 +1486,11 @@ jlong Java_org_rocksdb_Transaction_getElapsedTime(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getWriteBatch * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getWriteBatch(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getWriteBatch(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1498,11 +1498,11 @@ jlong Java_org_rocksdb_Transaction_getWriteBatch(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setLockTimeout * 
Signature: (JJ)V */ -void Java_org_rocksdb_Transaction_setLockTimeout(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_setLockTimeout(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jlock_timeout) { @@ -1511,11 +1511,11 @@ void Java_org_rocksdb_Transaction_setLockTimeout(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getWriteOptions * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getWriteOptions(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getWriteOptions(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1523,11 +1523,11 @@ jlong Java_org_rocksdb_Transaction_getWriteOptions(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setWriteOptions * Signature: (JJ)V */ -void Java_org_rocksdb_Transaction_setWriteOptions(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_setWriteOptions(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jwrite_options_handle) { @@ -1538,11 +1538,11 @@ void Java_org_rocksdb_Transaction_setWriteOptions(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: undo * Signature: (J[BIJ)V */ -void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BIJ( +void Java_org_forstdb_Transaction_undoGetForUpdate__J_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, jint jkey_part_len, jlong jcolumn_family_handle) { auto* txn = reinterpret_cast(jhandle); @@ -1563,11 +1563,11 @@ void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BIJ( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: undoGetForUpdate * Signature: (J[BI)V */ -void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BI(JNIEnv* env, +void Java_org_forstdb_Transaction_undoGetForUpdate__J_3BI(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jbyteArray jkey, @@ -1587,11 +1587,11 @@ void Java_org_rocksdb_Transaction_undoGetForUpdate__J_3BI(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: rebuildFromWriteBatch * Signature: (JJ)V */ -void Java_org_rocksdb_Transaction_rebuildFromWriteBatch( +void Java_org_forstdb_Transaction_rebuildFromWriteBatch( JNIEnv* env, jobject /*jobj*/, jlong jhandle, jlong jwrite_batch_handle) { auto* txn = reinterpret_cast(jhandle); auto* write_batch = @@ -1603,11 +1603,11 @@ void Java_org_rocksdb_Transaction_rebuildFromWriteBatch( } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getCommitTimeWriteBatch * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getCommitTimeWriteBatch(JNIEnv* /*env*/, +jlong Java_org_forstdb_Transaction_getCommitTimeWriteBatch(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1615,11 +1615,11 @@ jlong Java_org_rocksdb_Transaction_getCommitTimeWriteBatch(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setLogNumber * Signature: (JJ)V */ -void Java_org_rocksdb_Transaction_setLogNumber(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_setLogNumber(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jlog_number) { auto* txn = reinterpret_cast(jhandle); @@ -1627,11 +1627,11 @@ void Java_org_rocksdb_Transaction_setLogNumber(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getLogNumber * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getLogNumber(JNIEnv* 
/*env*/, +jlong Java_org_forstdb_Transaction_getLogNumber(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1639,11 +1639,11 @@ jlong Java_org_rocksdb_Transaction_getLogNumber(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: setName * Signature: (JLjava/lang/String;)V */ -void Java_org_rocksdb_Transaction_setName(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_Transaction_setName(JNIEnv* env, jobject /*jobj*/, jlong jhandle, jstring jname) { auto* txn = reinterpret_cast(jhandle); const char* name = env->GetStringUTFChars(jname, nullptr); @@ -1662,11 +1662,11 @@ void Java_org_rocksdb_Transaction_setName(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getName * Signature: (J)Ljava/lang/String; */ -jstring Java_org_rocksdb_Transaction_getName(JNIEnv* env, jobject /*jobj*/, +jstring Java_org_forstdb_Transaction_getName(JNIEnv* env, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TransactionName name = txn->GetName(); @@ -1674,11 +1674,11 @@ jstring Java_org_rocksdb_Transaction_getName(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getID * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getID(JNIEnv* /*env*/, jobject /*jobj*/, +jlong Java_org_forstdb_Transaction_getID(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::TransactionID id = txn->GetID(); @@ -1686,11 +1686,11 @@ jlong Java_org_rocksdb_Transaction_getID(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: isDeadlockDetect * Signature: (J)Z */ -jboolean Java_org_rocksdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, +jboolean Java_org_forstdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1698,11 +1698,11 @@ jboolean Java_org_rocksdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getWaitingTxns * Signature: (J)Lorg/rocksdb/Transaction/WaitingTransactions; */ -jobject Java_org_rocksdb_Transaction_getWaitingTxns(JNIEnv* env, +jobject Java_org_forstdb_Transaction_getWaitingTxns(JNIEnv* env, jobject jtransaction_obj, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); @@ -1717,11 +1717,11 @@ jobject Java_org_rocksdb_Transaction_getWaitingTxns(JNIEnv* env, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getState * Signature: (J)B */ -jbyte Java_org_rocksdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/, +jbyte Java_org_forstdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); ROCKSDB_NAMESPACE::Transaction::TransactionState txn_status = txn->GetState(); @@ -1756,11 +1756,11 @@ jbyte Java_org_rocksdb_Transaction_getState(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: getId * Signature: (J)J */ -jlong Java_org_rocksdb_Transaction_getId(JNIEnv* /*env*/, jobject /*jobj*/, +jlong Java_org_forstdb_Transaction_getId(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* txn = reinterpret_cast(jhandle); uint64_t id = txn->GetId(); @@ -1768,11 +1768,11 @@ jlong 
Java_org_rocksdb_Transaction_getId(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_Transaction + * Class: org_forstdb_Transaction * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_Transaction_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_Transaction_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); diff --git a/java/rocksjni/transaction_db.cc b/java/forstjni/transaction_db.cc similarity index 91% rename from java/rocksjni/transaction_db.cc rename to java/forstjni/transaction_db.cc index 0adf85606..0d8ae20c6 100644 --- a/java/rocksjni/transaction_db.cc +++ b/java/forstjni/transaction_db.cc @@ -14,18 +14,18 @@ #include #include -#include "include/org_rocksdb_TransactionDB.h" +#include "include/org_forstdb_TransactionDB.h" #include "rocksdb/options.h" #include "rocksdb/utilities/transaction.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: open * Signature: (JJLjava/lang/String;)J */ -jlong Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2( +jlong Java_org_forstdb_TransactionDB_open__JJLjava_lang_String_2( JNIEnv* env, jclass, jlong joptions_handle, jlong jtxn_db_options_handle, jstring jdb_path) { auto* options = @@ -52,11 +52,11 @@ jlong Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: open * Signature: (JJLjava/lang/String;[[B[J)[J */ -jlongArray Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J( +jlongArray Java_org_forstdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J( JNIEnv* env, jclass, jlong jdb_options_handle, jlong jtxn_db_options_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options_handles) { @@ -143,11 +143,11 @@ jlongArray Java_org_rocksdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TransactionDB_disposeInternal(JNIEnv*, jobject, +void Java_org_forstdb_TransactionDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); assert(txn_db != nullptr); @@ -155,11 +155,11 @@ void Java_org_rocksdb_TransactionDB_disposeInternal(JNIEnv*, jobject, } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: closeDatabase * Signature: (J)V */ -void Java_org_rocksdb_TransactionDB_closeDatabase(JNIEnv* env, jclass, +void Java_org_forstdb_TransactionDB_closeDatabase(JNIEnv* env, jclass, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); assert(txn_db != nullptr); @@ -168,11 +168,11 @@ void Java_org_rocksdb_TransactionDB_closeDatabase(JNIEnv* env, jclass, } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: beginTransaction * Signature: (JJ)J */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJ( +jlong Java_org_forstdb_TransactionDB_beginTransaction__JJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle) { auto* txn_db = reinterpret_cast(jhandle); auto* write_options = @@ -183,11 +183,11 @@ jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJ( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: beginTransaction * Signature: 
(JJJ)J */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJJ( +jlong Java_org_forstdb_TransactionDB_beginTransaction__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jtxn_options_handle) { auto* txn_db = reinterpret_cast(jhandle); @@ -201,11 +201,11 @@ jlong Java_org_rocksdb_TransactionDB_beginTransaction__JJJ( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: beginTransaction_withOld * Signature: (JJJ)J */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJ( +jlong Java_org_forstdb_TransactionDB_beginTransaction_1withOld__JJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jold_txn_handle) { auto* txn_db = reinterpret_cast(jhandle); @@ -226,11 +226,11 @@ jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJ( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: beginTransaction_withOld * Signature: (JJJJ)J */ -jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJJ( +jlong Java_org_forstdb_TransactionDB_beginTransaction_1withOld__JJJJ( JNIEnv*, jobject, jlong jhandle, jlong jwrite_options_handle, jlong jtxn_options_handle, jlong jold_txn_handle) { auto* txn_db = reinterpret_cast(jhandle); @@ -252,11 +252,11 @@ jlong Java_org_rocksdb_TransactionDB_beginTransaction_1withOld__JJJJ( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: getTransactionByName * Signature: (JLjava/lang/String;)J */ -jlong Java_org_rocksdb_TransactionDB_getTransactionByName(JNIEnv* env, jobject, +jlong Java_org_forstdb_TransactionDB_getTransactionByName(JNIEnv* env, jobject, jlong jhandle, jstring jname) { auto* txn_db = reinterpret_cast(jhandle); @@ -271,11 +271,11 @@ jlong Java_org_rocksdb_TransactionDB_getTransactionByName(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: getAllPreparedTransactions * Signature: (J)[J */ -jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions( +jlongArray Java_org_forstdb_TransactionDB_getAllPreparedTransactions( JNIEnv* env, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); std::vector txns; @@ -306,11 +306,11 @@ jlongArray Java_org_rocksdb_TransactionDB_getAllPreparedTransactions( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: getLockStatusData * Signature: (J)Ljava/util/Map; */ -jobject Java_org_rocksdb_TransactionDB_getLockStatusData(JNIEnv* env, jobject, +jobject Java_org_forstdb_TransactionDB_getLockStatusData(JNIEnv* env, jobject, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); const std::unordered_multimap @@ -355,11 +355,11 @@ jobject Java_org_rocksdb_TransactionDB_getLockStatusData(JNIEnv* env, jobject, } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: getDeadlockInfoBuffer * Signature: (J)[Lorg/rocksdb/TransactionDB/DeadlockPath; */ -jobjectArray Java_org_rocksdb_TransactionDB_getDeadlockInfoBuffer( +jobjectArray Java_org_forstdb_TransactionDB_getDeadlockInfoBuffer( JNIEnv* env, jobject jobj, jlong jhandle) { auto* txn_db = reinterpret_cast(jhandle); const std::vector deadlock_info_buffer = @@ -440,11 +440,11 @@ jobjectArray Java_org_rocksdb_TransactionDB_getDeadlockInfoBuffer( } /* - * Class: org_rocksdb_TransactionDB + * Class: org_forstdb_TransactionDB * Method: setDeadlockInfoBufferSize * Signature: (JI)V */ -void Java_org_rocksdb_TransactionDB_setDeadlockInfoBufferSize( +void 
Java_org_forstdb_TransactionDB_setDeadlockInfoBufferSize( JNIEnv*, jobject, jlong jhandle, jint jdeadlock_info_buffer_size) { auto* txn_db = reinterpret_cast(jhandle); txn_db->SetDeadlockInfoBufferSize(jdeadlock_info_buffer_size); diff --git a/java/rocksjni/transaction_db_options.cc b/java/forstjni/transaction_db_options.cc similarity index 75% rename from java/rocksjni/transaction_db_options.cc rename to java/forstjni/transaction_db_options.cc index 4cf27121e..d908ad37d 100644 --- a/java/rocksjni/transaction_db_options.cc +++ b/java/forstjni/transaction_db_options.cc @@ -8,17 +8,17 @@ #include -#include "include/org_rocksdb_TransactionDBOptions.h" +#include "include/org_forstdb_TransactionDBOptions.h" #include "rocksdb/utilities/transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: newTransactionDBOptions * Signature: ()J */ -jlong Java_org_rocksdb_TransactionDBOptions_newTransactionDBOptions( +jlong Java_org_forstdb_TransactionDBOptions_newTransactionDBOptions( JNIEnv* /*env*/, jclass /*jcls*/) { ROCKSDB_NAMESPACE::TransactionDBOptions* opts = new ROCKSDB_NAMESPACE::TransactionDBOptions(); @@ -26,11 +26,11 @@ jlong Java_org_rocksdb_TransactionDBOptions_newTransactionDBOptions( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: getMaxNumLocks * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionDBOptions_getMaxNumLocks(JNIEnv* /*env*/, +jlong Java_org_forstdb_TransactionDBOptions_getMaxNumLocks(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -39,11 +39,11 @@ jlong Java_org_rocksdb_TransactionDBOptions_getMaxNumLocks(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: setMaxNumLocks * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionDBOptions_setMaxNumLocks( +void Java_org_forstdb_TransactionDBOptions_setMaxNumLocks( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_num_locks) { auto* opts = reinterpret_cast(jhandle); @@ -51,11 +51,11 @@ void Java_org_rocksdb_TransactionDBOptions_setMaxNumLocks( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: getNumStripes * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionDBOptions_getNumStripes(JNIEnv* /*env*/, +jlong Java_org_forstdb_TransactionDBOptions_getNumStripes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -64,11 +64,11 @@ jlong Java_org_rocksdb_TransactionDBOptions_getNumStripes(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: setNumStripes * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionDBOptions_setNumStripes(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionDBOptions_setNumStripes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jnum_stripes) { @@ -78,11 +78,11 @@ void Java_org_rocksdb_TransactionDBOptions_setNumStripes(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: getTransactionLockTimeout * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionDBOptions_getTransactionLockTimeout( +jlong Java_org_forstdb_TransactionDBOptions_getTransactionLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = 
reinterpret_cast(jhandle); @@ -90,11 +90,11 @@ jlong Java_org_rocksdb_TransactionDBOptions_getTransactionLockTimeout( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: setTransactionLockTimeout * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionDBOptions_setTransactionLockTimeout( +void Java_org_forstdb_TransactionDBOptions_setTransactionLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jtransaction_lock_timeout) { auto* opts = @@ -103,11 +103,11 @@ void Java_org_rocksdb_TransactionDBOptions_setTransactionLockTimeout( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: getDefaultLockTimeout * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionDBOptions_getDefaultLockTimeout( +jlong Java_org_forstdb_TransactionDBOptions_getDefaultLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -115,11 +115,11 @@ jlong Java_org_rocksdb_TransactionDBOptions_getDefaultLockTimeout( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: setDefaultLockTimeout * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionDBOptions_setDefaultLockTimeout( +void Java_org_forstdb_TransactionDBOptions_setDefaultLockTimeout( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdefault_lock_timeout) { auto* opts = @@ -128,11 +128,11 @@ void Java_org_rocksdb_TransactionDBOptions_setDefaultLockTimeout( } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: getWritePolicy * Signature: (J)B */ -jbyte Java_org_rocksdb_TransactionDBOptions_getWritePolicy(JNIEnv* /*env*/, +jbyte Java_org_forstdb_TransactionDBOptions_getWritePolicy(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -142,11 +142,11 @@ jbyte Java_org_rocksdb_TransactionDBOptions_getWritePolicy(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: setWritePolicy * Signature: (JB)V */ -void Java_org_rocksdb_TransactionDBOptions_setWritePolicy(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionDBOptions_setWritePolicy(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jbyte jwrite_policy) { @@ -158,11 +158,11 @@ void Java_org_rocksdb_TransactionDBOptions_setWritePolicy(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionDBOptions + * Class: org_forstdb_TransactionDBOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TransactionDBOptions_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionDBOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); diff --git a/java/rocksjni/transaction_log.cc b/java/forstjni/transaction_log.cc similarity index 77% rename from java/rocksjni/transaction_log.cc rename to java/forstjni/transaction_log.cc index 97c3bb301..8ddc64322 100644 --- a/java/rocksjni/transaction_log.cc +++ b/java/forstjni/transaction_log.cc @@ -12,26 +12,26 @@ #include #include -#include "include/org_rocksdb_TransactionLogIterator.h" -#include "rocksjni/portal.h" +#include "include/org_forstdb_TransactionLogIterator.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_TransactionLogIterator + * Class: org_forstdb_TransactionLogIterator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TransactionLogIterator_disposeInternal(JNIEnv* /*env*/, +void 
Java_org_forstdb_TransactionLogIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { delete reinterpret_cast(handle); } /* - * Class: org_rocksdb_TransactionLogIterator + * Class: org_forstdb_TransactionLogIterator * Method: isValid * Signature: (J)Z */ -jboolean Java_org_rocksdb_TransactionLogIterator_isValid(JNIEnv* /*env*/, +jboolean Java_org_forstdb_TransactionLogIterator_isValid(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle) @@ -39,22 +39,22 @@ jboolean Java_org_rocksdb_TransactionLogIterator_isValid(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionLogIterator + * Class: org_forstdb_TransactionLogIterator * Method: next * Signature: (J)V */ -void Java_org_rocksdb_TransactionLogIterator_next(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionLogIterator_next(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* - * Class: org_rocksdb_TransactionLogIterator + * Class: org_forstdb_TransactionLogIterator * Method: status * Signature: (J)V */ -void Java_org_rocksdb_TransactionLogIterator_status(JNIEnv* env, +void Java_org_forstdb_TransactionLogIterator_status(JNIEnv* env, jobject /*jobj*/, jlong handle) { ROCKSDB_NAMESPACE::Status s = @@ -66,11 +66,11 @@ void Java_org_rocksdb_TransactionLogIterator_status(JNIEnv* env, } /* - * Class: org_rocksdb_TransactionLogIterator + * Class: org_forstdb_TransactionLogIterator * Method: getBatch * Signature: (J)Lorg/rocksdb/TransactionLogIterator$BatchResult */ -jobject Java_org_rocksdb_TransactionLogIterator_getBatch(JNIEnv* env, +jobject Java_org_forstdb_TransactionLogIterator_getBatch(JNIEnv* env, jobject /*jobj*/, jlong handle) { ROCKSDB_NAMESPACE::BatchResult batch_result = diff --git a/java/rocksjni/transaction_notifier.cc b/java/forstjni/transaction_notifier.cc similarity index 76% rename from java/rocksjni/transaction_notifier.cc rename to java/forstjni/transaction_notifier.cc index cefeb648a..1a556460c 100644 --- a/java/rocksjni/transaction_notifier.cc +++ b/java/forstjni/transaction_notifier.cc @@ -8,16 +8,16 @@ #include -#include "include/org_rocksdb_AbstractTransactionNotifier.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/transaction_notifier_jnicallback.h" +#include "include/org_forstdb_AbstractTransactionNotifier.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/transaction_notifier_jnicallback.h" /* - * Class: org_rocksdb_AbstractTransactionNotifier + * Class: org_forstdb_AbstractTransactionNotifier * Method: createNewTransactionNotifier * Signature: ()J */ -jlong Java_org_rocksdb_AbstractTransactionNotifier_createNewTransactionNotifier( +jlong Java_org_forstdb_AbstractTransactionNotifier_createNewTransactionNotifier( JNIEnv* env, jobject jobj) { auto* transaction_notifier = new ROCKSDB_NAMESPACE::TransactionNotifierJniCallback(env, jobj); @@ -28,11 +28,11 @@ jlong Java_org_rocksdb_AbstractTransactionNotifier_createNewTransactionNotifier( } /* - * Class: org_rocksdb_AbstractTransactionNotifier + * Class: org_forstdb_AbstractTransactionNotifier * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_AbstractTransactionNotifier_disposeInternal( +void Java_org_forstdb_AbstractTransactionNotifier_disposeInternal( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { // TODO(AR) refactor to use JniCallback::JniCallback // when https://github.com/facebook/rocksdb/pull/1241/ is merged diff --git a/java/rocksjni/transaction_notifier_jnicallback.cc 
b/java/forstjni/transaction_notifier_jnicallback.cc similarity index 90% rename from java/rocksjni/transaction_notifier_jnicallback.cc rename to java/forstjni/transaction_notifier_jnicallback.cc index 26761cabd..abd133e3b 100644 --- a/java/rocksjni/transaction_notifier_jnicallback.cc +++ b/java/forstjni/transaction_notifier_jnicallback.cc @@ -6,10 +6,10 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::TransactionNotifier. -#include "rocksjni/transaction_notifier_jnicallback.h" +#include "forstjni/transaction_notifier_jnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/transaction_notifier_jnicallback.h b/java/forstjni/transaction_notifier_jnicallback.h similarity index 97% rename from java/rocksjni/transaction_notifier_jnicallback.h rename to java/forstjni/transaction_notifier_jnicallback.h index 089a5ee4a..d31c7b22e 100644 --- a/java/rocksjni/transaction_notifier_jnicallback.h +++ b/java/forstjni/transaction_notifier_jnicallback.h @@ -12,7 +12,7 @@ #include #include "rocksdb/utilities/transaction.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/transaction_options.cc b/java/forstjni/transaction_options.cc similarity index 75% rename from java/rocksjni/transaction_options.cc rename to java/forstjni/transaction_options.cc index dcf363e14..8cf3339c9 100644 --- a/java/rocksjni/transaction_options.cc +++ b/java/forstjni/transaction_options.cc @@ -8,27 +8,27 @@ #include -#include "include/org_rocksdb_TransactionOptions.h" +#include "include/org_forstdb_TransactionOptions.h" #include "rocksdb/utilities/transaction_db.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: newTransactionOptions * Signature: ()J */ -jlong Java_org_rocksdb_TransactionOptions_newTransactionOptions( +jlong Java_org_forstdb_TransactionOptions_newTransactionOptions( JNIEnv* /*env*/, jclass /*jcls*/) { auto* opts = new ROCKSDB_NAMESPACE::TransactionOptions(); return GET_CPLUSPLUS_POINTER(opts); } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: isSetSnapshot * Signature: (J)Z */ -jboolean Java_org_rocksdb_TransactionOptions_isSetSnapshot(JNIEnv* /*env*/, +jboolean Java_org_forstdb_TransactionOptions_isSetSnapshot(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -37,11 +37,11 @@ jboolean Java_org_rocksdb_TransactionOptions_isSetSnapshot(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setSetSnapshot * Signature: (JZ)V */ -void Java_org_rocksdb_TransactionOptions_setSetSnapshot( +void Java_org_forstdb_TransactionOptions_setSetSnapshot( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jset_snapshot) { auto* opts = reinterpret_cast(jhandle); @@ -49,11 +49,11 @@ void Java_org_rocksdb_TransactionOptions_setSetSnapshot( } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: isDeadlockDetect * Signature: (J)Z */ -jboolean Java_org_rocksdb_TransactionOptions_isDeadlockDetect(JNIEnv* /*env*/, +jboolean Java_org_forstdb_TransactionOptions_isDeadlockDetect(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { 
auto* opts = @@ -62,11 +62,11 @@ jboolean Java_org_rocksdb_TransactionOptions_isDeadlockDetect(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setDeadlockDetect * Signature: (JZ)V */ -void Java_org_rocksdb_TransactionOptions_setDeadlockDetect( +void Java_org_forstdb_TransactionOptions_setDeadlockDetect( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jboolean jdeadlock_detect) { auto* opts = @@ -75,11 +75,11 @@ void Java_org_rocksdb_TransactionOptions_setDeadlockDetect( } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: getLockTimeout * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionOptions_getLockTimeout(JNIEnv* /*env*/, +jlong Java_org_forstdb_TransactionOptions_getLockTimeout(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -88,11 +88,11 @@ jlong Java_org_rocksdb_TransactionOptions_getLockTimeout(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setLockTimeout * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionOptions_setLockTimeout(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionOptions_setLockTimeout(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jlock_timeout) { @@ -102,11 +102,11 @@ void Java_org_rocksdb_TransactionOptions_setLockTimeout(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: getExpiration * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionOptions_getExpiration(JNIEnv* /*env*/, +jlong Java_org_forstdb_TransactionOptions_getExpiration(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -115,11 +115,11 @@ jlong Java_org_rocksdb_TransactionOptions_getExpiration(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setExpiration * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionOptions_setExpiration(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionOptions_setExpiration(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jexpiration) { @@ -129,11 +129,11 @@ void Java_org_rocksdb_TransactionOptions_setExpiration(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: getDeadlockDetectDepth * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionOptions_getDeadlockDetectDepth( +jlong Java_org_forstdb_TransactionOptions_getDeadlockDetectDepth( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = reinterpret_cast(jhandle); @@ -141,11 +141,11 @@ jlong Java_org_rocksdb_TransactionOptions_getDeadlockDetectDepth( } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setDeadlockDetectDepth * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionOptions_setDeadlockDetectDepth( +void Java_org_forstdb_TransactionOptions_setDeadlockDetectDepth( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jdeadlock_detect_depth) { auto* opts = @@ -154,11 +154,11 @@ void Java_org_rocksdb_TransactionOptions_setDeadlockDetectDepth( } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: getMaxWriteBatchSize * Signature: (J)J */ -jlong Java_org_rocksdb_TransactionOptions_getMaxWriteBatchSize(JNIEnv* /*env*/, +jlong Java_org_forstdb_TransactionOptions_getMaxWriteBatchSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* opts = @@ -167,11 +167,11 @@ jlong 
Java_org_rocksdb_TransactionOptions_getMaxWriteBatchSize(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: setMaxWriteBatchSize * Signature: (JJ)V */ -void Java_org_rocksdb_TransactionOptions_setMaxWriteBatchSize( +void Java_org_forstdb_TransactionOptions_setMaxWriteBatchSize( JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle, jlong jmax_write_batch_size) { auto* opts = @@ -180,11 +180,11 @@ void Java_org_rocksdb_TransactionOptions_setMaxWriteBatchSize( } /* - * Class: org_rocksdb_TransactionOptions + * Class: org_forstdb_TransactionOptions * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TransactionOptions_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_TransactionOptions_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { delete reinterpret_cast(jhandle); diff --git a/java/rocksjni/ttl.cc b/java/forstjni/ttl.cc similarity index 91% rename from java/rocksjni/ttl.cc rename to java/forstjni/ttl.cc index 1fe2083d9..4621c245f 100644 --- a/java/rocksjni/ttl.cc +++ b/java/forstjni/ttl.cc @@ -15,17 +15,17 @@ #include #include -#include "include/org_rocksdb_TtlDB.h" +#include "include/org_forstdb_TtlDB.h" #include "rocksdb/utilities/db_ttl.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_TtlDB + * Class: org_forstdb_TtlDB * Method: open * Signature: (JLjava/lang/String;IZ)J */ -jlong Java_org_rocksdb_TtlDB_open(JNIEnv* env, jclass, jlong joptions_handle, +jlong Java_org_forstdb_TtlDB_open(JNIEnv* env, jclass, jlong joptions_handle, jstring jdb_path, jint jttl, jboolean jread_only) { const char* db_path = env->GetStringUTFChars(jdb_path, nullptr); @@ -51,11 +51,11 @@ jlong Java_org_rocksdb_TtlDB_open(JNIEnv* env, jclass, jlong joptions_handle, } /* - * Class: org_rocksdb_TtlDB + * Class: org_forstdb_TtlDB * Method: openCF * Signature: (JLjava/lang/String;[[B[J[IZ)[J */ -jlongArray Java_org_rocksdb_TtlDB_openCF(JNIEnv* env, jclass, jlong jopt_handle, +jlongArray Java_org_forstdb_TtlDB_openCF(JNIEnv* env, jclass, jlong jopt_handle, jstring jdb_path, jobjectArray jcolumn_names, jlongArray jcolumn_options, @@ -150,22 +150,22 @@ jlongArray Java_org_rocksdb_TtlDB_openCF(JNIEnv* env, jclass, jlong jopt_handle, } /* - * Class: org_rocksdb_TtlDB + * Class: org_forstdb_TtlDB * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_TtlDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { +void Java_org_forstdb_TtlDB_disposeInternal(JNIEnv*, jobject, jlong jhandle) { auto* ttl_db = reinterpret_cast(jhandle); assert(ttl_db != nullptr); delete ttl_db; } /* - * Class: org_rocksdb_TtlDB + * Class: org_forstdb_TtlDB * Method: closeDatabase * Signature: (J)V */ -void Java_org_rocksdb_TtlDB_closeDatabase(JNIEnv* /* env */, jclass, +void Java_org_forstdb_TtlDB_closeDatabase(JNIEnv* /* env */, jclass, jlong /* jhandle */) { // auto* ttl_db = reinterpret_cast(jhandle); // assert(ttl_db != nullptr); @@ -177,11 +177,11 @@ void Java_org_rocksdb_TtlDB_closeDatabase(JNIEnv* /* env */, jclass, } /* - * Class: org_rocksdb_TtlDB + * Class: org_forstdb_TtlDB * Method: createColumnFamilyWithTtl * Signature: (JLorg/rocksdb/ColumnFamilyDescriptor;[BJI)J; */ -jlong Java_org_rocksdb_TtlDB_createColumnFamilyWithTtl(JNIEnv* env, jobject, +jlong Java_org_forstdb_TtlDB_createColumnFamilyWithTtl(JNIEnv* env, jobject, jlong jdb_handle, jbyteArray jcolumn_name, jlong jcolumn_options, 
diff --git a/java/rocksjni/wal_filter.cc b/java/forstjni/wal_filter.cc similarity index 71% rename from java/rocksjni/wal_filter.cc rename to java/forstjni/wal_filter.cc index 24b88afed..3fbd59a7a 100644 --- a/java/rocksjni/wal_filter.cc +++ b/java/forstjni/wal_filter.cc @@ -8,16 +8,16 @@ #include -#include "include/org_rocksdb_AbstractWalFilter.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/wal_filter_jnicallback.h" +#include "include/org_forstdb_AbstractWalFilter.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/wal_filter_jnicallback.h" /* - * Class: org_rocksdb_AbstractWalFilter + * Class: org_forstdb_AbstractWalFilter * Method: createNewWalFilter * Signature: ()J */ -jlong Java_org_rocksdb_AbstractWalFilter_createNewWalFilter(JNIEnv* env, +jlong Java_org_forstdb_AbstractWalFilter_createNewWalFilter(JNIEnv* env, jobject jobj) { auto* wal_filter = new ROCKSDB_NAMESPACE::WalFilterJniCallback(env, jobj); return GET_CPLUSPLUS_POINTER(wal_filter); diff --git a/java/rocksjni/wal_filter_jnicallback.cc b/java/forstjni/wal_filter_jnicallback.cc similarity index 97% rename from java/rocksjni/wal_filter_jnicallback.cc rename to java/forstjni/wal_filter_jnicallback.cc index d2e3c9076..aa5c2f31b 100644 --- a/java/rocksjni/wal_filter_jnicallback.cc +++ b/java/forstjni/wal_filter_jnicallback.cc @@ -6,10 +6,10 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::WalFilter. -#include "rocksjni/wal_filter_jnicallback.h" +#include "forstjni/wal_filter_jnicallback.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { WalFilterJniCallback::WalFilterJniCallback(JNIEnv* env, jobject jwal_filter) diff --git a/java/rocksjni/wal_filter_jnicallback.h b/java/forstjni/wal_filter_jnicallback.h similarity index 97% rename from java/rocksjni/wal_filter_jnicallback.h rename to java/forstjni/wal_filter_jnicallback.h index 5cdc65978..d933a2e8e 100644 --- a/java/rocksjni/wal_filter_jnicallback.h +++ b/java/forstjni/wal_filter_jnicallback.h @@ -16,7 +16,7 @@ #include #include "rocksdb/wal_filter.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { diff --git a/java/rocksjni/write_batch.cc b/java/forstjni/write_batch.cc similarity index 83% rename from java/rocksjni/write_batch.cc rename to java/forstjni/write_batch.cc index 6704e4a7e..d9dc5557a 100644 --- a/java/rocksjni/write_batch.cc +++ b/java/forstjni/write_batch.cc @@ -11,25 +11,25 @@ #include "db/memtable.h" #include "db/write_batch_internal.h" -#include "include/org_rocksdb_WriteBatch.h" -#include "include/org_rocksdb_WriteBatch_Handler.h" +#include "include/org_forstdb_WriteBatch.h" +#include "include/org_forstdb_WriteBatch_Handler.h" #include "logging/logging.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" #include "rocksdb/write_buffer_manager.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" -#include "rocksjni/writebatchhandlerjnicallback.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" +#include "forstjni/writebatchhandlerjnicallback.h" #include "table/scoped_arena_iterator.h" /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: newWriteBatch * Signature: (I)J */ -jlong Java_org_rocksdb_WriteBatch_newWriteBatch__I(JNIEnv* /*env*/, 
+jlong Java_org_forstdb_WriteBatch_newWriteBatch__I(JNIEnv* /*env*/, jclass /*jcls*/, jint jreserved_bytes) { auto* wb = @@ -38,11 +38,11 @@ jlong Java_org_rocksdb_WriteBatch_newWriteBatch__I(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: newWriteBatch * Signature: ([BI)J */ -jlong Java_org_rocksdb_WriteBatch_newWriteBatch___3BI(JNIEnv* env, +jlong Java_org_forstdb_WriteBatch_newWriteBatch___3BI(JNIEnv* env, jclass /*jcls*/, jbyteArray jserialized, jint jserialized_length) { @@ -61,11 +61,11 @@ jlong Java_org_rocksdb_WriteBatch_newWriteBatch___3BI(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: count0 * Signature: (J)I */ -jint Java_org_rocksdb_WriteBatch_count0(JNIEnv* /*env*/, jobject /*jobj*/, +jint Java_org_forstdb_WriteBatch_count0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -74,11 +74,11 @@ jint Java_org_rocksdb_WriteBatch_count0(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: clear0 * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_clear0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -87,11 +87,11 @@ void Java_org_rocksdb_WriteBatch_clear0(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: setSavePoint0 * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_setSavePoint0(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatch_setSavePoint0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -101,11 +101,11 @@ void Java_org_rocksdb_WriteBatch_setSavePoint0(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: rollbackToSavePoint0 * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_rollbackToSavePoint0(JNIEnv* env, +void Java_org_forstdb_WriteBatch_rollbackToSavePoint0(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -120,11 +120,11 @@ void Java_org_rocksdb_WriteBatch_rollbackToSavePoint0(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: popSavePoint * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_popSavePoint(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_popSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -138,11 +138,11 @@ void Java_org_rocksdb_WriteBatch_popSavePoint(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: setMaxBytes * Signature: (JJ)V */ -void Java_org_rocksdb_WriteBatch_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle, jlong jmax_bytes) { auto* wb = reinterpret_cast(jwb_handle); @@ -152,11 +152,11 @@ void Java_org_rocksdb_WriteBatch_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: put * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, 
jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, @@ -176,11 +176,11 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BI(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: put * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatch_put__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -201,11 +201,11 @@ void Java_org_rocksdb_WriteBatch_put__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_WriteBatch_putDirect(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_putDirect(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, @@ -227,11 +227,11 @@ void Java_org_rocksdb_WriteBatch_putDirect(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: merge * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( +void Java_org_forstdb_WriteBatch_merge__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wb = reinterpret_cast(jwb_handle); @@ -249,11 +249,11 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BI( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: merge * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatch_merge__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -274,11 +274,11 @@ void Java_org_rocksdb_WriteBatch_merge__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: delete * Signature: (J[BI)V */ -void Java_org_rocksdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len) { auto* wb = reinterpret_cast(jwb_handle); @@ -292,11 +292,11 @@ void Java_org_rocksdb_WriteBatch_delete__J_3BI(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: delete * Signature: (J[BIJ)V */ -void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { @@ -316,11 +316,11 @@ void Java_org_rocksdb_WriteBatch_delete__J_3BIJ(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: singleDelete * Signature: (J[BI)V */ -void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len) { @@ -338,11 +338,11 @@ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BI(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: singleDelete * Signature: (J[BIJ)V */ -void 
Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jkey, jint jkey_len, @@ -364,11 +364,11 @@ void Java_org_rocksdb_WriteBatch_singleDelete__J_3BIJ(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: deleteDirect * Signature: (JLjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_WriteBatch_deleteDirect(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_deleteDirect(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jlong jcf_handle) { @@ -388,11 +388,11 @@ void Java_org_rocksdb_WriteBatch_deleteDirect(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: deleteRange * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BI( +void Java_org_forstdb_WriteBatch_deleteRange__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { auto* wb = reinterpret_cast(jwb_handle); @@ -410,11 +410,11 @@ void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BI( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: deleteRange * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatch_deleteRange__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, jlong jcf_handle) { @@ -436,11 +436,11 @@ void Java_org_rocksdb_WriteBatch_deleteRange__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: putLogData * Signature: (J[BI)V */ -void Java_org_rocksdb_WriteBatch_putLogData(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatch_putLogData(JNIEnv* env, jobject jobj, jlong jwb_handle, jbyteArray jblob, jint jblob_len) { auto* wb = reinterpret_cast(jwb_handle); @@ -456,11 +456,11 @@ void Java_org_rocksdb_WriteBatch_putLogData(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: iterate * Signature: (JJ)V */ -void Java_org_rocksdb_WriteBatch_iterate(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WriteBatch_iterate(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jlong handlerHandle) { auto* wb = reinterpret_cast(jwb_handle); @@ -477,11 +477,11 @@ void Java_org_rocksdb_WriteBatch_iterate(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: data * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_WriteBatch_data(JNIEnv* env, jobject /*jobj*/, +jbyteArray Java_org_forstdb_WriteBatch_data(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -491,11 +491,11 @@ jbyteArray Java_org_rocksdb_WriteBatch_data(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: getDataSize * Signature: (J)J */ -jlong Java_org_rocksdb_WriteBatch_getDataSize(JNIEnv* /*env*/, jobject /*jobj*/, +jlong Java_org_forstdb_WriteBatch_getDataSize(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -505,11 +505,11 @@ jlong Java_org_rocksdb_WriteBatch_getDataSize(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * 
Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasPut * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteBatch_hasPut(JNIEnv* /*env*/, jobject /*jobj*/, +jboolean Java_org_forstdb_WriteBatch_hasPut(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -518,11 +518,11 @@ jboolean Java_org_rocksdb_WriteBatch_hasPut(JNIEnv* /*env*/, jobject /*jobj*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasDelete * Signature: (J)Z */ -jboolean Java_org_rocksdb_WriteBatch_hasDelete(JNIEnv* /*env*/, +jboolean Java_org_forstdb_WriteBatch_hasDelete(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -532,11 +532,11 @@ jboolean Java_org_rocksdb_WriteBatch_hasDelete(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasSingleDelete * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasSingleDelete( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasSingleDelete( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -545,11 +545,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasSingleDelete( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasDeleteRange * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasDeleteRange( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasDeleteRange( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -558,11 +558,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasDeleteRange( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasMerge * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasMerge( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasMerge( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -571,11 +571,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasMerge( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasBeginPrepare * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasBeginPrepare( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasBeginPrepare( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -584,11 +584,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasBeginPrepare( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasEndPrepare * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasEndPrepare( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasEndPrepare( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -597,11 +597,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasEndPrepare( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasCommit * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasCommit( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasCommit( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); 
assert(wb != nullptr); @@ -610,11 +610,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasCommit( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: hasRollback * Signature: (J)Z */ -JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasRollback( +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasRollback( JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -623,11 +623,11 @@ JNIEXPORT jboolean JNICALL Java_org_rocksdb_WriteBatch_hasRollback( } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: markWalTerminationPoint * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -637,11 +637,11 @@ void Java_org_rocksdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: getWalTerminationPoint * Signature: (J)Lorg/rocksdb/WriteBatch/SavePoint; */ -jobject Java_org_rocksdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, +jobject Java_org_forstdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, jobject /*jobj*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -652,11 +652,11 @@ jobject Java_org_rocksdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatch + * Class: org_forstdb_WriteBatch * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_WriteBatch_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatch_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* wb = reinterpret_cast(handle); @@ -665,11 +665,11 @@ void Java_org_rocksdb_WriteBatch_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatch_Handler + * Class: org_forstdb_WriteBatch_Handler * Method: createNewHandler0 * Signature: ()J */ -jlong Java_org_rocksdb_WriteBatch_00024Handler_createNewHandler0(JNIEnv* env, +jlong Java_org_forstdb_WriteBatch_00024Handler_createNewHandler0(JNIEnv* env, jobject jobj) { auto* wbjnic = new ROCKSDB_NAMESPACE::WriteBatchHandlerJniCallback(env, jobj); return GET_CPLUSPLUS_POINTER(wbjnic); diff --git a/java/rocksjni/write_batch_test.cc b/java/forstjni/write_batch_test.cc similarity index 90% rename from java/rocksjni/write_batch_test.cc rename to java/forstjni/write_batch_test.cc index 30b9a7229..bf3669d0c 100644 --- a/java/rocksjni/write_batch_test.cc +++ b/java/forstjni/write_batch_test.cc @@ -11,27 +11,27 @@ #include "db/memtable.h" #include "db/write_batch_internal.h" -#include "include/org_rocksdb_WriteBatch.h" -#include "include/org_rocksdb_WriteBatchTest.h" -#include "include/org_rocksdb_WriteBatchTestInternalHelper.h" -#include "include/org_rocksdb_WriteBatch_Handler.h" +#include "include/org_forstdb_WriteBatch.h" +#include "include/org_forstdb_WriteBatchTest.h" +#include "include/org_forstdb_WriteBatchTestInternalHelper.h" +#include "include/org_forstdb_WriteBatch_Handler.h" #include "options/cf_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/memtablerep.h" #include "rocksdb/status.h" #include "rocksdb/write_buffer_manager.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" #include "table/scoped_arena_iterator.h" #include "test_util/testharness.h" #include "util/string_util.h" /* - * Class: org_rocksdb_WriteBatchTest + * 
Class: org_forstdb_WriteBatchTest * Method: getContents * Signature: (J)[B */ -jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(JNIEnv* env, +jbyteArray Java_org_forstdb_WriteBatchTest_getContents(JNIEnv* env, jclass /*jclazz*/, jlong jwb_handle) { auto* b = reinterpret_cast(jwb_handle); @@ -153,11 +153,11 @@ jbyteArray Java_org_rocksdb_WriteBatchTest_getContents(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatchTestInternalHelper + * Class: org_forstdb_WriteBatchTestInternalHelper * Method: setSequence * Signature: (JJ)V */ -void Java_org_rocksdb_WriteBatchTestInternalHelper_setSequence( +void Java_org_forstdb_WriteBatchTestInternalHelper_setSequence( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle, jlong jsn) { auto* wb = reinterpret_cast(jwb_handle); assert(wb != nullptr); @@ -167,11 +167,11 @@ void Java_org_rocksdb_WriteBatchTestInternalHelper_setSequence( } /* - * Class: org_rocksdb_WriteBatchTestInternalHelper + * Class: org_forstdb_WriteBatchTestInternalHelper * Method: sequence * Signature: (J)J */ -jlong Java_org_rocksdb_WriteBatchTestInternalHelper_sequence(JNIEnv* /*env*/, +jlong Java_org_forstdb_WriteBatchTestInternalHelper_sequence(JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -182,11 +182,11 @@ jlong Java_org_rocksdb_WriteBatchTestInternalHelper_sequence(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchTestInternalHelper + * Class: org_forstdb_WriteBatchTestInternalHelper * Method: append * Signature: (JJ)V */ -void Java_org_rocksdb_WriteBatchTestInternalHelper_append(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatchTestInternalHelper_append(JNIEnv* /*env*/, jclass /*jclazz*/, jlong jwb_handle_1, jlong jwb_handle_2) { diff --git a/java/rocksjni/write_batch_with_index.cc b/java/forstjni/write_batch_with_index.cc similarity index 84% rename from java/rocksjni/write_batch_with_index.cc rename to java/forstjni/write_batch_with_index.cc index a5c3216cb..e4ed9a449 100644 --- a/java/rocksjni/write_batch_with_index.cc +++ b/java/forstjni/write_batch_with_index.cc @@ -8,29 +8,29 @@ #include "rocksdb/utilities/write_batch_with_index.h" -#include "include/org_rocksdb_WBWIRocksIterator.h" -#include "include/org_rocksdb_WriteBatchWithIndex.h" +#include "include/org_forstdb_WBWIRocksIterator.h" +#include "include/org_forstdb_WriteBatchWithIndex.h" #include "rocksdb/comparator.h" -#include "rocksjni/cplusplus_to_java_convert.h" -#include "rocksjni/portal.h" +#include "forstjni/cplusplus_to_java_convert.h" +#include "forstjni/portal.h" /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: ()J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__( +jlong Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__( JNIEnv* /*env*/, jclass /*jcls*/) { auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex(); return GET_CPLUSPLUS_POINTER(wbwi); } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: (Z)J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z( +jlong Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z( JNIEnv* /*env*/, jclass /*jcls*/, jboolean joverwrite_key) { auto* wbwi = new ROCKSDB_NAMESPACE::WriteBatchWithIndex( ROCKSDB_NAMESPACE::BytewiseComparator(), 0, @@ -39,11 +39,11 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * 
Class: org_forstdb_WriteBatchWithIndex * Method: newWriteBatchWithIndex * Signature: (JBIZ)J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ( +jlong Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ( JNIEnv* /*env*/, jclass /*jcls*/, jlong jfallback_index_comparator_handle, jbyte jcomparator_type, jint jreserved_bytes, jboolean joverwrite_key) { ROCKSDB_NAMESPACE::Comparator* fallback_comparator = nullptr; @@ -68,11 +68,11 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: count0 * Signature: (J)I */ -jint Java_org_rocksdb_WriteBatchWithIndex_count0(JNIEnv* /*env*/, +jint Java_org_forstdb_WriteBatchWithIndex_count0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -83,11 +83,11 @@ jint Java_org_rocksdb_WriteBatchWithIndex_count0(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: put * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BI( +void Java_org_forstdb_WriteBatchWithIndex_put__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wbwi = @@ -106,11 +106,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: put * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatchWithIndex_put__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { @@ -133,11 +133,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_put__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: putDirect * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_putDirect( +void Java_org_forstdb_WriteBatchWithIndex_putDirect( JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jobject jval, jint jval_offset, jint jval_len, jlong jcf_handle) { @@ -158,11 +158,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_putDirect( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: merge * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BI( +void Java_org_forstdb_WriteBatchWithIndex_merge__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len) { auto* wbwi = @@ -181,11 +181,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: merge * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatchWithIndex_merge__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jbyteArray jentry_value, jint jentry_value_len, jlong jcf_handle) { @@ -208,11 +208,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_merge__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: delete * Signature: (J[BI)V */ -void 
Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BI(JNIEnv* env, +void Java_org_forstdb_WriteBatchWithIndex_delete__J_3BI(JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, @@ -231,11 +231,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BI(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: delete * Signature: (J[BIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BIJ( +void Java_org_forstdb_WriteBatchWithIndex_delete__J_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = @@ -255,11 +255,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_delete__J_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: singleDelete * Signature: (J[BI)V */ -void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BI( +void Java_org_forstdb_WriteBatchWithIndex_singleDelete__J_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = @@ -277,11 +277,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: singleDelete * Signature: (J[BIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BIJ( +void Java_org_forstdb_WriteBatchWithIndex_singleDelete__J_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = @@ -302,11 +302,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_singleDelete__J_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: deleteDirect * Signature: (JLjava/nio/ByteBuffer;IIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteDirect( +void Java_org_forstdb_WriteBatchWithIndex_deleteDirect( JNIEnv* env, jobject /*jobj*/, jlong jwb_handle, jobject jkey, jint jkey_offset, jint jkey_len, jlong jcf_handle) { auto* wb = reinterpret_cast(jwb_handle); @@ -325,11 +325,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_deleteDirect( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: deleteRange * Signature: (J[BI[BI)V */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI( +void Java_org_forstdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len) { auto* wbwi = @@ -348,11 +348,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: deleteRange * Signature: (J[BI[BIJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ( +void Java_org_forstdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ( JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jbegin_key, jint jbegin_key_len, jbyteArray jend_key, jint jend_key_len, jlong jcf_handle) { @@ -375,11 +375,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: putLogData * Signature: (J[BI)V */ -void Java_org_rocksdb_WriteBatchWithIndex_putLogData(JNIEnv* env, jobject jobj, +void Java_org_forstdb_WriteBatchWithIndex_putLogData(JNIEnv* env, jobject jobj, jlong jwbwi_handle, jbyteArray jblob, jint jblob_len) { @@ -397,11 +397,11 @@ void 
Java_org_rocksdb_WriteBatchWithIndex_putLogData(JNIEnv* env, jobject jobj, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: clear * Signature: (J)V */ -void Java_org_rocksdb_WriteBatchWithIndex_clear0(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatchWithIndex_clear0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -412,11 +412,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_clear0(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: setSavePoint0 * Signature: (J)V */ -void Java_org_rocksdb_WriteBatchWithIndex_setSavePoint0(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatchWithIndex_setSavePoint0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -427,11 +427,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_setSavePoint0(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: rollbackToSavePoint0 * Signature: (J)V */ -void Java_org_rocksdb_WriteBatchWithIndex_rollbackToSavePoint0( +void Java_org_forstdb_WriteBatchWithIndex_rollbackToSavePoint0( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = reinterpret_cast(jwbwi_handle); @@ -447,11 +447,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_rollbackToSavePoint0( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: popSavePoint * Signature: (J)V */ -void Java_org_rocksdb_WriteBatchWithIndex_popSavePoint(JNIEnv* env, +void Java_org_forstdb_WriteBatchWithIndex_popSavePoint(JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -468,11 +468,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_popSavePoint(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: setMaxBytes * Signature: (JJ)V */ -void Java_org_rocksdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle, jlong jmax_bytes) { @@ -484,11 +484,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: getWriteBatch * Signature: (J)Lorg/rocksdb/WriteBatch; */ -jobject Java_org_rocksdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* env, +jobject Java_org_forstdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -502,11 +502,11 @@ jobject Java_org_rocksdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* env, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: iterator0 * Signature: (J)J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iterator0(JNIEnv* /*env*/, +jlong Java_org_forstdb_WriteBatchWithIndex_iterator0(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle) { auto* wbwi = @@ -516,11 +516,11 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_iterator0(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: iterator1 * Signature: (JJ)J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iterator1(JNIEnv* /*env*/, +jlong Java_org_forstdb_WriteBatchWithIndex_iterator1(JNIEnv* /*env*/, jobject /*jobj*/, jlong jwbwi_handle, jlong jcf_handle) { @@ -533,11 +533,11 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_iterator1(JNIEnv* /*env*/, } /* - * Class: 
org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: iteratorWithBase * Signature: (JJJJ)J */ -jlong Java_org_rocksdb_WriteBatchWithIndex_iteratorWithBase( +jlong Java_org_forstdb_WriteBatchWithIndex_iteratorWithBase( JNIEnv*, jobject, jlong jwbwi_handle, jlong jcf_handle, jlong jbase_iterator_handle, jlong jread_opts_handle) { auto* wbwi = @@ -557,11 +557,11 @@ jlong Java_org_rocksdb_WriteBatchWithIndex_iteratorWithBase( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: getFromBatch * Signature: (JJ[BI)[B */ -jbyteArray JNICALL Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BI( +jbyteArray JNICALL Java_org_forstdb_WriteBatchWithIndex_getFromBatch__JJ_3BI( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = @@ -577,11 +577,11 @@ jbyteArray JNICALL Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: getFromBatch * Signature: (JJ[BIJ)[B */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ( +jbyteArray Java_org_forstdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdbopt_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = @@ -599,11 +599,11 @@ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: getFromBatchAndDB * Signature: (JJJ[BI)[B */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI( +jbyteArray Java_org_forstdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, jlong jreadopt_handle, jbyteArray jkey, jint jkey_len) { auto* wbwi = @@ -621,11 +621,11 @@ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: getFromBatchAndDB * Signature: (JJJ[BIJ)[B */ -jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ( +jbyteArray Java_org_forstdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ( JNIEnv* env, jobject /*jobj*/, jlong jwbwi_handle, jlong jdb_handle, jlong jreadopt_handle, jbyteArray jkey, jint jkey_len, jlong jcf_handle) { auto* wbwi = @@ -645,11 +645,11 @@ jbyteArray Java_org_rocksdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ( } /* - * Class: org_rocksdb_WriteBatchWithIndex + * Class: org_forstdb_WriteBatchWithIndex * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_WriteBatchWithIndex_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBatchWithIndex_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* wbwi = @@ -661,11 +661,11 @@ void Java_org_rocksdb_WriteBatchWithIndex_disposeInternal(JNIEnv* /*env*/, /* WBWIRocksIterator below */ /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_WBWIRocksIterator_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -674,64 +674,64 @@ void Java_org_rocksdb_WBWIRocksIterator_disposeInternal(JNIEnv* /*env*/, } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: 
org_forstdb_WBWIRocksIterator * Method: isValid0 * Signature: (J)Z */ -jboolean Java_org_rocksdb_WBWIRocksIterator_isValid0(JNIEnv* /*env*/, +jboolean Java_org_forstdb_WBWIRocksIterator_isValid0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { return reinterpret_cast(handle)->Valid(); } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekToFirst0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekToFirst0(JNIEnv* /*env*/, +void Java_org_forstdb_WBWIRocksIterator_seekToFirst0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToFirst(); } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekToLast0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekToLast0(JNIEnv* /*env*/, +void Java_org_forstdb_WBWIRocksIterator_seekToLast0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->SeekToLast(); } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: next0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_WBWIRocksIterator_next0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Next(); } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: prev0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, +void Java_org_forstdb_WBWIRocksIterator_prev0(JNIEnv* /*env*/, jobject /*jobj*/, jlong handle) { reinterpret_cast(handle)->Prev(); } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seek0 * Signature: (J[BI)V */ -void Java_org_rocksdb_WBWIRocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WBWIRocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -752,11 +752,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seek0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekDirect0( +void Java_org_forstdb_WBWIRocksIterator_seekDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -771,11 +771,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seekDirect0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. 
* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekByteArray0 * Signature: (J[BII)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekByteArray0( +void Java_org_forstdb_WBWIRocksIterator_seekByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { const std::unique_ptr target(new char[jtarget_len]); @@ -795,11 +795,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seekByteArray0( } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekForPrev0 * Signature: (J[BI)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrev0(JNIEnv* env, +void Java_org_forstdb_WBWIRocksIterator_seekForPrev0(JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, @@ -822,11 +822,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seekForPrev0(JNIEnv* env, } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekForPrevDirect0 * Signature: (JLjava/nio/ByteBuffer;II)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrevDirect0( +void Java_org_forstdb_WBWIRocksIterator_seekForPrevDirect0( JNIEnv* env, jobject /*jobj*/, jlong handle, jobject jtarget, jint jtarget_off, jint jtarget_len) { auto* it = reinterpret_cast(handle); @@ -841,11 +841,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seekForPrevDirect0( * This method supports fetching into indirect byte buffers; * the Java wrapper extracts the byte[] and passes it here. * - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: seekForPrevByteArray0 * Signature: (J[BII)V */ -void Java_org_rocksdb_WBWIRocksIterator_seekForPrevByteArray0( +void Java_org_forstdb_WBWIRocksIterator_seekForPrevByteArray0( JNIEnv* env, jobject /*jobj*/, jlong handle, jbyteArray jtarget, jint jtarget_off, jint jtarget_len) { const std::unique_ptr target(new char[jtarget_len]); @@ -865,11 +865,11 @@ void Java_org_rocksdb_WBWIRocksIterator_seekForPrevByteArray0( } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: status0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_status0(JNIEnv* env, jobject /*jobj*/, +void Java_org_forstdb_WBWIRocksIterator_status0(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); ROCKSDB_NAMESPACE::Status s = it->status(); @@ -882,11 +882,11 @@ void Java_org_rocksdb_WBWIRocksIterator_status0(JNIEnv* env, jobject /*jobj*/, } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: entry1 * Signature: (J)[J */ -jlongArray Java_org_rocksdb_WBWIRocksIterator_entry1(JNIEnv* env, +jlongArray Java_org_forstdb_WBWIRocksIterator_entry1(JNIEnv* env, jobject /*jobj*/, jlong handle) { auto* it = reinterpret_cast(handle); @@ -942,11 +942,11 @@ jlongArray Java_org_rocksdb_WBWIRocksIterator_entry1(JNIEnv* env, } /* - * Class: org_rocksdb_WBWIRocksIterator + * Class: org_forstdb_WBWIRocksIterator * Method: refresh0 * Signature: (J)V */ -void Java_org_rocksdb_WBWIRocksIterator_refresh0(JNIEnv* env) { +void Java_org_forstdb_WBWIRocksIterator_refresh0(JNIEnv* env) { ROCKSDB_NAMESPACE::Status s = ROCKSDB_NAMESPACE::Status::NotSupported("Refresh() is not supported"); ROCKSDB_NAMESPACE::RocksDBExceptionJni::ThrowNew(env, s); diff --git a/java/rocksjni/write_buffer_manager.cc b/java/forstjni/write_buffer_manager.cc similarity index 81% rename from java/rocksjni/write_buffer_manager.cc rename to java/forstjni/write_buffer_manager.cc index 
9ce697e10..114d3a64b 100644 --- a/java/rocksjni/write_buffer_manager.cc +++ b/java/forstjni/write_buffer_manager.cc @@ -9,16 +9,16 @@ #include -#include "include/org_rocksdb_WriteBufferManager.h" +#include "include/org_forstdb_WriteBufferManager.h" #include "rocksdb/cache.h" -#include "rocksjni/cplusplus_to_java_convert.h" +#include "forstjni/cplusplus_to_java_convert.h" /* - * Class: org_rocksdb_WriteBufferManager + * Class: org_forstdb_WriteBufferManager * Method: newWriteBufferManager * Signature: (JJ)J */ -jlong Java_org_rocksdb_WriteBufferManager_newWriteBufferManager( +jlong Java_org_forstdb_WriteBufferManager_newWriteBufferManager( JNIEnv* /*env*/, jclass /*jclazz*/, jlong jbuffer_size, jlong jcache_handle, jboolean allow_stall) { auto* cache_ptr = @@ -32,11 +32,11 @@ jlong Java_org_rocksdb_WriteBufferManager_newWriteBufferManager( } /* - * Class: org_rocksdb_WriteBufferManager + * Class: org_forstdb_WriteBufferManager * Method: disposeInternal * Signature: (J)V */ -void Java_org_rocksdb_WriteBufferManager_disposeInternal(JNIEnv* /*env*/, +void Java_org_forstdb_WriteBufferManager_disposeInternal(JNIEnv* /*env*/, jobject /*jobj*/, jlong jhandle) { auto* write_buffer_manager = diff --git a/java/rocksjni/writebatchhandlerjnicallback.cc b/java/forstjni/writebatchhandlerjnicallback.cc similarity index 99% rename from java/rocksjni/writebatchhandlerjnicallback.cc rename to java/forstjni/writebatchhandlerjnicallback.cc index 66ceabe9a..04e97f8bd 100644 --- a/java/rocksjni/writebatchhandlerjnicallback.cc +++ b/java/forstjni/writebatchhandlerjnicallback.cc @@ -6,9 +6,9 @@ // This file implements the callback "bridge" between Java and C++ for // ROCKSDB_NAMESPACE::Comparator. -#include "rocksjni/writebatchhandlerjnicallback.h" +#include "forstjni/writebatchhandlerjnicallback.h" -#include "rocksjni/portal.h" +#include "forstjni/portal.h" namespace ROCKSDB_NAMESPACE { WriteBatchHandlerJniCallback::WriteBatchHandlerJniCallback( diff --git a/java/rocksjni/writebatchhandlerjnicallback.h b/java/forstjni/writebatchhandlerjnicallback.h similarity index 99% rename from java/rocksjni/writebatchhandlerjnicallback.h rename to java/forstjni/writebatchhandlerjnicallback.h index 9629797ca..b71935ad3 100644 --- a/java/rocksjni/writebatchhandlerjnicallback.h +++ b/java/forstjni/writebatchhandlerjnicallback.h @@ -15,7 +15,7 @@ #include #include "rocksdb/write_batch.h" -#include "rocksjni/jnicallback.h" +#include "forstjni/jnicallback.h" namespace ROCKSDB_NAMESPACE { /** diff --git a/java/include/org_forstdb_AbstractCompactionFilter.h b/java/include/org_forstdb_AbstractCompactionFilter.h new file mode 100644 index 000000000..65ae517f7 --- /dev/null +++ b/java/include/org_forstdb_AbstractCompactionFilter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractCompactionFilter */ + +#ifndef _Included_org_forstdb_AbstractCompactionFilter +#define _Included_org_forstdb_AbstractCompactionFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractCompactionFilter + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_AbstractCompactionFilter_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractCompactionFilterFactory.h b/java/include/org_forstdb_AbstractCompactionFilterFactory.h new file mode 100644 index 000000000..1884a297d --- /dev/null +++ b/java/include/org_forstdb_AbstractCompactionFilterFactory.h @@ 
-0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractCompactionFilterFactory */ + +#ifndef _Included_org_forstdb_AbstractCompactionFilterFactory +#define _Included_org_forstdb_AbstractCompactionFilterFactory +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractCompactionFilterFactory + * Method: createNewCompactionFilterFactory0 + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractCompactionFilterFactory_createNewCompactionFilterFactory0 + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_AbstractCompactionFilterFactory + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_AbstractCompactionFilterFactory_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractComparator.h b/java/include/org_forstdb_AbstractComparator.h new file mode 100644 index 000000000..d476fdbe7 --- /dev/null +++ b/java/include/org_forstdb_AbstractComparator.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractComparator */ + +#ifndef _Included_org_forstdb_AbstractComparator +#define _Included_org_forstdb_AbstractComparator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractComparator + * Method: usingDirectBuffers + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_AbstractComparator_usingDirectBuffers + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_AbstractComparator + * Method: createNewComparator + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractComparator_createNewComparator + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractEventListener.h b/java/include/org_forstdb_AbstractEventListener.h new file mode 100644 index 000000000..e04648a8e --- /dev/null +++ b/java/include/org_forstdb_AbstractEventListener.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractEventListener */ + +#ifndef _Included_org_forstdb_AbstractEventListener +#define _Included_org_forstdb_AbstractEventListener +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractEventListener + * Method: createNewEventListener + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractEventListener_createNewEventListener + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_AbstractEventListener + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_AbstractEventListener_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractSlice.h b/java/include/org_forstdb_AbstractSlice.h new file mode 100644 index 000000000..2121b1fe3 --- /dev/null +++ b/java/include/org_forstdb_AbstractSlice.h @@ -0,0 +1,69 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractSlice */ + +#ifndef _Included_org_forstdb_AbstractSlice +#define _Included_org_forstdb_AbstractSlice +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractSlice + * Method: createNewSliceFromString + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractSlice_createNewSliceFromString + (JNIEnv *, jclass, jstring); + +/* + * Class: 
org_forstdb_AbstractSlice + * Method: size0 + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_AbstractSlice_size0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_AbstractSlice + * Method: empty0 + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_AbstractSlice_empty0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_AbstractSlice + * Method: toString0 + * Signature: (JZ)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_AbstractSlice_toString0 + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_AbstractSlice + * Method: compare0 + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_AbstractSlice_compare0 + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_AbstractSlice + * Method: startsWith0 + * Signature: (JJ)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_AbstractSlice_startsWith0 + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_AbstractSlice + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_AbstractSlice_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractTableFilter.h b/java/include/org_forstdb_AbstractTableFilter.h new file mode 100644 index 000000000..35fa3f360 --- /dev/null +++ b/java/include/org_forstdb_AbstractTableFilter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractTableFilter */ + +#ifndef _Included_org_forstdb_AbstractTableFilter +#define _Included_org_forstdb_AbstractTableFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractTableFilter + * Method: createNewTableFilter + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractTableFilter_createNewTableFilter + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractTraceWriter.h b/java/include/org_forstdb_AbstractTraceWriter.h new file mode 100644 index 000000000..820d6fe0d --- /dev/null +++ b/java/include/org_forstdb_AbstractTraceWriter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractTraceWriter */ + +#ifndef _Included_org_forstdb_AbstractTraceWriter +#define _Included_org_forstdb_AbstractTraceWriter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractTraceWriter + * Method: createNewTraceWriter + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractTraceWriter_createNewTraceWriter + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractTransactionNotifier.h b/java/include/org_forstdb_AbstractTransactionNotifier.h new file mode 100644 index 000000000..b43bad529 --- /dev/null +++ b/java/include/org_forstdb_AbstractTransactionNotifier.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractTransactionNotifier */ + +#ifndef _Included_org_forstdb_AbstractTransactionNotifier +#define _Included_org_forstdb_AbstractTransactionNotifier +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractTransactionNotifier + * Method: createNewTransactionNotifier + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractTransactionNotifier_createNewTransactionNotifier + (JNIEnv *, jobject); + +/* + * Class: 
org_forstdb_AbstractTransactionNotifier + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_AbstractTransactionNotifier_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_AbstractWalFilter.h b/java/include/org_forstdb_AbstractWalFilter.h new file mode 100644 index 000000000..ff7094403 --- /dev/null +++ b/java/include/org_forstdb_AbstractWalFilter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_AbstractWalFilter */ + +#ifndef _Included_org_forstdb_AbstractWalFilter +#define _Included_org_forstdb_AbstractWalFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_AbstractWalFilter + * Method: createNewWalFilter + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_AbstractWalFilter_createNewWalFilter + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_BackupEngine.h b/java/include/org_forstdb_BackupEngine.h new file mode 100644 index 000000000..a88572dd1 --- /dev/null +++ b/java/include/org_forstdb_BackupEngine.h @@ -0,0 +1,101 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_BackupEngine */ + +#ifndef _Included_org_forstdb_BackupEngine +#define _Included_org_forstdb_BackupEngine +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_BackupEngine + * Method: open + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BackupEngine_open + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: createNewBackup + * Signature: (JJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_createNewBackup + (JNIEnv *, jobject, jlong, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngine + * Method: createNewBackupWithMetadata + * Signature: (JJLjava/lang/String;Z)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_createNewBackupWithMetadata + (JNIEnv *, jobject, jlong, jlong, jstring, jboolean); + +/* + * Class: org_forstdb_BackupEngine + * Method: getBackupInfo + * Signature: (J)Ljava/util/List; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_BackupEngine_getBackupInfo + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: getCorruptedBackups + * Signature: (J)[I + */ +JNIEXPORT jintArray JNICALL Java_org_forstdb_BackupEngine_getCorruptedBackups + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: garbageCollect + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_garbageCollect + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: purgeOldBackups + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_purgeOldBackups + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_BackupEngine + * Method: deleteBackup + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_deleteBackup + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_BackupEngine + * Method: restoreDbFromBackup + * Signature: (JILjava/lang/String;Ljava/lang/String;J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_restoreDbFromBackup + (JNIEnv *, jobject, jlong, jint, jstring, jstring, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: restoreDbFromLatestBackup + * Signature: (JLjava/lang/String;Ljava/lang/String;J)V + */ +JNIEXPORT void 
JNICALL Java_org_forstdb_BackupEngine_restoreDbFromLatestBackup + (JNIEnv *, jobject, jlong, jstring, jstring, jlong); + +/* + * Class: org_forstdb_BackupEngine + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngine_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_BackupEngineOptions.h b/java/include/org_forstdb_BackupEngineOptions.h new file mode 100644 index 000000000..2368d6f56 --- /dev/null +++ b/java/include/org_forstdb_BackupEngineOptions.h @@ -0,0 +1,213 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_BackupEngineOptions */ + +#ifndef _Included_org_forstdb_BackupEngineOptions +#define _Included_org_forstdb_BackupEngineOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_BackupEngineOptions + * Method: newBackupEngineOptions + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BackupEngineOptions_newBackupEngineOptions + (JNIEnv *, jclass, jstring); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: backupDir + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_BackupEngineOptions_backupDir + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setBackupEnv + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setBackupEnv + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setShareTableFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setShareTableFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: shareTableFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_BackupEngineOptions_shareTableFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setInfoLog + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setInfoLog + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setSync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setSync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: sync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_BackupEngineOptions_sync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setDestroyOldData + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setDestroyOldData + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: destroyOldData + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_BackupEngineOptions_destroyOldData + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setBackupLogFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setBackupLogFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: backupLogFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_BackupEngineOptions_backupLogFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setBackupRateLimit + * Signature: (JJ)V + */ 
+JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setBackupRateLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: backupRateLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BackupEngineOptions_backupRateLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setBackupRateLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setBackupRateLimiter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setRestoreRateLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setRestoreRateLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: restoreRateLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BackupEngineOptions_restoreRateLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setRestoreRateLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setRestoreRateLimiter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setShareFilesWithChecksum + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setShareFilesWithChecksum + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: shareFilesWithChecksum + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_BackupEngineOptions_shareFilesWithChecksum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setMaxBackgroundOperations + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setMaxBackgroundOperations + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: maxBackgroundOperations + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_BackupEngineOptions_maxBackgroundOperations + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: setCallbackTriggerIntervalSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_setCallbackTriggerIntervalSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: callbackTriggerIntervalSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BackupEngineOptions_callbackTriggerIntervalSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_BackupEngineOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_BackupEngineOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_BlockBasedTableConfig.h b/java/include/org_forstdb_BlockBasedTableConfig.h new file mode 100644 index 000000000..b83bdf655 --- /dev/null +++ b/java/include/org_forstdb_BlockBasedTableConfig.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_BlockBasedTableConfig */ + +#ifndef _Included_org_forstdb_BlockBasedTableConfig +#define _Included_org_forstdb_BlockBasedTableConfig +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_BlockBasedTableConfig + * Method: newTableFactoryHandle + * Signature: (ZZZZBBDBZJJJIIIJZZZJZZIIZZBJI)J + */ +JNIEXPORT jlong 
JNICALL Java_org_forstdb_BlockBasedTableConfig_newTableFactoryHandle + (JNIEnv *, jobject, jboolean, jboolean, jboolean, jboolean, jbyte, jbyte, jdouble, jbyte, jboolean, jlong, jlong, jlong, jint, jint, jint, jlong, jboolean, jboolean, jboolean, jlong, jboolean, jboolean, jint, jint, jboolean, jboolean, jbyte, jlong, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_BloomFilter.h b/java/include/org_forstdb_BloomFilter.h new file mode 100644 index 000000000..95d43d194 --- /dev/null +++ b/java/include/org_forstdb_BloomFilter.h @@ -0,0 +1,23 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_BloomFilter */ + +#ifndef _Included_org_forstdb_BloomFilter +#define _Included_org_forstdb_BloomFilter +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_BloomFilter_DEFAULT_BITS_PER_KEY +#define org_forstdb_BloomFilter_DEFAULT_BITS_PER_KEY 10.0 +/* + * Class: org_forstdb_BloomFilter + * Method: createNewBloomFilter + * Signature: (D)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_BloomFilter_createNewBloomFilter + (JNIEnv *, jclass, jdouble); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Cache.h b/java/include/org_forstdb_Cache.h new file mode 100644 index 000000000..219d121ad --- /dev/null +++ b/java/include/org_forstdb_Cache.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Cache */ + +#ifndef _Included_org_forstdb_Cache +#define _Included_org_forstdb_Cache +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Cache + * Method: getUsage + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Cache_getUsage + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Cache + * Method: getPinnedUsage + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Cache_getPinnedUsage + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CassandraCompactionFilter.h b/java/include/org_forstdb_CassandraCompactionFilter.h new file mode 100644 index 000000000..76c66b9e7 --- /dev/null +++ b/java/include/org_forstdb_CassandraCompactionFilter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CassandraCompactionFilter */ + +#ifndef _Included_org_forstdb_CassandraCompactionFilter +#define _Included_org_forstdb_CassandraCompactionFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CassandraCompactionFilter + * Method: createNewCassandraCompactionFilter0 + * Signature: (ZI)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CassandraCompactionFilter_createNewCassandraCompactionFilter0 + (JNIEnv *, jclass, jboolean, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CassandraValueMergeOperator.h b/java/include/org_forstdb_CassandraValueMergeOperator.h new file mode 100644 index 000000000..a467d52cc --- /dev/null +++ b/java/include/org_forstdb_CassandraValueMergeOperator.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CassandraValueMergeOperator */ + +#ifndef _Included_org_forstdb_CassandraValueMergeOperator +#define _Included_org_forstdb_CassandraValueMergeOperator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CassandraValueMergeOperator + * Method: newSharedCassandraValueMergeOperator + * Signature: (II)J + */ +JNIEXPORT jlong 
JNICALL Java_org_forstdb_CassandraValueMergeOperator_newSharedCassandraValueMergeOperator + (JNIEnv *, jclass, jint, jint); + +/* + * Class: org_forstdb_CassandraValueMergeOperator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CassandraValueMergeOperator_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Checkpoint.h b/java/include/org_forstdb_Checkpoint.h new file mode 100644 index 000000000..59021737c --- /dev/null +++ b/java/include/org_forstdb_Checkpoint.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Checkpoint */ + +#ifndef _Included_org_forstdb_Checkpoint +#define _Included_org_forstdb_Checkpoint +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Checkpoint + * Method: newCheckpoint + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Checkpoint_newCheckpoint + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Checkpoint + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Checkpoint_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Checkpoint + * Method: createCheckpoint + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Checkpoint_createCheckpoint + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_Checkpoint + * Method: exportColumnFamily + * Signature: (JJLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Checkpoint_exportColumnFamily + (JNIEnv *, jobject, jlong, jlong, jstring); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ClockCache.h b/java/include/org_forstdb_ClockCache.h new file mode 100644 index 000000000..24533d053 --- /dev/null +++ b/java/include/org_forstdb_ClockCache.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ClockCache */ + +#ifndef _Included_org_forstdb_ClockCache +#define _Included_org_forstdb_ClockCache +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ClockCache + * Method: newClockCache + * Signature: (JIZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ClockCache_newClockCache + (JNIEnv *, jclass, jlong, jint, jboolean); + +/* + * Class: org_forstdb_ClockCache + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ClockCache_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ColumnFamilyHandle.h b/java/include/org_forstdb_ColumnFamilyHandle.h new file mode 100644 index 000000000..d14687dbe --- /dev/null +++ b/java/include/org_forstdb_ColumnFamilyHandle.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ColumnFamilyHandle */ + +#ifndef _Included_org_forstdb_ColumnFamilyHandle +#define _Included_org_forstdb_ColumnFamilyHandle +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ColumnFamilyHandle + * Method: getName + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_ColumnFamilyHandle_getName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyHandle + * Method: getID + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyHandle_getID + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyHandle + * Method: 
getDescriptor + * Signature: (J)Lorg/forstdb/ColumnFamilyDescriptor; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_ColumnFamilyHandle_getDescriptor + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyHandle + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyHandle_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ColumnFamilyOptions.h b/java/include/org_forstdb_ColumnFamilyOptions.h new file mode 100644 index 000000000..0e4e7c3e2 --- /dev/null +++ b/java/include/org_forstdb_ColumnFamilyOptions.h @@ -0,0 +1,1141 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ColumnFamilyOptions */ + +#ifndef _Included_org_forstdb_ColumnFamilyOptions +#define _Included_org_forstdb_ColumnFamilyOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: getColumnFamilyOptionsFromProps + * Signature: (JLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__JLjava_lang_String_2 + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: getColumnFamilyOptionsFromProps + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_getColumnFamilyOptionsFromProps__Ljava_lang_String_2 + (JNIEnv *, jclass, jstring); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: newColumnFamilyOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_newColumnFamilyOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: copyColumnFamilyOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_copyColumnFamilyOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: newColumnFamilyOptionsFromOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_newColumnFamilyOptionsFromOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: oldDefaults + * Signature: (JII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_oldDefaults + (JNIEnv *, jclass, jlong, jint, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: optimizeForSmallDb + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeForSmallDb__J + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: optimizeForSmallDb + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeForSmallDb__JJ + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: optimizeForPointLookup + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeForPointLookup + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: optimizeLevelStyleCompaction + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeLevelStyleCompaction + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: 
org_forstdb_ColumnFamilyOptions + * Method: optimizeUniversalStyleCompaction + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeUniversalStyleCompaction + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setComparatorHandle + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setComparatorHandle__JI + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setComparatorHandle + * Signature: (JJB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setComparatorHandle__JJB + (JNIEnv *, jobject, jlong, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMergeOperatorName + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMergeOperatorName + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMergeOperator + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMergeOperator + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionFilterHandle + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionFilterHandle + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionFilterFactoryHandle + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionFilterFactoryHandle + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setWriteBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setWriteBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: writeBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_writeBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxWriteBufferNumber + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxWriteBufferNumber + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxWriteBufferNumber + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_maxWriteBufferNumber + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMinWriteBufferNumberToMerge + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMinWriteBufferNumberToMerge + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: minWriteBufferNumberToMerge + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_minWriteBufferNumberToMerge + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: compressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_compressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompressionPerLevel + * Signature: (J[B)V 
+ */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompressionPerLevel + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: compressionPerLevel + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_ColumnFamilyOptions_compressionPerLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBottommostCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBottommostCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: bottommostCompressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_bottommostCompressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBottommostCompressionOptions + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBottommostCompressionOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompressionOptions + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompressionOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: useFixedLengthPrefixExtractor + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_useFixedLengthPrefixExtractor + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: useCappedPrefixExtractor + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_useCappedPrefixExtractor + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setNumLevels + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setNumLevels + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: numLevels + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_numLevels + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevelZeroFileNumCompactionTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevelZeroFileNumCompactionTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: levelZeroFileNumCompactionTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_levelZeroFileNumCompactionTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevelZeroSlowdownWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevelZeroSlowdownWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: levelZeroSlowdownWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_levelZeroSlowdownWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevelZeroStopWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevelZeroStopWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: levelZeroStopWritesTrigger + * Signature: (J)I + */ +JNIEXPORT 
jint JNICALL Java_org_forstdb_ColumnFamilyOptions_levelZeroStopWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setTargetFileSizeBase + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setTargetFileSizeBase + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: targetFileSizeBase + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_targetFileSizeBase + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setTargetFileSizeMultiplier + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setTargetFileSizeMultiplier + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: targetFileSizeMultiplier + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_targetFileSizeMultiplier + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxBytesForLevelBase + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelBase + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxBytesForLevelBase + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelBase + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevelCompactionDynamicLevelBytes + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevelCompactionDynamicLevelBytes + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: levelCompactionDynamicLevelBytes + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_levelCompactionDynamicLevelBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxBytesForLevelMultiplier + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplier + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxBytesForLevelMultiplier + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelMultiplier + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxCompactionBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxCompactionBytes + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxCompactionBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_maxCompactionBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setArenaBlockSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setArenaBlockSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: arenaBlockSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_arenaBlockSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setDisableAutoCompactions + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setDisableAutoCompactions + (JNIEnv *, 
jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: disableAutoCompactions + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_disableAutoCompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionStyle + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionStyle + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: compactionStyle + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_compactionStyle + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxTableFilesSizeFIFO + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxTableFilesSizeFIFO + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxTableFilesSizeFIFO + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_maxTableFilesSizeFIFO + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxSequentialSkipInIterations + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxSequentialSkipInIterations + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxSequentialSkipInIterations + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_maxSequentialSkipInIterations + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMemTableFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMemTableFactory + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: memTableFactoryName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ColumnFamilyOptions_memTableFactoryName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setTableFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setTableFactory + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: tableFactoryName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ColumnFamilyOptions_tableFactoryName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCfPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCfPaths + (JNIEnv *, jclass, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: cfPathsLen + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_cfPathsLen + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: cfPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_cfPaths + (JNIEnv *, jclass, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setInplaceUpdateSupport + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setInplaceUpdateSupport + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: inplaceUpdateSupport + * 
Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_inplaceUpdateSupport + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setInplaceUpdateNumLocks + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setInplaceUpdateNumLocks + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: inplaceUpdateNumLocks + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_inplaceUpdateNumLocks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMemtablePrefixBloomSizeRatio + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMemtablePrefixBloomSizeRatio + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: memtablePrefixBloomSizeRatio + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_ColumnFamilyOptions_memtablePrefixBloomSizeRatio + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setExperimentalMempurgeThreshold + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setExperimentalMempurgeThreshold + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: experimentalMempurgeThreshold + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_ColumnFamilyOptions_experimentalMempurgeThreshold + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMemtableWholeKeyFiltering + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMemtableWholeKeyFiltering + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: memtableWholeKeyFiltering + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_memtableWholeKeyFiltering + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBloomLocality + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBloomLocality + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: bloomLocality + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_bloomLocality + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxSuccessiveMerges + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxSuccessiveMerges + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxSuccessiveMerges + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_maxSuccessiveMerges + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setOptimizeFiltersForHits + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setOptimizeFiltersForHits + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: optimizeFiltersForHits + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_optimizeFiltersForHits + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMemtableHugePageSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL 
Java_org_forstdb_ColumnFamilyOptions_setMemtableHugePageSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: memtableHugePageSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_memtableHugePageSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setSoftPendingCompactionBytesLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setSoftPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: softPendingCompactionBytesLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_softPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setHardPendingCompactionBytesLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setHardPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: hardPendingCompactionBytesLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_hardPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevel0FileNumCompactionTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevel0FileNumCompactionTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: level0FileNumCompactionTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_level0FileNumCompactionTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevel0SlowdownWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevel0SlowdownWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: level0SlowdownWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_level0SlowdownWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setLevel0StopWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setLevel0StopWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: level0StopWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_level0StopWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxBytesForLevelMultiplierAdditional + * Signature: (J[I)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxBytesForLevelMultiplierAdditional + (JNIEnv *, jobject, jlong, jintArray); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxBytesForLevelMultiplierAdditional + * Signature: (J)[I + */ +JNIEXPORT jintArray JNICALL Java_org_forstdb_ColumnFamilyOptions_maxBytesForLevelMultiplierAdditional + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setParanoidFileChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setParanoidFileChecks + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + 
* Method: paranoidFileChecks + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_paranoidFileChecks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMaxWriteBufferNumberToMaintain + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMaxWriteBufferNumberToMaintain + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: maxWriteBufferNumberToMaintain + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_maxWriteBufferNumberToMaintain + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionPriority + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionPriority + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: compactionPriority + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_compactionPriority + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setReportBgIoStats + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setReportBgIoStats + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: reportBgIoStats + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_reportBgIoStats + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setTtl + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setTtl + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: ttl + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_ttl + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setPeriodicCompactionSeconds + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setPeriodicCompactionSeconds + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: periodicCompactionSeconds + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_periodicCompactionSeconds + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionOptionsUniversal + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionOptionsUniversal + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionOptionsFIFO + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionOptionsFIFO + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setForceConsistencyChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setForceConsistencyChecks + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: forceConsistencyChecks + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_forceConsistencyChecks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setSstPartitionerFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setSstPartitionerFactory + 
(JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setCompactionThreadLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setCompactionThreadLimiter + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMemtableMaxRangeDeletions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMemtableMaxRangeDeletions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: memtableMaxRangeDeletions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_memtableMaxRangeDeletions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setEnableBlobFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setEnableBlobFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: enableBlobFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_enableBlobFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setMinBlobSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setMinBlobSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: minBlobSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_minBlobSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_blobFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobCompressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_blobCompressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setEnableBlobGarbageCollection + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setEnableBlobGarbageCollection + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: enableBlobGarbageCollection + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ColumnFamilyOptions_enableBlobGarbageCollection + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobGarbageCollectionAgeCutoff + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobGarbageCollectionAgeCutoff + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobGarbageCollectionAgeCutoff + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_ColumnFamilyOptions_blobGarbageCollectionAgeCutoff + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobGarbageCollectionForceThreshold 
+ * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobGarbageCollectionForceThreshold + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobGarbageCollectionForceThreshold + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_ColumnFamilyOptions_blobGarbageCollectionForceThreshold + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobCompactionReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobCompactionReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobCompactionReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ColumnFamilyOptions_blobCompactionReadaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setBlobFileStartingLevel + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setBlobFileStartingLevel + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: blobFileStartingLevel + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ColumnFamilyOptions_blobFileStartingLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: setPrepopulateBlobCache + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ColumnFamilyOptions_setPrepopulateBlobCache + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ColumnFamilyOptions + * Method: prepopulateBlobCache + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ColumnFamilyOptions_prepopulateBlobCache + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactRangeOptions.h b/java/include/org_forstdb_CompactRangeOptions.h new file mode 100644 index 000000000..40b48a147 --- /dev/null +++ b/java/include/org_forstdb_CompactRangeOptions.h @@ -0,0 +1,181 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactRangeOptions */ + +#ifndef _Included_org_forstdb_CompactRangeOptions +#define _Included_org_forstdb_CompactRangeOptions +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_CompactRangeOptions_VALUE_kSkip +#define org_forstdb_CompactRangeOptions_VALUE_kSkip 0L +#undef org_forstdb_CompactRangeOptions_VALUE_kIfHaveCompactionFilter +#define org_forstdb_CompactRangeOptions_VALUE_kIfHaveCompactionFilter 1L +#undef org_forstdb_CompactRangeOptions_VALUE_kForce +#define org_forstdb_CompactRangeOptions_VALUE_kForce 2L +#undef org_forstdb_CompactRangeOptions_VALUE_kForceOptimized +#define org_forstdb_CompactRangeOptions_VALUE_kForceOptimized 3L +/* + * Class: org_forstdb_CompactRangeOptions + * Method: newCompactRangeOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactRangeOptions_newCompactRangeOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: exclusiveManualCompaction + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactRangeOptions_exclusiveManualCompaction + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * 
Method: setExclusiveManualCompaction + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setExclusiveManualCompaction + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: changeLevel + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactRangeOptions_changeLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setChangeLevel + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setChangeLevel + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: targetLevel + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactRangeOptions_targetLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setTargetLevel + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setTargetLevel + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: targetPathId + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactRangeOptions_targetPathId + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setTargetPathId + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setTargetPathId + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: bottommostLevelCompaction + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactRangeOptions_bottommostLevelCompaction + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setBottommostLevelCompaction + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setBottommostLevelCompaction + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: allowWriteStall + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactRangeOptions_allowWriteStall + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setAllowWriteStall + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setAllowWriteStall + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setMaxSubcompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setMaxSubcompactions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: maxSubcompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactRangeOptions_maxSubcompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setFullHistoryTSLow + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setFullHistoryTSLow + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: fullHistoryTSLow + * Signature: (J)Lorg/forstdb/CompactRangeOptions/Timestamp; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_CompactRangeOptions_fullHistoryTSLow + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactRangeOptions + * Method: setCanceled + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactRangeOptions_setCanceled + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactRangeOptions + * 
Method: canceled + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactRangeOptions_canceled + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactionJobInfo.h b/java/include/org_forstdb_CompactionJobInfo.h new file mode 100644 index 000000000..35122098e --- /dev/null +++ b/java/include/org_forstdb_CompactionJobInfo.h @@ -0,0 +1,125 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactionJobInfo */ + +#ifndef _Included_org_forstdb_CompactionJobInfo +#define _Included_org_forstdb_CompactionJobInfo +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompactionJobInfo + * Method: newCompactionJobInfo + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobInfo_newCompactionJobInfo + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionJobInfo_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: columnFamilyName + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_CompactionJobInfo_columnFamilyName + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: status + * Signature: (J)Lorg/forstdb/Status; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_CompactionJobInfo_status + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: threadId + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobInfo_threadId + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: jobId + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionJobInfo_jobId + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: baseInputLevel + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionJobInfo_baseInputLevel + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: outputLevel + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionJobInfo_outputLevel + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: inputFiles + * Signature: (J)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_CompactionJobInfo_inputFiles + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: outputFiles + * Signature: (J)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_CompactionJobInfo_outputFiles + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: tableProperties + * Signature: (J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_CompactionJobInfo_tableProperties + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: compactionReason + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_CompactionJobInfo_compactionReason + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: compression + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_CompactionJobInfo_compression + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobInfo + * Method: stats + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobInfo_stats + (JNIEnv *, jclass, jlong); + +#ifdef 
__cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactionJobStats.h b/java/include/org_forstdb_CompactionJobStats.h new file mode 100644 index 000000000..5bdb2ec33 --- /dev/null +++ b/java/include/org_forstdb_CompactionJobStats.h @@ -0,0 +1,229 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactionJobStats */ + +#ifndef _Included_org_forstdb_CompactionJobStats +#define _Included_org_forstdb_CompactionJobStats +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompactionJobStats + * Method: newCompactionJobStats + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_newCompactionJobStats + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionJobStats_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: reset + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionJobStats_reset + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: add + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionJobStats_add + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: elapsedMicros + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_elapsedMicros + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numInputRecords + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numInputRecords + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numInputFiles + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numInputFiles + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numInputFilesAtOutputLevel + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numInputFilesAtOutputLevel + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numOutputRecords + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numOutputRecords + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numOutputFiles + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numOutputFiles + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: isManualCompaction + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactionJobStats_isManualCompaction + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: totalInputBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_totalInputBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: totalOutputBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_totalOutputBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numRecordsReplaced + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numRecordsReplaced + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: totalInputRawKeyBytes + * Signature: (J)J + */ +JNIEXPORT jlong 
JNICALL Java_org_forstdb_CompactionJobStats_totalInputRawKeyBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: totalInputRawValueBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_totalInputRawValueBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numInputDeletionRecords + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numInputDeletionRecords + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numExpiredDeletionRecords + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numExpiredDeletionRecords + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numCorruptKeys + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numCorruptKeys + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: fileWriteNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_fileWriteNanos + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: fileRangeSyncNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_fileRangeSyncNanos + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: fileFsyncNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_fileFsyncNanos + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: filePrepareWriteNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_filePrepareWriteNanos + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: smallestOutputKeyPrefix + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_CompactionJobStats_smallestOutputKeyPrefix + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: largestOutputKeyPrefix + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_CompactionJobStats_largestOutputKeyPrefix + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numSingleDelFallthru + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numSingleDelFallthru + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionJobStats + * Method: numSingleDelMismatch + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionJobStats_numSingleDelMismatch + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactionOptions.h b/java/include/org_forstdb_CompactionOptions.h new file mode 100644 index 000000000..9de502251 --- /dev/null +++ b/java/include/org_forstdb_CompactionOptions.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactionOptions */ + +#ifndef _Included_org_forstdb_CompactionOptions +#define _Included_org_forstdb_CompactionOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompactionOptions + * Method: newCompactionOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionOptions_newCompactionOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactionOptions + * Method: disposeInternal + * Signature: (J)V + */ 
+JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptions + * Method: compression + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_CompactionOptions_compression + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionOptions + * Method: setCompression + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptions_setCompression + (JNIEnv *, jclass, jlong, jbyte); + +/* + * Class: org_forstdb_CompactionOptions + * Method: outputFileSizeLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionOptions_outputFileSizeLimit + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionOptions + * Method: setOutputFileSizeLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptions_setOutputFileSizeLimit + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_CompactionOptions + * Method: maxSubcompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptions_maxSubcompactions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_CompactionOptions + * Method: setMaxSubcompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptions_setMaxSubcompactions + (JNIEnv *, jclass, jlong, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactionOptionsFIFO.h b/java/include/org_forstdb_CompactionOptionsFIFO.h new file mode 100644 index 000000000..aed1c4b69 --- /dev/null +++ b/java/include/org_forstdb_CompactionOptionsFIFO.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactionOptionsFIFO */ + +#ifndef _Included_org_forstdb_CompactionOptionsFIFO +#define _Included_org_forstdb_CompactionOptionsFIFO +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: newCompactionOptionsFIFO + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionOptionsFIFO_newCompactionOptionsFIFO + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsFIFO_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: setMaxTableFilesSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsFIFO_setMaxTableFilesSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: maxTableFilesSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionOptionsFIFO_maxTableFilesSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: setAllowCompaction + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsFIFO_setAllowCompaction + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactionOptionsFIFO + * Method: allowCompaction + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactionOptionsFIFO_allowCompaction + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompactionOptionsUniversal.h b/java/include/org_forstdb_CompactionOptionsUniversal.h new file mode 100644 index 000000000..606032f24 --- /dev/null +++ 
b/java/include/org_forstdb_CompactionOptionsUniversal.h @@ -0,0 +1,141 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompactionOptionsUniversal */ + +#ifndef _Included_org_forstdb_CompactionOptionsUniversal +#define _Included_org_forstdb_CompactionOptionsUniversal +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: newCompactionOptionsUniversal + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_CompactionOptionsUniversal_newCompactionOptionsUniversal + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setSizeRatio + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setSizeRatio + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: sizeRatio + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptionsUniversal_sizeRatio + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setMinMergeWidth + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setMinMergeWidth + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: minMergeWidth + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptionsUniversal_minMergeWidth + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setMaxMergeWidth + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setMaxMergeWidth + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: maxMergeWidth + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptionsUniversal_maxMergeWidth + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setMaxSizeAmplificationPercent + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setMaxSizeAmplificationPercent + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: maxSizeAmplificationPercent + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptionsUniversal_maxSizeAmplificationPercent + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setCompressionSizePercent + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setCompressionSizePercent + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: compressionSizePercent + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompactionOptionsUniversal_compressionSizePercent + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setStopStyle + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setStopStyle + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: stopStyle + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL 
Java_org_forstdb_CompactionOptionsUniversal_stopStyle + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: setAllowTrivialMove + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompactionOptionsUniversal_setAllowTrivialMove + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompactionOptionsUniversal + * Method: allowTrivialMove + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompactionOptionsUniversal_allowTrivialMove + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ComparatorOptions.h b/java/include/org_forstdb_ComparatorOptions.h new file mode 100644 index 000000000..68c0846ea --- /dev/null +++ b/java/include/org_forstdb_ComparatorOptions.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ComparatorOptions */ + +#ifndef _Included_org_forstdb_ComparatorOptions +#define _Included_org_forstdb_ComparatorOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ComparatorOptions + * Method: newComparatorOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ComparatorOptions_newComparatorOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: reusedSynchronisationType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ComparatorOptions_reusedSynchronisationType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: setReusedSynchronisationType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ComparatorOptions_setReusedSynchronisationType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: useDirectBuffer + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ComparatorOptions_useDirectBuffer + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: setUseDirectBuffer + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ComparatorOptions_setUseDirectBuffer + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: maxReusedBufferSize + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ComparatorOptions_maxReusedBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: setMaxReusedBufferSize + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ComparatorOptions_setMaxReusedBufferSize + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_ComparatorOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ComparatorOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_CompressionOptions.h b/java/include/org_forstdb_CompressionOptions.h new file mode 100644 index 000000000..b5d7fc79b --- /dev/null +++ b/java/include/org_forstdb_CompressionOptions.h @@ -0,0 +1,125 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_CompressionOptions */ + +#ifndef _Included_org_forstdb_CompressionOptions +#define _Included_org_forstdb_CompressionOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_CompressionOptions + * Method: newCompressionOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_CompressionOptions_newCompressionOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_CompressionOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setWindowBits + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setWindowBits + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompressionOptions + * Method: windowBits + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompressionOptions_windowBits + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setLevel + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setLevel + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompressionOptions + * Method: level + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompressionOptions_level + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setStrategy + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setStrategy + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompressionOptions + * Method: strategy + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompressionOptions_strategy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setMaxDictBytes + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setMaxDictBytes + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompressionOptions + * Method: maxDictBytes + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompressionOptions_maxDictBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setZstdMaxTrainBytes + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setZstdMaxTrainBytes + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_CompressionOptions + * Method: zstdMaxTrainBytes + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_CompressionOptions_zstdMaxTrainBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_CompressionOptions + * Method: setEnabled + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_CompressionOptions_setEnabled + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_CompressionOptions + * Method: enabled + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_CompressionOptions_enabled + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ConcurrentTaskLimiterImpl.h b/java/include/org_forstdb_ConcurrentTaskLimiterImpl.h new file mode 100644 index 000000000..e8ae61f40 --- /dev/null +++ b/java/include/org_forstdb_ConcurrentTaskLimiterImpl.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ConcurrentTaskLimiterImpl */ + +#ifndef _Included_org_forstdb_ConcurrentTaskLimiterImpl +#define _Included_org_forstdb_ConcurrentTaskLimiterImpl +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: newConcurrentTaskLimiterImpl0 + * Signature: (Ljava/lang/String;I)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_newConcurrentTaskLimiterImpl0 
+ (JNIEnv *, jclass, jstring, jint); + +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: name + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_name + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: setMaxOutstandingTask + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_setMaxOutstandingTask + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: resetMaxOutstandingTask + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_resetMaxOutstandingTask + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: outstandingTask + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_outstandingTask + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ConcurrentTaskLimiterImpl + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConcurrentTaskLimiterImpl_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ConfigOptions.h b/java/include/org_forstdb_ConfigOptions.h new file mode 100644 index 000000000..cd3afd215 --- /dev/null +++ b/java/include/org_forstdb_ConfigOptions.h @@ -0,0 +1,69 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ConfigOptions */ + +#ifndef _Included_org_forstdb_ConfigOptions +#define _Included_org_forstdb_ConfigOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ConfigOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ConfigOptions + * Method: newConfigOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ConfigOptions_newConfigOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_ConfigOptions + * Method: setEnv + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_setEnv + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_ConfigOptions + * Method: setDelimiter + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_setDelimiter + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_ConfigOptions + * Method: setIgnoreUnknownOptions + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_setIgnoreUnknownOptions + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_ConfigOptions + * Method: setInputStringsEscaped + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_setInputStringsEscaped + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_ConfigOptions + * Method: setSanityLevel + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ConfigOptions_setSanityLevel + (JNIEnv *, jclass, jlong, jbyte); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_DBOptions.h b/java/include/org_forstdb_DBOptions.h new file mode 100644 index 000000000..1392c0c3d --- /dev/null +++ b/java/include/org_forstdb_DBOptions.h @@ -0,0 +1,1343 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_DBOptions */ + +#ifndef _Included_org_forstdb_DBOptions +#define 
_Included_org_forstdb_DBOptions +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_DBOptions_DEFAULT_NUM_SHARD_BITS +#define org_forstdb_DBOptions_DEFAULT_NUM_SHARD_BITS -1L +/* + * Class: org_forstdb_DBOptions + * Method: getDBOptionsFromProps + * Signature: (JLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_getDBOptionsFromProps__JLjava_lang_String_2 + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_DBOptions + * Method: getDBOptionsFromProps + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_getDBOptionsFromProps__Ljava_lang_String_2 + (JNIEnv *, jclass, jstring); + +/* + * Class: org_forstdb_DBOptions + * Method: newDBOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_newDBOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_DBOptions + * Method: copyDBOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_copyDBOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: newDBOptionsFromOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_newDBOptionsFromOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: optimizeForSmallDb + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_optimizeForSmallDb + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setIncreaseParallelism + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setIncreaseParallelism + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: setCreateIfMissing + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setCreateIfMissing + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: createIfMissing + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_createIfMissing + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setCreateMissingColumnFamilies + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setCreateMissingColumnFamilies + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: createMissingColumnFamilies + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_createMissingColumnFamilies + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setEnv + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setEnv + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setErrorIfExists + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setErrorIfExists + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: errorIfExists + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_errorIfExists + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setParanoidChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setParanoidChecks + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: paranoidChecks + * Signature: (J)Z + */ +JNIEXPORT 
jboolean JNICALL Java_org_forstdb_DBOptions_paranoidChecks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setRateLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setRateLimiter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setSstFileManager + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setSstFileManager + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setLogger + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setLogger + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setInfoLogLevel + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setInfoLogLevel + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_DBOptions + * Method: infoLogLevel + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_DBOptions_infoLogLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxOpenFiles + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxOpenFiles + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxOpenFiles + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxOpenFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxFileOpeningThreads + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxFileOpeningThreads + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxFileOpeningThreads + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxFileOpeningThreads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxTotalWalSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxTotalWalSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: maxTotalWalSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_maxTotalWalSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setStatistics + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setStatistics + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: statistics + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_statistics + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: useFsync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_useFsync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setUseFsync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setUseFsync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: setDbPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDbPaths + (JNIEnv *, jobject, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_DBOptions + * Method: dbPathsLen + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_dbPathsLen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: dbPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_dbPaths + 
(JNIEnv *, jobject, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_DBOptions + * Method: setDbLogDir + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDbLogDir + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_DBOptions + * Method: dbLogDir + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_DBOptions_dbLogDir + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalDir + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalDir + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_DBOptions + * Method: walDir + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_DBOptions_walDir + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setDeleteObsoleteFilesPeriodMicros + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDeleteObsoleteFilesPeriodMicros + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: deleteObsoleteFilesPeriodMicros + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_deleteObsoleteFilesPeriodMicros + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxBackgroundCompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxBackgroundCompactions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxBackgroundCompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxBackgroundCompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxSubcompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxSubcompactions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxSubcompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxSubcompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxBackgroundFlushes + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxBackgroundFlushes + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxBackgroundFlushes + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxBackgroundFlushes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxBackgroundJobs + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxBackgroundJobs + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxBackgroundJobs + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxBackgroundJobs + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxLogFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxLogFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: maxLogFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_maxLogFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setLogFileTimeToRoll + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setLogFileTimeToRoll + (JNIEnv *, jobject, jlong, jlong); + +/* + * 
Class: org_forstdb_DBOptions + * Method: logFileTimeToRoll + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_logFileTimeToRoll + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setKeepLogFileNum + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setKeepLogFileNum + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: keepLogFileNum + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_keepLogFileNum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setRecycleLogFileNum + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setRecycleLogFileNum + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: recycleLogFileNum + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_recycleLogFileNum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxManifestFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxManifestFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: maxManifestFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_maxManifestFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setTableCacheNumshardbits + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setTableCacheNumshardbits + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: tableCacheNumshardbits + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_tableCacheNumshardbits + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalTtlSeconds + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalTtlSeconds + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: walTtlSeconds + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_walTtlSeconds + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalSizeLimitMB + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalSizeLimitMB + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: walSizeLimitMB + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_walSizeLimitMB + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxWriteBatchGroupSizeBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxWriteBatchGroupSizeBytes + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: maxWriteBatchGroupSizeBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_maxWriteBatchGroupSizeBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setManifestPreallocationSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setManifestPreallocationSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: manifestPreallocationSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_manifestPreallocationSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setUseDirectReads + * Signature: (JZ)V + */ +JNIEXPORT 
void JNICALL Java_org_forstdb_DBOptions_setUseDirectReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: useDirectReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_useDirectReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setUseDirectIoForFlushAndCompaction + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setUseDirectIoForFlushAndCompaction + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: useDirectIoForFlushAndCompaction + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_useDirectIoForFlushAndCompaction + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllowFAllocate + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllowFAllocate + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allowFAllocate + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allowFAllocate + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllowMmapReads + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllowMmapReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allowMmapReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allowMmapReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllowMmapWrites + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllowMmapWrites + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allowMmapWrites + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allowMmapWrites + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setIsFdCloseOnExec + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setIsFdCloseOnExec + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: isFdCloseOnExec + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_isFdCloseOnExec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setStatsDumpPeriodSec + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setStatsDumpPeriodSec + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: statsDumpPeriodSec + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_statsDumpPeriodSec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setStatsPersistPeriodSec + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setStatsPersistPeriodSec + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: statsPersistPeriodSec + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_statsPersistPeriodSec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setStatsHistoryBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setStatsHistoryBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: statsHistoryBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_statsHistoryBufferSize + 
(JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAdviseRandomOnOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAdviseRandomOnOpen + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: adviseRandomOnOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_adviseRandomOnOpen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setDbWriteBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDbWriteBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWriteBufferManager + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWriteBufferManager + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: dbWriteBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_dbWriteBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAccessHintOnCompactionStart + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAccessHintOnCompactionStart + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_DBOptions + * Method: accessHintOnCompactionStart + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_DBOptions_accessHintOnCompactionStart + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setCompactionReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setCompactionReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: compactionReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_compactionReadaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setRandomAccessMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setRandomAccessMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: randomAccessMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_randomAccessMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWritableFileMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWritableFileMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: writableFileMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_writableFileMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setUseAdaptiveMutex + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setUseAdaptiveMutex + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: useAdaptiveMutex + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_useAdaptiveMutex + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setBytesPerSync + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setBytesPerSync + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: bytesPerSync + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_bytesPerSync + (JNIEnv *, 
jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalBytesPerSync + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalBytesPerSync + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: walBytesPerSync + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_walBytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setStrictBytesPerSync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setStrictBytesPerSync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: strictBytesPerSync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_strictBytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setEventListeners + * Signature: (J[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setEventListeners + (JNIEnv *, jclass, jlong, jlongArray); + +/* + * Class: org_forstdb_DBOptions + * Method: eventListeners + * Signature: (J)[Lorg/forstdb/AbstractEventListener; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_DBOptions_eventListeners + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setEnableThreadTracking + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setEnableThreadTracking + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: enableThreadTracking + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_enableThreadTracking + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setDelayedWriteRate + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDelayedWriteRate + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: delayedWriteRate + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_delayedWriteRate + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setEnablePipelinedWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setEnablePipelinedWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: enablePipelinedWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_enablePipelinedWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setUnorderedWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setUnorderedWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: unorderedWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_unorderedWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllowConcurrentMemtableWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllowConcurrentMemtableWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allowConcurrentMemtableWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allowConcurrentMemtableWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setEnableWriteThreadAdaptiveYield + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setEnableWriteThreadAdaptiveYield + (JNIEnv *, jobject, 
jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: enableWriteThreadAdaptiveYield + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_enableWriteThreadAdaptiveYield + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWriteThreadMaxYieldUsec + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWriteThreadMaxYieldUsec + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: writeThreadMaxYieldUsec + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_writeThreadMaxYieldUsec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWriteThreadSlowYieldUsec + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWriteThreadSlowYieldUsec + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: writeThreadSlowYieldUsec + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_writeThreadSlowYieldUsec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setSkipStatsUpdateOnDbOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setSkipStatsUpdateOnDbOpen + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: skipStatsUpdateOnDbOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_skipStatsUpdateOnDbOpen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setSkipCheckingSstFileSizesOnDbOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setSkipCheckingSstFileSizesOnDbOpen + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: skipCheckingSstFileSizesOnDbOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_skipCheckingSstFileSizesOnDbOpen + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalRecoveryMode + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalRecoveryMode + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_DBOptions + * Method: walRecoveryMode + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_DBOptions_walRecoveryMode + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllow2pc + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllow2pc + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allow2pc + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allow2pc + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setRowCache + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setRowCache + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWalFilter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWalFilter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setFailIfOptionsFileError + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setFailIfOptionsFileError + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: failIfOptionsFileError + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_failIfOptionsFileError + (JNIEnv *, jobject, 
jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setDumpMallocStats + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setDumpMallocStats + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: dumpMallocStats + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_dumpMallocStats + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAvoidFlushDuringRecovery + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAvoidFlushDuringRecovery + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: avoidFlushDuringRecovery + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_avoidFlushDuringRecovery + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAvoidFlushDuringShutdown + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAvoidFlushDuringShutdown + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: avoidFlushDuringShutdown + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_avoidFlushDuringShutdown + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAllowIngestBehind + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAllowIngestBehind + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: allowIngestBehind + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_allowIngestBehind + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setTwoWriteQueues + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setTwoWriteQueues + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: twoWriteQueues + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_twoWriteQueues + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setManualWalFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setManualWalFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: manualWalFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_manualWalFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAtomicFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAtomicFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: atomicFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_atomicFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setAvoidUnnecessaryBlockingIO + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setAvoidUnnecessaryBlockingIO + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: avoidUnnecessaryBlockingIO + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_avoidUnnecessaryBlockingIO + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setPersistStatsToDisk + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setPersistStatsToDisk + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * 
Method: persistStatsToDisk + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_persistStatsToDisk + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setWriteDbidToManifest + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setWriteDbidToManifest + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: writeDbidToManifest + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_writeDbidToManifest + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setLogReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setLogReadaheadSize + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: logReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_logReadaheadSize + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setBestEffortsRecovery + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setBestEffortsRecovery + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_DBOptions + * Method: bestEffortsRecovery + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_DBOptions_bestEffortsRecovery + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setMaxBgErrorResumeCount + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setMaxBgErrorResumeCount + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: org_forstdb_DBOptions + * Method: maxBgerrorResumeCount + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_DBOptions_maxBgerrorResumeCount + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: setBgerrorResumeRetryInterval + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DBOptions_setBgerrorResumeRetryInterval + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_DBOptions + * Method: bgerrorResumeRetryInterval + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DBOptions_bgerrorResumeRetryInterval + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_DirectSlice.h b/java/include/org_forstdb_DirectSlice.h new file mode 100644 index 000000000..ea809dcb9 --- /dev/null +++ b/java/include/org_forstdb_DirectSlice.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_DirectSlice */ + +#ifndef _Included_org_forstdb_DirectSlice +#define _Included_org_forstdb_DirectSlice +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_DirectSlice + * Method: createNewDirectSlice0 + * Signature: (Ljava/nio/ByteBuffer;I)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DirectSlice_createNewDirectSlice0 + (JNIEnv *, jclass, jobject, jint); + +/* + * Class: org_forstdb_DirectSlice + * Method: createNewDirectSlice1 + * Signature: (Ljava/nio/ByteBuffer;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_DirectSlice_createNewDirectSlice1 + (JNIEnv *, jclass, jobject); + +/* + * Class: org_forstdb_DirectSlice + * Method: data0 + * Signature: (J)Ljava/nio/ByteBuffer; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_DirectSlice_data0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_DirectSlice + * Method: get0 + * Signature: (JI)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_DirectSlice_get0 + (JNIEnv *, jobject, jlong, 
jint); + +/* + * Class: org_forstdb_DirectSlice + * Method: clear0 + * Signature: (JZJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DirectSlice_clear0 + (JNIEnv *, jobject, jlong, jboolean, jlong); + +/* + * Class: org_forstdb_DirectSlice + * Method: removePrefix0 + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DirectSlice_removePrefix0 + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DirectSlice + * Method: setLength0 + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DirectSlice_setLength0 + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_DirectSlice + * Method: disposeInternalBuf + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_DirectSlice_disposeInternalBuf + (JNIEnv *, jobject, jlong, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Env.h b/java/include/org_forstdb_Env.h new file mode 100644 index 000000000..8b9a95d66 --- /dev/null +++ b/java/include/org_forstdb_Env.h @@ -0,0 +1,77 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Env */ + +#ifndef _Included_org_forstdb_Env +#define _Included_org_forstdb_Env +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Env + * Method: getDefaultEnvInternal + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Env_getDefaultEnvInternal + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_Env + * Method: setBackgroundThreads + * Signature: (JIB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Env_setBackgroundThreads + (JNIEnv *, jobject, jlong, jint, jbyte); + +/* + * Class: org_forstdb_Env + * Method: getBackgroundThreads + * Signature: (JB)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Env_getBackgroundThreads + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Env + * Method: getThreadPoolQueueLen + * Signature: (JB)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Env_getThreadPoolQueueLen + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Env + * Method: incBackgroundThreadsIfNeeded + * Signature: (JIB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Env_incBackgroundThreadsIfNeeded + (JNIEnv *, jobject, jlong, jint, jbyte); + +/* + * Class: org_forstdb_Env + * Method: lowerThreadPoolIOPriority + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Env_lowerThreadPoolIOPriority + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Env + * Method: lowerThreadPoolCPUPriority + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Env_lowerThreadPoolCPUPriority + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Env + * Method: getThreadList + * Signature: (J)[Lorg/forstdb/ThreadStatus; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_Env_getThreadList + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_EnvFlinkTestSuite.h b/java/include/org_forstdb_EnvFlinkTestSuite.h new file mode 100644 index 000000000..1a880fa27 --- /dev/null +++ b/java/include/org_forstdb_EnvFlinkTestSuite.h @@ -0,0 +1,37 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_EnvFlinkTestSuite */ + +#ifndef _Included_org_forstdb_EnvFlinkTestSuite +#define _Included_org_forstdb_EnvFlinkTestSuite +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_EnvFlinkTestSuite + * Method: buildNativeObject + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_EnvFlinkTestSuite_buildNativeObject + (JNIEnv *, jobject, jstring); + +/* + * Class: org_forstdb_EnvFlinkTestSuite + * Method: runAllTestSuites + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvFlinkTestSuite_runAllTestSuites + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvFlinkTestSuite + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvFlinkTestSuite_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_EnvOptions.h b/java/include/org_forstdb_EnvOptions.h new file mode 100644 index 000000000..39795651a --- /dev/null +++ b/java/include/org_forstdb_EnvOptions.h @@ -0,0 +1,221 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_EnvOptions */ + +#ifndef _Included_org_forstdb_EnvOptions +#define _Included_org_forstdb_EnvOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_EnvOptions + * Method: newEnvOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_newEnvOptions__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_EnvOptions + * Method: newEnvOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_newEnvOptions__J + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setUseMmapReads + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setUseMmapReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: useMmapReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_useMmapReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setUseMmapWrites + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setUseMmapWrites + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: useMmapWrites + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_useMmapWrites + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setUseDirectReads + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setUseDirectReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: useDirectReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_useDirectReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setUseDirectWrites + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setUseDirectWrites + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: useDirectWrites + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_useDirectWrites + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setAllowFallocate + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setAllowFallocate + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: allowFallocate + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_allowFallocate + (JNIEnv *, jobject, jlong); + +/* + * 
Class: org_forstdb_EnvOptions + * Method: setSetFdCloexec + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setSetFdCloexec + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: setFdCloexec + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_setFdCloexec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setBytesPerSync + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setBytesPerSync + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: bytesPerSync + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_bytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setFallocateWithKeepSize + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setFallocateWithKeepSize + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_EnvOptions + * Method: fallocateWithKeepSize + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_EnvOptions_fallocateWithKeepSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setCompactionReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setCompactionReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: compactionReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_compactionReadaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setRandomAccessMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setRandomAccessMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: randomAccessMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_randomAccessMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setWritableFileMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setWritableFileMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: writableFileMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_EnvOptions_writableFileMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_EnvOptions + * Method: setRateLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_EnvOptions_setRateLimiter + (JNIEnv *, jobject, jlong, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ExportImportFilesMetaData.h b/java/include/org_forstdb_ExportImportFilesMetaData.h new file mode 100644 index 000000000..077daf31a --- /dev/null +++ b/java/include/org_forstdb_ExportImportFilesMetaData.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ExportImportFilesMetaData */ + +#ifndef _Included_org_forstdb_ExportImportFilesMetaData +#define _Included_org_forstdb_ExportImportFilesMetaData +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ExportImportFilesMetaData + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ExportImportFilesMetaData_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git 
a/java/include/org_forstdb_Filter.h b/java/include/org_forstdb_Filter.h new file mode 100644 index 000000000..948c5ecaa --- /dev/null +++ b/java/include/org_forstdb_Filter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Filter */ + +#ifndef _Included_org_forstdb_Filter +#define _Included_org_forstdb_Filter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Filter + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Filter_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_FlinkCompactionFilter.h b/java/include/org_forstdb_FlinkCompactionFilter.h new file mode 100644 index 000000000..bb9bdb15c --- /dev/null +++ b/java/include/org_forstdb_FlinkCompactionFilter.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_FlinkCompactionFilter */ + +#ifndef _Included_org_forstdb_FlinkCompactionFilter +#define _Included_org_forstdb_FlinkCompactionFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_FlinkCompactionFilter + * Method: createNewFlinkCompactionFilter0 + * Signature: (JLorg/forstdb/FlinkCompactionFilter/TimeProvider;J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0 + (JNIEnv *, jclass, jlong, jobject, jlong); + +/* + * Class: org_forstdb_FlinkCompactionFilter + * Method: createNewFlinkCompactionFilterConfigHolder + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_FlinkCompactionFilter_createNewFlinkCompactionFilterConfigHolder + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_FlinkCompactionFilter + * Method: disposeFlinkCompactionFilterConfigHolder + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_FlinkCompactionFilter_disposeFlinkCompactionFilterConfigHolder + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_FlinkCompactionFilter + * Method: configureFlinkCompactionFilter + * Signature: (JIIJJILorg/forstdb/FlinkCompactionFilter/ListElementFilterFactory;)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_FlinkCompactionFilter_configureFlinkCompactionFilter + (JNIEnv *, jclass, jlong, jint, jint, jlong, jlong, jint, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_FlinkEnv.h b/java/include/org_forstdb_FlinkEnv.h new file mode 100644 index 000000000..4dfe9e786 --- /dev/null +++ b/java/include/org_forstdb_FlinkEnv.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_FlinkEnv */ + +#ifndef _Included_org_forstdb_FlinkEnv +#define _Included_org_forstdb_FlinkEnv +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_FlinkEnv + * Method: createFlinkEnv + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_FlinkEnv_createFlinkEnv + (JNIEnv *, jclass, jstring); + +/* + * Class: org_forstdb_FlinkEnv + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_FlinkEnv_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_FlushOptions.h b/java/include/org_forstdb_FlushOptions.h new file mode 100644 index 000000000..97ff71b99 --- /dev/null +++ b/java/include/org_forstdb_FlushOptions.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for 
class org_forstdb_FlushOptions */ + +#ifndef _Included_org_forstdb_FlushOptions +#define _Included_org_forstdb_FlushOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_FlushOptions + * Method: newFlushOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_FlushOptions_newFlushOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_FlushOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_FlushOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_FlushOptions + * Method: setWaitForFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_FlushOptions_setWaitForFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_FlushOptions + * Method: waitForFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_FlushOptions_waitForFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_FlushOptions + * Method: setAllowWriteStall + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_FlushOptions_setAllowWriteStall + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_FlushOptions + * Method: allowWriteStall + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_FlushOptions_allowWriteStall + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_HashLinkedListMemTableConfig.h b/java/include/org_forstdb_HashLinkedListMemTableConfig.h new file mode 100644 index 000000000..bfc29cab3 --- /dev/null +++ b/java/include/org_forstdb_HashLinkedListMemTableConfig.h @@ -0,0 +1,31 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_HashLinkedListMemTableConfig */ + +#ifndef _Included_org_forstdb_HashLinkedListMemTableConfig +#define _Included_org_forstdb_HashLinkedListMemTableConfig +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_HashLinkedListMemTableConfig_DEFAULT_BUCKET_COUNT +#define org_forstdb_HashLinkedListMemTableConfig_DEFAULT_BUCKET_COUNT 50000LL +#undef org_forstdb_HashLinkedListMemTableConfig_DEFAULT_HUGE_PAGE_TLB_SIZE +#define org_forstdb_HashLinkedListMemTableConfig_DEFAULT_HUGE_PAGE_TLB_SIZE 0LL +#undef org_forstdb_HashLinkedListMemTableConfig_DEFAULT_BUCKET_ENTRIES_LOG_THRES +#define org_forstdb_HashLinkedListMemTableConfig_DEFAULT_BUCKET_ENTRIES_LOG_THRES 4096L +#undef org_forstdb_HashLinkedListMemTableConfig_DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH +#define org_forstdb_HashLinkedListMemTableConfig_DEFAULT_IF_LOG_BUCKET_DIST_WHEN_FLUSH 1L +#undef org_forstdb_HashLinkedListMemTableConfig_DEFAUL_THRESHOLD_USE_SKIPLIST +#define org_forstdb_HashLinkedListMemTableConfig_DEFAUL_THRESHOLD_USE_SKIPLIST 256L +/* + * Class: org_forstdb_HashLinkedListMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (JJIZI)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_HashLinkedListMemTableConfig_newMemTableFactoryHandle + (JNIEnv *, jobject, jlong, jlong, jint, jboolean, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_HashSkipListMemTableConfig.h b/java/include/org_forstdb_HashSkipListMemTableConfig.h new file mode 100644 index 000000000..bc800fe5a --- /dev/null +++ b/java/include/org_forstdb_HashSkipListMemTableConfig.h @@ -0,0 +1,27 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_HashSkipListMemTableConfig */ + +#ifndef _Included_org_forstdb_HashSkipListMemTableConfig +#define 
_Included_org_forstdb_HashSkipListMemTableConfig +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_HashSkipListMemTableConfig_DEFAULT_BUCKET_COUNT +#define org_forstdb_HashSkipListMemTableConfig_DEFAULT_BUCKET_COUNT 1000000L +#undef org_forstdb_HashSkipListMemTableConfig_DEFAULT_BRANCHING_FACTOR +#define org_forstdb_HashSkipListMemTableConfig_DEFAULT_BRANCHING_FACTOR 4L +#undef org_forstdb_HashSkipListMemTableConfig_DEFAULT_HEIGHT +#define org_forstdb_HashSkipListMemTableConfig_DEFAULT_HEIGHT 4L +/* + * Class: org_forstdb_HashSkipListMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (JII)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_HashSkipListMemTableConfig_newMemTableFactoryHandle + (JNIEnv *, jobject, jlong, jint, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_HyperClockCache.h b/java/include/org_forstdb_HyperClockCache.h new file mode 100644 index 000000000..c7f5ea634 --- /dev/null +++ b/java/include/org_forstdb_HyperClockCache.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_HyperClockCache */ + +#ifndef _Included_org_forstdb_HyperClockCache +#define _Included_org_forstdb_HyperClockCache +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_HyperClockCache + * Method: disposeInternalJni + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_HyperClockCache_disposeInternalJni + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_HyperClockCache + * Method: newHyperClockCache + * Signature: (JJIZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_HyperClockCache_newHyperClockCache + (JNIEnv *, jclass, jlong, jlong, jint, jboolean); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ImportColumnFamilyOptions.h b/java/include/org_forstdb_ImportColumnFamilyOptions.h new file mode 100644 index 000000000..d97b72abb --- /dev/null +++ b/java/include/org_forstdb_ImportColumnFamilyOptions.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ImportColumnFamilyOptions */ + +#ifndef _Included_org_forstdb_ImportColumnFamilyOptions +#define _Included_org_forstdb_ImportColumnFamilyOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ImportColumnFamilyOptions + * Method: newImportColumnFamilyOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ImportColumnFamilyOptions_newImportColumnFamilyOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_ImportColumnFamilyOptions + * Method: moveFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ImportColumnFamilyOptions_moveFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ImportColumnFamilyOptions + * Method: setMoveFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ImportColumnFamilyOptions_setMoveFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ImportColumnFamilyOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ImportColumnFamilyOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_IngestExternalFileOptions.h b/java/include/org_forstdb_IngestExternalFileOptions.h new file mode 100644 index 000000000..7db0ec878 --- /dev/null +++ b/java/include/org_forstdb_IngestExternalFileOptions.h @@ -0,0 +1,133 @@ +/* DO NOT EDIT THIS FILE - it is machine 
generated */ +#include <jni.h> +/* Header for class org_forstdb_IngestExternalFileOptions */ + +#ifndef _Included_org_forstdb_IngestExternalFileOptions +#define _Included_org_forstdb_IngestExternalFileOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: newIngestExternalFileOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_IngestExternalFileOptions_newIngestExternalFileOptions__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: newIngestExternalFileOptions + * Signature: (ZZZZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_IngestExternalFileOptions_newIngestExternalFileOptions__ZZZZ + (JNIEnv *, jclass, jboolean, jboolean, jboolean, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: moveFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_moveFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: setMoveFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setMoveFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: snapshotConsistency + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_snapshotConsistency + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: setSnapshotConsistency + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setSnapshotConsistency + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: allowGlobalSeqNo + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_allowGlobalSeqNo + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: setAllowGlobalSeqNo + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setAllowGlobalSeqNo + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: allowBlockingFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_allowBlockingFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: setAllowBlockingFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setAllowBlockingFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: ingestBehind + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_ingestBehind + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: setIngestBehind + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setIngestBehind + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_IngestExternalFileOptions + * Method: writeGlobalSeqno + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_IngestExternalFileOptions_writeGlobalSeqno + (JNIEnv *, jobject, jlong); + +/* + * Class:
org_forstdb_IngestExternalFileOptions + * Method: setWriteGlobalSeqno + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_IngestExternalFileOptions_setWriteGlobalSeqno + (JNIEnv *, jobject, jlong, jboolean); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_LRUCache.h b/java/include/org_forstdb_LRUCache.h new file mode 100644 index 000000000..168288330 --- /dev/null +++ b/java/include/org_forstdb_LRUCache.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_LRUCache */ + +#ifndef _Included_org_forstdb_LRUCache +#define _Included_org_forstdb_LRUCache +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_LRUCache + * Method: newLRUCache + * Signature: (JIZDD)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_LRUCache_newLRUCache + (JNIEnv *, jclass, jlong, jint, jboolean, jdouble, jdouble); + +/* + * Class: org_forstdb_LRUCache + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_LRUCache_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_LiveFileMetaData.h b/java/include/org_forstdb_LiveFileMetaData.h new file mode 100644 index 000000000..f89568b61 --- /dev/null +++ b/java/include/org_forstdb_LiveFileMetaData.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_LiveFileMetaData */ + +#ifndef _Included_org_forstdb_LiveFileMetaData +#define _Included_org_forstdb_LiveFileMetaData +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_LiveFileMetaData + * Method: newLiveFileMetaDataHandle + * Signature: ([BIILjava/lang/String;Ljava/lang/String;JJJ[BI[BIJZJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_LiveFileMetaData_newLiveFileMetaDataHandle + (JNIEnv *, jobject, jbyteArray, jint, jint, jstring, jstring, jlong, jlong, jlong, jbyteArray, jint, jbyteArray, jint, jlong, jboolean, jlong, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Logger.h b/java/include/org_forstdb_Logger.h new file mode 100644 index 000000000..d1968a3fd --- /dev/null +++ b/java/include/org_forstdb_Logger.h @@ -0,0 +1,57 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_Logger */ + +#ifndef _Included_org_forstdb_Logger +#define _Included_org_forstdb_Logger +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_Logger_WITH_OPTIONS +#define org_forstdb_Logger_WITH_OPTIONS 0LL +#undef org_forstdb_Logger_WITH_DBOPTIONS +#define org_forstdb_Logger_WITH_DBOPTIONS 1LL +/* + * Class: org_forstdb_Logger + * Method: createNewLoggerOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Logger_createNewLoggerOptions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Logger + * Method: createNewLoggerDbOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Logger_createNewLoggerDbOptions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Logger + * Method: setInfoLogLevel + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Logger_setInfoLogLevel + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Logger + * Method: infoLogLevel + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Logger_infoLogLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Logger + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void
JNICALL Java_org_forstdb_Logger_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_MemoryUtil.h b/java/include/org_forstdb_MemoryUtil.h new file mode 100644 index 000000000..ed7b3fd3f --- /dev/null +++ b/java/include/org_forstdb_MemoryUtil.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_MemoryUtil */ + +#ifndef _Included_org_forstdb_MemoryUtil +#define _Included_org_forstdb_MemoryUtil +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_MemoryUtil + * Method: getApproximateMemoryUsageByType + * Signature: ([J[J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_MemoryUtil_getApproximateMemoryUsageByType + (JNIEnv *, jclass, jlongArray, jlongArray); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_NativeComparatorWrapper.h b/java/include/org_forstdb_NativeComparatorWrapper.h new file mode 100644 index 000000000..7fb7fb9d1 --- /dev/null +++ b/java/include/org_forstdb_NativeComparatorWrapper.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_NativeComparatorWrapper */ + +#ifndef _Included_org_forstdb_NativeComparatorWrapper +#define _Included_org_forstdb_NativeComparatorWrapper +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_NativeComparatorWrapper + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_NativeComparatorWrapper_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h b/java/include/org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h new file mode 100644 index 000000000..b94d5e91a --- /dev/null +++ b/java/include/org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper */ + +#ifndef _Included_org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper +#define _Included_org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_NativeComparatorWrapperTest_NativeStringComparatorWrapper + * Method: newStringComparator + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_NativeComparatorWrapperTest_00024NativeStringComparatorWrapper_newStringComparator + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_OptimisticTransactionDB.h b/java/include/org_forstdb_OptimisticTransactionDB.h new file mode 100644 index 000000000..86f111d7b --- /dev/null +++ b/java/include/org_forstdb_OptimisticTransactionDB.h @@ -0,0 +1,87 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_OptimisticTransactionDB */ + +#ifndef _Included_org_forstdb_OptimisticTransactionDB +#define _Included_org_forstdb_OptimisticTransactionDB +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_OptimisticTransactionDB_NOT_FOUND +#define org_forstdb_OptimisticTransactionDB_NOT_FOUND -1L +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptimisticTransactionDB_disposeInternal
+ (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: open + * Signature: (JLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_open__JLjava_lang_String_2 + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: open + * Signature: (JLjava/lang/String;[[B[J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_OptimisticTransactionDB_open__JLjava_lang_String_2_3_3B_3J + (JNIEnv *, jclass, jlong, jstring, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: closeDatabase + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptimisticTransactionDB_closeDatabase + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: beginTransaction + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_beginTransaction__JJ + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: beginTransaction + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_beginTransaction__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: beginTransaction_withOld + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: beginTransaction_withOld + * Signature: (JJJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_beginTransaction_1withOld__JJJJ + (JNIEnv *, jobject, jlong, jlong, jlong, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionDB + * Method: getBaseDB + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionDB_getBaseDB + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_OptimisticTransactionOptions.h b/java/include/org_forstdb_OptimisticTransactionOptions.h new file mode 100644 index 000000000..9060f1b13 --- /dev/null +++ b/java/include/org_forstdb_OptimisticTransactionOptions.h @@ -0,0 +1,53 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_OptimisticTransactionOptions */ + +#ifndef _Included_org_forstdb_OptimisticTransactionOptions +#define _Included_org_forstdb_OptimisticTransactionOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_OptimisticTransactionOptions + * Method: newOptimisticTransactionOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_OptimisticTransactionOptions_newOptimisticTransactionOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_OptimisticTransactionOptions + * Method: isSetSnapshot + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_OptimisticTransactionOptions_isSetSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_OptimisticTransactionOptions + * Method: setSetSnapshot + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptimisticTransactionOptions_setSetSnapshot + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_OptimisticTransactionOptions + * Method: setComparator + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptimisticTransactionOptions_setComparator + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class:
org_forstdb_OptimisticTransactionOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptimisticTransactionOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Options.h b/java/include/org_forstdb_Options.h new file mode 100644 index 000000000..363a38321 --- /dev/null +++ b/java/include/org_forstdb_Options.h @@ -0,0 +1,2405 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_Options */ + +#ifndef _Included_org_forstdb_Options +#define _Included_org_forstdb_Options +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Options + * Method: newOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_newOptions__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_Options + * Method: newOptions + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_newOptions__JJ + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: copyOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_copyOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnv + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnv + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: prepareForBulkLoad + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_prepareForBulkLoad + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setIncreaseParallelism + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setIncreaseParallelism + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: setCreateIfMissing + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCreateIfMissing + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: createIfMissing + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_createIfMissing + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCreateMissingColumnFamilies + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCreateMissingColumnFamilies + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: createMissingColumnFamilies + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_createMissingColumnFamilies + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setErrorIfExists + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setErrorIfExists + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: errorIfExists + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_errorIfExists + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setParanoidChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setParanoidChecks + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: paranoidChecks + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_paranoidChecks +
(JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setRateLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setRateLimiter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setSstFileManager + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setSstFileManager + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLogger + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLogger + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setInfoLogLevel + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setInfoLogLevel + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: infoLogLevel + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_infoLogLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxOpenFiles + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxOpenFiles + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxOpenFiles + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxOpenFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxTotalWalSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxTotalWalSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxFileOpeningThreads + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxFileOpeningThreads + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxFileOpeningThreads + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxFileOpeningThreads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxTotalWalSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxTotalWalSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setStatistics + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setStatistics + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: statistics + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_statistics + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: useFsync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_useFsync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setUseFsync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setUseFsync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: setDbPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDbPaths + (JNIEnv *, jobject, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Options + * Method: dbPathsLen + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_dbPathsLen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: dbPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_dbPaths + (JNIEnv *, jobject, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Options + * Method: setDbLogDir + * Signature: 
(JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDbLogDir + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_Options + * Method: dbLogDir + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Options_dbLogDir + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalDir + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalDir + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_Options + * Method: walDir + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Options_walDir + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setDeleteObsoleteFilesPeriodMicros + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDeleteObsoleteFilesPeriodMicros + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: deleteObsoleteFilesPeriodMicros + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_deleteObsoleteFilesPeriodMicros + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBackgroundCompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBackgroundCompactions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxBackgroundCompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxBackgroundCompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxSubcompactions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxSubcompactions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxSubcompactions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxSubcompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBackgroundFlushes + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBackgroundFlushes + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxBackgroundFlushes + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxBackgroundFlushes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBackgroundJobs + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBackgroundJobs + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxBackgroundJobs + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxBackgroundJobs + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxLogFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxLogFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxLogFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxLogFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLogFileTimeToRoll + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLogFileTimeToRoll + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: logFileTimeToRoll + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_logFileTimeToRoll + (JNIEnv *, jobject, jlong); + +/* + * Class: 
org_forstdb_Options + * Method: setKeepLogFileNum + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setKeepLogFileNum + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: keepLogFileNum + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_keepLogFileNum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setRecycleLogFileNum + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setRecycleLogFileNum + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: recycleLogFileNum + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_recycleLogFileNum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxManifestFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxManifestFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxManifestFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxManifestFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxTableFilesSizeFIFO + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxTableFilesSizeFIFO + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxTableFilesSizeFIFO + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxTableFilesSizeFIFO + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTableCacheNumshardbits + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTableCacheNumshardbits + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: tableCacheNumshardbits + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_tableCacheNumshardbits + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalTtlSeconds + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalTtlSeconds + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: walTtlSeconds + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_walTtlSeconds + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalSizeLimitMB + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalSizeLimitMB + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: walSizeLimitMB + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_walSizeLimitMB + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxWriteBatchGroupSizeBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxWriteBatchGroupSizeBytes + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxWriteBatchGroupSizeBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxWriteBatchGroupSizeBytes + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setManifestPreallocationSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setManifestPreallocationSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: manifestPreallocationSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_Options_manifestPreallocationSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setUseDirectReads + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setUseDirectReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: useDirectReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_useDirectReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setUseDirectIoForFlushAndCompaction + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setUseDirectIoForFlushAndCompaction + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: useDirectIoForFlushAndCompaction + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_useDirectIoForFlushAndCompaction + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllowFAllocate + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllowFAllocate + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allowFAllocate + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allowFAllocate + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllowMmapReads + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllowMmapReads + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allowMmapReads + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allowMmapReads + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllowMmapWrites + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllowMmapWrites + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allowMmapWrites + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allowMmapWrites + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setIsFdCloseOnExec + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setIsFdCloseOnExec + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: isFdCloseOnExec + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_isFdCloseOnExec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setStatsDumpPeriodSec + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setStatsDumpPeriodSec + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: statsDumpPeriodSec + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_statsDumpPeriodSec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setStatsPersistPeriodSec + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setStatsPersistPeriodSec + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: statsPersistPeriodSec + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_statsPersistPeriodSec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setStatsHistoryBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setStatsHistoryBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: 
statsHistoryBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_statsHistoryBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAdviseRandomOnOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAdviseRandomOnOpen + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: adviseRandomOnOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_adviseRandomOnOpen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setDbWriteBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDbWriteBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWriteBufferManager + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWriteBufferManager + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: dbWriteBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_dbWriteBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAccessHintOnCompactionStart + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAccessHintOnCompactionStart + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: accessHintOnCompactionStart + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_accessHintOnCompactionStart + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: compactionReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_compactionReadaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setRandomAccessMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setRandomAccessMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: randomAccessMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_randomAccessMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWritableFileMaxBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWritableFileMaxBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: writableFileMaxBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_writableFileMaxBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setUseAdaptiveMutex + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setUseAdaptiveMutex + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: useAdaptiveMutex + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_useAdaptiveMutex + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBytesPerSync + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBytesPerSync + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: bytesPerSync + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_Options_bytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalBytesPerSync + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalBytesPerSync + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: walBytesPerSync + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_walBytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setStrictBytesPerSync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setStrictBytesPerSync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: strictBytesPerSync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_strictBytesPerSync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEventListeners + * Signature: (J[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEventListeners + (JNIEnv *, jclass, jlong, jlongArray); + +/* + * Class: org_forstdb_Options + * Method: eventListeners + * Signature: (J)[Lorg/forstdb/AbstractEventListener; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_Options_eventListeners + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnableThreadTracking + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnableThreadTracking + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: enableThreadTracking + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_enableThreadTracking + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setDelayedWriteRate + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDelayedWriteRate + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: delayedWriteRate + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_delayedWriteRate + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnablePipelinedWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnablePipelinedWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: enablePipelinedWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_enablePipelinedWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setUnorderedWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setUnorderedWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: unorderedWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_unorderedWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllowConcurrentMemtableWrite + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllowConcurrentMemtableWrite + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allowConcurrentMemtableWrite + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allowConcurrentMemtableWrite + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnableWriteThreadAdaptiveYield + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnableWriteThreadAdaptiveYield + (JNIEnv *, jobject, jlong, 
jboolean); + +/* + * Class: org_forstdb_Options + * Method: enableWriteThreadAdaptiveYield + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_enableWriteThreadAdaptiveYield + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWriteThreadMaxYieldUsec + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWriteThreadMaxYieldUsec + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: writeThreadMaxYieldUsec + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_writeThreadMaxYieldUsec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWriteThreadSlowYieldUsec + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWriteThreadSlowYieldUsec + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: writeThreadSlowYieldUsec + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_writeThreadSlowYieldUsec + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setSkipStatsUpdateOnDbOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setSkipStatsUpdateOnDbOpen + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: skipStatsUpdateOnDbOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_skipStatsUpdateOnDbOpen + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setSkipCheckingSstFileSizesOnDbOpen + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setSkipCheckingSstFileSizesOnDbOpen + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: skipCheckingSstFileSizesOnDbOpen + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_skipCheckingSstFileSizesOnDbOpen + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalRecoveryMode + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalRecoveryMode + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: walRecoveryMode + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_walRecoveryMode + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllow2pc + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllow2pc + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allow2pc + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allow2pc + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setRowCache + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setRowCache + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWalFilter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWalFilter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setFailIfOptionsFileError + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setFailIfOptionsFileError + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: failIfOptionsFileError + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_failIfOptionsFileError + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setDumpMallocStats 
+ * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDumpMallocStats + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: dumpMallocStats + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_dumpMallocStats + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAvoidFlushDuringRecovery + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAvoidFlushDuringRecovery + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: avoidFlushDuringRecovery + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_avoidFlushDuringRecovery + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAvoidFlushDuringShutdown + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAvoidFlushDuringShutdown + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: avoidFlushDuringShutdown + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_avoidFlushDuringShutdown + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAllowIngestBehind + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAllowIngestBehind + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: allowIngestBehind + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_allowIngestBehind + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTwoWriteQueues + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTwoWriteQueues + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: twoWriteQueues + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_twoWriteQueues + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setManualWalFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setManualWalFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: manualWalFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_manualWalFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: oldDefaults + * Signature: (JII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_oldDefaults + (JNIEnv *, jclass, jlong, jint, jint); + +/* + * Class: org_forstdb_Options + * Method: optimizeForSmallDb + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_optimizeForSmallDb__J + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: optimizeForSmallDb + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_optimizeForSmallDb__JJ + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: optimizeForPointLookup + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_optimizeForPointLookup + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: optimizeLevelStyleCompaction + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_optimizeLevelStyleCompaction + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: optimizeUniversalStyleCompaction + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_optimizeUniversalStyleCompaction + 
(JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setComparatorHandle + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setComparatorHandle__JI + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: setComparatorHandle + * Signature: (JJB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setComparatorHandle__JJB + (JNIEnv *, jobject, jlong, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: setMergeOperatorName + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMergeOperatorName + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_Options + * Method: setMergeOperator + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMergeOperator + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionFilterHandle + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionFilterHandle + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionFilterFactoryHandle + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionFilterFactoryHandle + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWriteBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWriteBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: writeBufferSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_writeBufferSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxWriteBufferNumber + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxWriteBufferNumber + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxWriteBufferNumber + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxWriteBufferNumber + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMinWriteBufferNumberToMerge + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMinWriteBufferNumberToMerge + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: minWriteBufferNumberToMerge + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_minWriteBufferNumberToMerge + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: compressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_compressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompressionPerLevel + * Signature: (J[B)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompressionPerLevel + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_forstdb_Options + * Method: compressionPerLevel + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_Options_compressionPerLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBottommostCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBottommostCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + 
* Class: org_forstdb_Options + * Method: bottommostCompressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_bottommostCompressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBottommostCompressionOptions + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBottommostCompressionOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompressionOptions + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompressionOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: useFixedLengthPrefixExtractor + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_useFixedLengthPrefixExtractor + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: useCappedPrefixExtractor + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_useCappedPrefixExtractor + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: setNumLevels + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setNumLevels + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: numLevels + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_numLevels + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevelZeroFileNumCompactionTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevelZeroFileNumCompactionTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: levelZeroFileNumCompactionTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_levelZeroFileNumCompactionTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevelZeroSlowdownWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevelZeroSlowdownWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: levelZeroSlowdownWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_levelZeroSlowdownWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevelZeroStopWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevelZeroStopWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: levelZeroStopWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_levelZeroStopWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTargetFileSizeBase + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTargetFileSizeBase + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: targetFileSizeBase + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_targetFileSizeBase + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTargetFileSizeMultiplier + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTargetFileSizeMultiplier + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: targetFileSizeMultiplier + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_targetFileSizeMultiplier + (JNIEnv *, jobject, 
jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBytesForLevelBase + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBytesForLevelBase + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxBytesForLevelBase + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxBytesForLevelBase + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevelCompactionDynamicLevelBytes + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevelCompactionDynamicLevelBytes + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: levelCompactionDynamicLevelBytes + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_levelCompactionDynamicLevelBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBytesForLevelMultiplier + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBytesForLevelMultiplier + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_Options + * Method: maxBytesForLevelMultiplier + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_Options_maxBytesForLevelMultiplier + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxCompactionBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxCompactionBytes + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxCompactionBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxCompactionBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setArenaBlockSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setArenaBlockSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: arenaBlockSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_arenaBlockSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setDisableAutoCompactions + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setDisableAutoCompactions + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: disableAutoCompactions + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_disableAutoCompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionStyle + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionStyle + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: compactionStyle + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_compactionStyle + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxSequentialSkipInIterations + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxSequentialSkipInIterations + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxSequentialSkipInIterations + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxSequentialSkipInIterations + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMemTableFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMemTableFactory + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: 
org_forstdb_Options + * Method: memTableFactoryName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Options_memTableFactoryName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTableFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTableFactory + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: tableFactoryName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Options_tableFactoryName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCfPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCfPaths + (JNIEnv *, jclass, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Options + * Method: cfPathsLen + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_cfPathsLen + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: cfPaths + * Signature: (J[Ljava/lang/String;[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_cfPaths + (JNIEnv *, jclass, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Options + * Method: setInplaceUpdateSupport + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setInplaceUpdateSupport + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: inplaceUpdateSupport + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_inplaceUpdateSupport + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setInplaceUpdateNumLocks + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setInplaceUpdateNumLocks + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: inplaceUpdateNumLocks + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_inplaceUpdateNumLocks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMemtablePrefixBloomSizeRatio + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMemtablePrefixBloomSizeRatio + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_Options + * Method: memtablePrefixBloomSizeRatio + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_Options_memtablePrefixBloomSizeRatio + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setExperimentalMempurgeThreshold + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setExperimentalMempurgeThreshold + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_Options + * Method: experimentalMempurgeThreshold + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_Options_experimentalMempurgeThreshold + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMemtableWholeKeyFiltering + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMemtableWholeKeyFiltering + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: memtableWholeKeyFiltering + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_memtableWholeKeyFiltering + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBloomLocality + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBloomLocality + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: 
org_forstdb_Options + * Method: bloomLocality + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_bloomLocality + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxSuccessiveMerges + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxSuccessiveMerges + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: maxSuccessiveMerges + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_maxSuccessiveMerges + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setOptimizeFiltersForHits + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setOptimizeFiltersForHits + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: optimizeFiltersForHits + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_optimizeFiltersForHits + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMemtableHugePageSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMemtableHugePageSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: memtableHugePageSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_memtableHugePageSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setSoftPendingCompactionBytesLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setSoftPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: softPendingCompactionBytesLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_softPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setHardPendingCompactionBytesLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setHardPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: hardPendingCompactionBytesLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_hardPendingCompactionBytesLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevel0FileNumCompactionTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevel0FileNumCompactionTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: level0FileNumCompactionTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_level0FileNumCompactionTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevel0SlowdownWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevel0SlowdownWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: level0SlowdownWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_level0SlowdownWritesTrigger + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLevel0StopWritesTrigger + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLevel0StopWritesTrigger + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: level0StopWritesTrigger + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_level0StopWritesTrigger + 
(JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBytesForLevelMultiplierAdditional + * Signature: (J[I)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBytesForLevelMultiplierAdditional + (JNIEnv *, jobject, jlong, jintArray); + +/* + * Class: org_forstdb_Options + * Method: maxBytesForLevelMultiplierAdditional + * Signature: (J)[I + */ +JNIEXPORT jintArray JNICALL Java_org_forstdb_Options_maxBytesForLevelMultiplierAdditional + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setParanoidFileChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setParanoidFileChecks + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: paranoidFileChecks + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_paranoidFileChecks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxWriteBufferNumberToMaintain + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxWriteBufferNumberToMaintain + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxWriteBufferNumberToMaintain + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxWriteBufferNumberToMaintain + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionPriority + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionPriority + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: compactionPriority + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_compactionPriority + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setReportBgIoStats + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setReportBgIoStats + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: reportBgIoStats + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_reportBgIoStats + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setTtl + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setTtl + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: ttl + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_ttl + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setPeriodicCompactionSeconds + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setPeriodicCompactionSeconds + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: periodicCompactionSeconds + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_periodicCompactionSeconds + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionOptionsUniversal + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionOptionsUniversal + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionOptionsFIFO + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionOptionsFIFO + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setForceConsistencyChecks + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setForceConsistencyChecks + (JNIEnv *, jobject, jlong, jboolean); + 
+/* + * Class: org_forstdb_Options + * Method: forceConsistencyChecks + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_forceConsistencyChecks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAtomicFlush + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAtomicFlush + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: atomicFlush + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_atomicFlush + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setSstPartitionerFactory + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setSstPartitionerFactory + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMemtableMaxRangeDeletions + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMemtableMaxRangeDeletions + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: memtableMaxRangeDeletions + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_memtableMaxRangeDeletions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setCompactionThreadLimiter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setCompactionThreadLimiter + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: setAvoidUnnecessaryBlockingIO + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setAvoidUnnecessaryBlockingIO + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: avoidUnnecessaryBlockingIO + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_avoidUnnecessaryBlockingIO + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setPersistStatsToDisk + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setPersistStatsToDisk + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: persistStatsToDisk + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_persistStatsToDisk + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setWriteDbidToManifest + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setWriteDbidToManifest + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: writeDbidToManifest + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_writeDbidToManifest + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setLogReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setLogReadaheadSize + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: logReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_logReadaheadSize + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBestEffortsRecovery + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBestEffortsRecovery + (JNIEnv *, jclass, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: bestEffortsRecovery + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_bestEffortsRecovery + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMaxBgErrorResumeCount + * Signature: (JI)V + */ 
+JNIEXPORT void JNICALL Java_org_forstdb_Options_setMaxBgErrorResumeCount + (JNIEnv *, jclass, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: maxBgerrorResumeCount + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_maxBgerrorResumeCount + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBgerrorResumeRetryInterval + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBgerrorResumeRetryInterval + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: bgerrorResumeRetryInterval + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_bgerrorResumeRetryInterval + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnableBlobFiles + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnableBlobFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: enableBlobFiles + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_enableBlobFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setMinBlobSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setMinBlobSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: minBlobSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_minBlobSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobFileSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobFileSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: blobFileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_blobFileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobCompressionType + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobCompressionType + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: blobCompressionType + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_blobCompressionType + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setEnableBlobGarbageCollection + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setEnableBlobGarbageCollection + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_Options + * Method: enableBlobGarbageCollection + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Options_enableBlobGarbageCollection + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobGarbageCollectionAgeCutoff + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobGarbageCollectionAgeCutoff + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_Options + * Method: blobGarbageCollectionAgeCutoff + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_Options_blobGarbageCollectionAgeCutoff + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobGarbageCollectionForceThreshold + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobGarbageCollectionForceThreshold + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_Options + * Method: blobGarbageCollectionForceThreshold + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL 
Java_org_forstdb_Options_blobGarbageCollectionForceThreshold + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobCompactionReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobCompactionReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Options + * Method: blobCompactionReadaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Options_blobCompactionReadaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setBlobFileStartingLevel + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setBlobFileStartingLevel + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Options + * Method: blobFileStartingLevel + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Options_blobFileStartingLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Options + * Method: setPrepopulateBlobCache + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Options_setPrepopulateBlobCache + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Options + * Method: prepopulateBlobCache + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Options_prepopulateBlobCache + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_OptionsUtil.h b/java/include/org_forstdb_OptionsUtil.h new file mode 100644 index 000000000..e4bb85ab0 --- /dev/null +++ b/java/include/org_forstdb_OptionsUtil.h @@ -0,0 +1,45 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_OptionsUtil */ + +#ifndef _Included_org_forstdb_OptionsUtil +#define _Included_org_forstdb_OptionsUtil +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_OptionsUtil + * Method: loadLatestOptions + * Signature: (JLjava/lang/String;JLjava/util/List;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptionsUtil_loadLatestOptions + (JNIEnv *, jclass, jlong, jstring, jlong, jobject); + +/* + * Class: org_forstdb_OptionsUtil + * Method: loadOptionsFromFile + * Signature: (JLjava/lang/String;JLjava/util/List;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_OptionsUtil_loadOptionsFromFile + (JNIEnv *, jclass, jlong, jstring, jlong, jobject); + +/* + * Class: org_forstdb_OptionsUtil + * Method: getLatestOptionsFileName + * Signature: (Ljava/lang/String;J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_OptionsUtil_getLatestOptionsFileName + (JNIEnv *, jclass, jstring, jlong); + +/* + * Class: org_forstdb_OptionsUtil + * Method: readTableFormatConfig + * Signature: (J)Lorg/forstdb/TableFormatConfig; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_OptionsUtil_readTableFormatConfig + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_PerfContext.h b/java/include/org_forstdb_PerfContext.h new file mode 100644 index 000000000..50f9155a9 --- /dev/null +++ b/java/include/org_forstdb_PerfContext.h @@ -0,0 +1,805 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_PerfContext */ + +#ifndef _Included_org_forstdb_PerfContext +#define _Included_org_forstdb_PerfContext +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_PerfContext + * Method: reset + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_PerfContext_reset + (JNIEnv *, jobject, jlong); + +/* + * Class: 
org_forstdb_PerfContext + * Method: getUserKeyComparisonCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getUserKeyComparisonCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockCacheHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockCacheHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockReadCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockReadCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockReadByte + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockReadByte + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockReadTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockReadTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockReadCpuTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockReadCpuTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockCacheIndexHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockCacheIndexHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockCacheStandaloneHandleCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockCacheStandaloneHandleCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockCacheRealHandleCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockCacheRealHandleCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getIndexBlockReadCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getIndexBlockReadCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockCacheFilterHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockCacheFilterHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFilterBlockReadCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFilterBlockReadCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getCompressionDictBlockReadCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getCompressionDictBlockReadCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSecondaryCacheHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSecondaryCacheHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getCompressedSecCacheInsertRealCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getCompressedSecCacheInsertRealCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getCompressedSecCacheInsertDummyCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getCompressedSecCacheInsertDummyCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getCompressedSecCacheUncompressedBytes + * Signature: (J)J + */ +JNIEXPORT jlong 
JNICALL Java_org_forstdb_PerfContext_getCompressedSecCacheUncompressedBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getCompressedSecCacheCompressedBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getCompressedSecCacheCompressedBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockChecksumTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockChecksumTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockDecompressTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockDecompressTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getReadBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getReadBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getMultigetReadBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getMultigetReadBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getIterReadBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getIterReadBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobCacheHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobCacheHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobReadCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobReadCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobReadByte + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobReadByte + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobReadTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobReadTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobChecksumTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobChecksumTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlobDecompressTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlobDecompressTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalKeySkippedCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getInternalKeySkippedCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalDeleteSkippedCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getInternalDeleteSkippedCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalRecentSkippedCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getInternalRecentSkippedCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalMergeCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getInternalMergeCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalMergePointLookupCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_PerfContext_getInternalMergePointLookupCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getInternalRangeDelReseekCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getInternalRangeDelReseekCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSnapshotTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSnapshotTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFromMemtableTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFromMemtableTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFromMemtableCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFromMemtableCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getPostProcessTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getPostProcessTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFromOutputFilesTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFromOutputFilesTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekOnMemtableTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekOnMemtableTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekOnMemtableCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekOnMemtableCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getNextOnMemtableCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getNextOnMemtableCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getPrevOnMemtableCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getPrevOnMemtableCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekChildSeekTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekChildSeekTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekChildSeekCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekChildSeekCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekMinHeapTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekMinHeapTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekMaxHeapTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekMaxHeapTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getSeekInternalSeekTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getSeekInternalSeekTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFindNextUserEntryTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFindNextUserEntryTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWriteWalTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWriteWalTime + 
(JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWriteMemtableTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWriteMemtableTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWriteDelayTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWriteDelayTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWriteSchedulingFlushesCompactionsTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWriteSchedulingFlushesCompactionsTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWritePreAndPostProcessTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWritePreAndPostProcessTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getWriteThreadWaitNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getWriteThreadWaitNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getDbMutexLockNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getDbMutexLockNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getDbConditionWaitNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getDbConditionWaitNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getMergeOperatorTimeNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getMergeOperatorTimeNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getReadIndexBlockNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getReadIndexBlockNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getReadFilterBlockNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getReadFilterBlockNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getNewTableBlockIterNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getNewTableBlockIterNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getNewTableIteratorNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getNewTableIteratorNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBlockSeekNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBlockSeekNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getFindTableNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getFindTableNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBloomMemtableHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBloomMemtableHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBloomMemtableMissCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBloomMemtableMissCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBloomSstHitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_PerfContext_getBloomSstHitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getBloomSstMissCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getBloomSstMissCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getKeyLockWaitTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getKeyLockWaitTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getKeyLockWaitCount + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getKeyLockWaitCount + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewSequentialFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewSequentialFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewRandomAccessFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewRandomAccessFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewWritableFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewWritableFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvReuseWritableFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvReuseWritableFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewRandomRwFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewRandomRwFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewDirectoryNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewDirectoryNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvFileExistsNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvFileExistsNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvGetChildrenNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvGetChildrenNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvGetChildrenFileAttributesNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvGetChildrenFileAttributesNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvDeleteFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvDeleteFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvCreateDirNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvCreateDirNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvCreateDirIfMissingNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvCreateDirIfMissingNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvDeleteDirNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvDeleteDirNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvGetFileSizeNanos + 
* Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvGetFileSizeNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvGetFileModificationTimeNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvGetFileModificationTimeNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvRenameFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvRenameFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvLinkFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvLinkFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvLockFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvLockFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvUnlockFileNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvUnlockFileNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEnvNewLoggerNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEnvNewLoggerNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getGetCpuNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getGetCpuNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getIterNextCpuNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getIterNextCpuNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getIterPrevCpuNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getIterPrevCpuNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getIterSeekCpuNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getIterSeekCpuNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getEncryptDataNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getEncryptDataNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getDecryptDataNanos + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getDecryptDataNanos + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_PerfContext + * Method: getNumberAsyncSeek + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PerfContext_getNumberAsyncSeek + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_PersistentCache.h b/java/include/org_forstdb_PersistentCache.h new file mode 100644 index 000000000..a0358f656 --- /dev/null +++ b/java/include/org_forstdb_PersistentCache.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_PersistentCache */ + +#ifndef _Included_org_forstdb_PersistentCache +#define _Included_org_forstdb_PersistentCache +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_PersistentCache + * Method: newPersistentCache + * Signature: (JLjava/lang/String;JJZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PersistentCache_newPersistentCache + (JNIEnv *, jclass, jlong, jstring, jlong, 
jlong, jboolean); + +/* + * Class: org_forstdb_PersistentCache + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_PersistentCache_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_PlainTableConfig.h b/java/include/org_forstdb_PlainTableConfig.h new file mode 100644 index 000000000..5be3e76aa --- /dev/null +++ b/java/include/org_forstdb_PlainTableConfig.h @@ -0,0 +1,35 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_PlainTableConfig */ + +#ifndef _Included_org_forstdb_PlainTableConfig +#define _Included_org_forstdb_PlainTableConfig +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_PlainTableConfig_VARIABLE_LENGTH +#define org_forstdb_PlainTableConfig_VARIABLE_LENGTH 0L +#undef org_forstdb_PlainTableConfig_DEFAULT_BLOOM_BITS_PER_KEY +#define org_forstdb_PlainTableConfig_DEFAULT_BLOOM_BITS_PER_KEY 10L +#undef org_forstdb_PlainTableConfig_DEFAULT_HASH_TABLE_RATIO +#define org_forstdb_PlainTableConfig_DEFAULT_HASH_TABLE_RATIO 0.75 +#undef org_forstdb_PlainTableConfig_DEFAULT_INDEX_SPARSENESS +#define org_forstdb_PlainTableConfig_DEFAULT_INDEX_SPARSENESS 16L +#undef org_forstdb_PlainTableConfig_DEFAULT_HUGE_TLB_SIZE +#define org_forstdb_PlainTableConfig_DEFAULT_HUGE_TLB_SIZE 0L +#undef org_forstdb_PlainTableConfig_DEFAULT_FULL_SCAN_MODE +#define org_forstdb_PlainTableConfig_DEFAULT_FULL_SCAN_MODE 0L +#undef org_forstdb_PlainTableConfig_DEFAULT_STORE_INDEX_IN_FILE +#define org_forstdb_PlainTableConfig_DEFAULT_STORE_INDEX_IN_FILE 0L +/* + * Class: org_forstdb_PlainTableConfig + * Method: newTableFactoryHandle + * Signature: (IIDIIBZZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_PlainTableConfig_newTableFactoryHandle + (JNIEnv *, jobject, jint, jint, jdouble, jint, jint, jbyte, jboolean, jboolean); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RateLimiter.h b/java/include/org_forstdb_RateLimiter.h new file mode 100644 index 000000000..8cdab2a11 --- /dev/null +++ b/java/include/org_forstdb_RateLimiter.h @@ -0,0 +1,83 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_RateLimiter */ + +#ifndef _Included_org_forstdb_RateLimiter +#define _Included_org_forstdb_RateLimiter +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_RateLimiter_DEFAULT_REFILL_PERIOD_MICROS +#define org_forstdb_RateLimiter_DEFAULT_REFILL_PERIOD_MICROS 100000LL +#undef org_forstdb_RateLimiter_DEFAULT_FAIRNESS +#define org_forstdb_RateLimiter_DEFAULT_FAIRNESS 10L +#undef org_forstdb_RateLimiter_DEFAULT_AUTOTUNE +#define org_forstdb_RateLimiter_DEFAULT_AUTOTUNE 0L +/* + * Class: org_forstdb_RateLimiter + * Method: newRateLimiterHandle + * Signature: (JJIBZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RateLimiter_newRateLimiterHandle + (JNIEnv *, jclass, jlong, jlong, jint, jbyte, jboolean); + +/* + * Class: org_forstdb_RateLimiter + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RateLimiter_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: setBytesPerSecond + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RateLimiter_setBytesPerSecond + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: getBytesPerSecond + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RateLimiter_getBytesPerSecond + (JNIEnv 
*, jobject, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: request + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RateLimiter_request + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: getSingleBurstBytes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RateLimiter_getSingleBurstBytes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: getTotalBytesThrough + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RateLimiter_getTotalBytesThrough + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RateLimiter + * Method: getTotalRequests + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RateLimiter_getTotalRequests + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ReadOptions.h b/java/include/org_forstdb_ReadOptions.h new file mode 100644 index 000000000..7082dc8c1 --- /dev/null +++ b/java/include/org_forstdb_ReadOptions.h @@ -0,0 +1,389 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_ReadOptions */ + +#ifndef _Included_org_forstdb_ReadOptions +#define _Included_org_forstdb_ReadOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ReadOptions + * Method: newReadOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_newReadOptions__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_ReadOptions + * Method: newReadOptions + * Signature: (ZZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_newReadOptions__ZZ + (JNIEnv *, jclass, jboolean, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: copyReadOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_copyReadOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: verifyChecksums + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_verifyChecksums + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setVerifyChecksums + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setVerifyChecksums + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: fillCache + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_fillCache + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setFillCache + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setFillCache + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: snapshot + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_snapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setSnapshot + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setSnapshot + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: readTier + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_ReadOptions_readTier + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setReadTier + * Signature: (JB)V + */ +JNIEXPORT void JNICALL 
Java_org_forstdb_ReadOptions_setReadTier + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_ReadOptions + * Method: tailing + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_tailing + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setTailing + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setTailing + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: managed + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_managed + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setManaged + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setManaged + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: totalOrderSeek + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_totalOrderSeek + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setTotalOrderSeek + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setTotalOrderSeek + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: prefixSameAsStart + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_prefixSameAsStart + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setPrefixSameAsStart + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setPrefixSameAsStart + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: pinData + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_pinData + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setPinData + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setPinData + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: backgroundPurgeOnIteratorCleanup + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_backgroundPurgeOnIteratorCleanup + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setBackgroundPurgeOnIteratorCleanup + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setBackgroundPurgeOnIteratorCleanup + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: readaheadSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_readaheadSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setReadaheadSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setReadaheadSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: maxSkippableInternalKeys + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_maxSkippableInternalKeys + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setMaxSkippableInternalKeys + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setMaxSkippableInternalKeys + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: ignoreRangeDeletions + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_ignoreRangeDeletions + (JNIEnv *, jobject, jlong); + +/* + * Class: 
org_forstdb_ReadOptions + * Method: setIgnoreRangeDeletions + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setIgnoreRangeDeletions + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: setIterateUpperBound + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setIterateUpperBound + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: iterateUpperBound + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_iterateUpperBound + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setIterateLowerBound + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setIterateLowerBound + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: iterateLowerBound + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_iterateLowerBound + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setTableFilter + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setTableFilter + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: autoPrefixMode + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_ReadOptions_autoPrefixMode + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setAutoPrefixMode + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setAutoPrefixMode + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_ReadOptions + * Method: timestamp + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_timestamp + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setTimestamp + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setTimestamp + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: iterStartTs + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_iterStartTs + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setIterStartTs + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setIterStartTs + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: deadline + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_deadline + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setDeadline + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setDeadline + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: ioTimeout + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_ioTimeout + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setIoTimeout + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setIoTimeout + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: valueSizeSoftLimit + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_ReadOptions_valueSizeSoftLimit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_ReadOptions + * Method: setValueSizeSoftLimit + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_ReadOptions_setValueSizeSoftLimit + (JNIEnv *, jobject, jlong, jlong); + 
+#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RemoveEmptyValueCompactionFilter.h b/java/include/org_forstdb_RemoveEmptyValueCompactionFilter.h new file mode 100644 index 000000000..0fdf0786d --- /dev/null +++ b/java/include/org_forstdb_RemoveEmptyValueCompactionFilter.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_RemoveEmptyValueCompactionFilter */ + +#ifndef _Included_org_forstdb_RemoveEmptyValueCompactionFilter +#define _Included_org_forstdb_RemoveEmptyValueCompactionFilter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RemoveEmptyValueCompactionFilter + * Method: createNewRemoveEmptyValueCompactionFilter0 + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RemoveEmptyValueCompactionFilter_createNewRemoveEmptyValueCompactionFilter0 + (JNIEnv *, jclass); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RestoreOptions.h b/java/include/org_forstdb_RestoreOptions.h new file mode 100644 index 000000000..cb0cfaa96 --- /dev/null +++ b/java/include/org_forstdb_RestoreOptions.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_RestoreOptions */ + +#ifndef _Included_org_forstdb_RestoreOptions +#define _Included_org_forstdb_RestoreOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RestoreOptions + * Method: newRestoreOptions + * Signature: (Z)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RestoreOptions_newRestoreOptions + (JNIEnv *, jclass, jboolean); + +/* + * Class: org_forstdb_RestoreOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RestoreOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksCallbackObject.h b/java/include/org_forstdb_RocksCallbackObject.h new file mode 100644 index 000000000..edd63d253 --- /dev/null +++ b/java/include/org_forstdb_RocksCallbackObject.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_RocksCallbackObject */ + +#ifndef _Included_org_forstdb_RocksCallbackObject +#define _Included_org_forstdb_RocksCallbackObject +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RocksCallbackObject + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksCallbackObject_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksDB.h b/java/include/org_forstdb_RocksDB.h new file mode 100644 index 000000000..43248af59 --- /dev/null +++ b/java/include/org_forstdb_RocksDB.h @@ -0,0 +1,935 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include <jni.h> +/* Header for class org_forstdb_RocksDB */ + +#ifndef _Included_org_forstdb_RocksDB +#define _Included_org_forstdb_RocksDB +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_RocksDB_NOT_FOUND +#define org_forstdb_RocksDB_NOT_FOUND -1L +/* + * Class: org_forstdb_RocksDB + * Method: open + * Signature: (JLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_open__JLjava_lang_String_2 + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_RocksDB + * Method: open + * Signature: (JLjava/lang/String;[[B[J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_open__JLjava_lang_String_2_3_3B_3J + (JNIEnv 
*, jclass, jlong, jstring, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: openROnly + * Signature: (JLjava/lang/String;Z)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_openROnly__JLjava_lang_String_2Z + (JNIEnv *, jclass, jlong, jstring, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: openROnly + * Signature: (JLjava/lang/String;[[B[JZ)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_openROnly__JLjava_lang_String_2_3_3B_3JZ + (JNIEnv *, jclass, jlong, jstring, jobjectArray, jlongArray, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: openAsSecondary + * Signature: (JLjava/lang/String;Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2 + (JNIEnv *, jclass, jlong, jstring, jstring); + +/* + * Class: org_forstdb_RocksDB + * Method: openAsSecondary + * Signature: (JLjava/lang/String;Ljava/lang/String;[[B[J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_openAsSecondary__JLjava_lang_String_2Ljava_lang_String_2_3_3B_3J + (JNIEnv *, jclass, jlong, jstring, jstring, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: closeDatabase + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_closeDatabase + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: listColumnFamilies + * Signature: (JLjava/lang/String;)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_listColumnFamilies + (JNIEnv *, jclass, jlong, jstring); + +/* + * Class: org_forstdb_RocksDB + * Method: createColumnFamily + * Signature: (J[BIJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_createColumnFamily + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: createColumnFamilies + * Signature: (JJ[[B)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_createColumnFamilies__JJ_3_3B + (JNIEnv *, jobject, jlong, jlong, jobjectArray); + +/* + * Class: org_forstdb_RocksDB + * Method: createColumnFamilies + * Signature: (J[J[[B)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_createColumnFamilies__J_3J_3_3B + (JNIEnv *, jobject, jlong, jlongArray, jobjectArray); + +/* + * Class: org_forstdb_RocksDB + * Method: createColumnFamilyWithImport + * Signature: (J[BIJJ[J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_createColumnFamilyWithImport + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong, jlong, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: dropColumnFamily + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_dropColumnFamily + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: dropColumnFamilies + * Signature: (J[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_dropColumnFamilies + (JNIEnv *, jobject, jlong, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: put + * Signature: (J[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_put__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: put + * Signature: (J[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_put__J_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, 
jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: put + * Signature: (JJ[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_put__JJ_3BII_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: put + * Signature: (JJ[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_put__JJ_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: delete + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_delete__J_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: delete + * Signature: (J[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_delete__J_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: delete + * Signature: (JJ[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_delete__JJ_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: delete + * Signature: (JJ[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_delete__JJ_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: singleDelete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_singleDelete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: singleDelete + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_singleDelete__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: singleDelete + * Signature: (JJ[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_singleDelete__JJ_3BI + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: singleDelete + * Signature: (JJ[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_singleDelete__JJ_3BIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteRange + * Signature: (J[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteRange__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteRange + * Signature: (J[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteRange__J_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteRange + * Signature: (JJ[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteRange__JJ_3BII_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteRange + * Signature: (JJ[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteRange__JJ_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: clipColumnFamily + * Signature: (JJ[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_clipColumnFamily + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * 
Method: merge + * Signature: (J[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_merge__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: merge + * Signature: (J[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_merge__J_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: merge + * Signature: (JJ[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_merge__JJ_3BII_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: merge + * Signature: (JJ[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_merge__JJ_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: mergeDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_mergeDirect + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: write0 + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_write0 + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: write1 + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_write1 + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (J[BII[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_get__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (J[BII[BIIJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_get__J_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (JJ[BII[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_get__JJ_3BII_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (JJ[BII[BIIJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_get__JJ_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (J[BII)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksDB_get__J_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (J[BIIJ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksDB_get__J_3BIIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (JJ[BII)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksDB_get__JJ_3BII + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: get + * Signature: (JJ[BIIJ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksDB_get__JJ_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: multiGet + * Signature: (J[[B[I[I)[[B + */ +JNIEXPORT jobjectArray JNICALL 
Java_org_forstdb_RocksDB_multiGet__J_3_3B_3I_3I + (JNIEnv *, jobject, jlong, jobjectArray, jintArray, jintArray); + +/* + * Class: org_forstdb_RocksDB + * Method: multiGet + * Signature: (J[[B[I[I[J)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_multiGet__J_3_3B_3I_3I_3J + (JNIEnv *, jobject, jlong, jobjectArray, jintArray, jintArray, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: multiGet + * Signature: (JJ[[B[I[I)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_multiGet__JJ_3_3B_3I_3I + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jintArray, jintArray); + +/* + * Class: org_forstdb_RocksDB + * Method: multiGet + * Signature: (JJ[[B[I[I[J)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jintArray, jintArray, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: multiGet + * Signature: (JJ[J[Ljava/nio/ByteBuffer;[I[I[Ljava/nio/ByteBuffer;[I[Lorg/forstdb/Status;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_multiGet__JJ_3J_3Ljava_nio_ByteBuffer_2_3I_3I_3Ljava_nio_ByteBuffer_2_3I_3Lorg_forstdb_Status_2 + (JNIEnv *, jobject, jlong, jlong, jlongArray, jobjectArray, jintArray, jintArray, jobjectArray, jintArray, jobjectArray); + +/* + * Class: org_forstdb_RocksDB + * Method: keyExists + * Signature: (JJJ[BII)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_RocksDB_keyExists + (JNIEnv *, jobject, jlong, jlong, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: keyExistsDirect + * Signature: (JJJLjava/nio/ByteBuffer;II)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_RocksDB_keyExistsDirect + (JNIEnv *, jobject, jlong, jlong, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: keyMayExist + * Signature: (JJJ[BII)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_RocksDB_keyMayExist + (JNIEnv *, jobject, jlong, jlong, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: keyMayExistFoundValue + * Signature: (JJJ[BII)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_keyMayExistFoundValue + (JNIEnv *, jobject, jlong, jlong, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: putDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_putDirect + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: iterator + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_iterator + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: iterators + * Signature: (J[JJ)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_iterators + (JNIEnv *, jobject, jlong, jlongArray, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getSnapshot + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: releaseSnapshot + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_releaseSnapshot + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getProperty + * Signature: (JJLjava/lang/String;I)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_RocksDB_getProperty + (JNIEnv *, jobject, jlong, jlong, jstring, jint); + +/* + * Class: 
org_forstdb_RocksDB + * Method: getMapProperty + * Signature: (JJLjava/lang/String;I)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_RocksDB_getMapProperty + (JNIEnv *, jobject, jlong, jlong, jstring, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: getDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_getDirect + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: keyMayExistDirect + * Signature: (JJJLjava/nio/ByteBuffer;II)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_RocksDB_keyMayExistDirect + (JNIEnv *, jobject, jlong, jlong, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: keyMayExistDirectFoundValue + * Signature: (JJJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)[I + */ +JNIEXPORT jintArray JNICALL Java_org_forstdb_RocksDB_keyMayExistDirectFoundValue + (JNIEnv *, jobject, jlong, jlong, jlong, jobject, jint, jint, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteDirect + * Signature: (JJLjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteDirect + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getLongProperty + * Signature: (JJLjava/lang/String;I)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getLongProperty + (JNIEnv *, jobject, jlong, jlong, jstring, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: resetStats + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_resetStats + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getAggregatedLongProperty + * Signature: (JLjava/lang/String;I)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getAggregatedLongProperty + (JNIEnv *, jobject, jlong, jstring, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: getApproximateSizes + * Signature: (JJ[JB)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_getApproximateSizes + (JNIEnv *, jobject, jlong, jlong, jlongArray, jbyte); + +/* + * Class: org_forstdb_RocksDB + * Method: getApproximateMemTableStats + * Signature: (JJJJ)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_getApproximateMemTableStats + (JNIEnv *, jobject, jlong, jlong, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: compactRange + * Signature: (J[BI[BIJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_compactRange + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: setOptions + * Signature: (JJ[Ljava/lang/String;[Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_setOptions + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jobjectArray); + +/* + * Class: org_forstdb_RocksDB + * Method: getOptions + * Signature: (JJ)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_RocksDB_getOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: setDBOptions + * Signature: (J[Ljava/lang/String;[Ljava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_setDBOptions + (JNIEnv *, jobject, jlong, jobjectArray, jobjectArray); + +/* + * Class: org_forstdb_RocksDB + * Method: getDBOptions + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_RocksDB_getDBOptions + (JNIEnv *, jobject, 
jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: setPerfLevel + * Signature: (B)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_setPerfLevel + (JNIEnv *, jobject, jbyte); + +/* + * Class: org_forstdb_RocksDB + * Method: getPerfLevelNative + * Signature: ()B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_RocksDB_getPerfLevelNative + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDB + * Method: getPerfContextNative + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getPerfContextNative + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDB + * Method: compactFiles + * Signature: (JJJ[Ljava/lang/String;IIJ)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_compactFiles + (JNIEnv *, jobject, jlong, jlong, jlong, jobjectArray, jint, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: cancelAllBackgroundWork + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_cancelAllBackgroundWork + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: pauseBackgroundWork + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_pauseBackgroundWork + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: continueBackgroundWork + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_continueBackgroundWork + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: enableAutoCompaction + * Signature: (J[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_enableAutoCompaction + (JNIEnv *, jobject, jlong, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: numberLevels + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_numberLevels + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: maxMemCompactionLevel + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_maxMemCompactionLevel + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: level0StopWriteTrigger + * Signature: (JJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_level0StopWriteTrigger + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_RocksDB_getName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getEnv + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getEnv + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: flush + * Signature: (JJ[J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_flush + (JNIEnv *, jobject, jlong, jlong, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: flushWal + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_flushWal + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: syncWal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_syncWal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getLatestSequenceNumber + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getLatestSequenceNumber + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: disableFileDeletions + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_disableFileDeletions + (JNIEnv *, jobject, jlong); + +/* + * 
Class: org_forstdb_RocksDB + * Method: enableFileDeletions + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_enableFileDeletions + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: getLiveFiles + * Signature: (JZ)[Ljava/lang/String; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_getLiveFiles + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: getSortedWalFiles + * Signature: (J)[Lorg/forstdb/LogFile; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_getSortedWalFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getUpdatesSince + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getUpdatesSince + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteFile + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteFile + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_RocksDB + * Method: getLiveFilesMetaData + * Signature: (J)[Lorg/forstdb/LiveFileMetaData; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_RocksDB_getLiveFilesMetaData + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getColumnFamilyMetaData + * Signature: (JJ)Lorg/forstdb/ColumnFamilyMetaData; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_RocksDB_getColumnFamilyMetaData + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: ingestExternalFile + * Signature: (JJ[Ljava/lang/String;IJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_ingestExternalFile + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jint, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: verifyChecksum + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_verifyChecksum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getDefaultColumnFamily + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksDB_getDefaultColumnFamily + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getPropertiesOfAllTables + * Signature: (JJ)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_RocksDB_getPropertiesOfAllTables + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: getPropertiesOfTablesInRange + * Signature: (JJ[J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_RocksDB_getPropertiesOfTablesInRange + (JNIEnv *, jobject, jlong, jlong, jlongArray); + +/* + * Class: org_forstdb_RocksDB + * Method: suggestCompactRange + * Signature: (JJ)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_RocksDB_suggestCompactRange + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: promoteL0 + * Signature: (JJI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_promoteL0 + (JNIEnv *, jobject, jlong, jlong, jint); + +/* + * Class: org_forstdb_RocksDB + * Method: startTrace + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_startTrace + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: endTrace + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_endTrace + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: tryCatchUpWithPrimary + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_tryCatchUpWithPrimary + (JNIEnv *, 
jobject, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: deleteFilesInRanges + * Signature: (JJ[[BZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_deleteFilesInRanges + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jboolean); + +/* + * Class: org_forstdb_RocksDB + * Method: destroyDB + * Signature: (Ljava/lang/String;J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDB_destroyDB + (JNIEnv *, jclass, jstring, jlong); + +/* + * Class: org_forstdb_RocksDB + * Method: version + * Signature: ()I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksDB_version + (JNIEnv *, jclass); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksDBExceptionTest.h b/java/include/org_forstdb_RocksDBExceptionTest.h new file mode 100644 index 000000000..0b707eff1 --- /dev/null +++ b/java/include/org_forstdb_RocksDBExceptionTest.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_RocksDBExceptionTest */ + +#ifndef _Included_org_forstdb_RocksDBExceptionTest +#define _Included_org_forstdb_RocksDBExceptionTest +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseException + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseException + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseExceptionWithStatusCode + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCode + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseExceptionNoMsgWithStatusCode + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCode + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseExceptionWithStatusCodeSubCode + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeSubCode + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseExceptionNoMsgWithStatusCodeSubCode + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseExceptionNoMsgWithStatusCodeSubCode + (JNIEnv *, jobject); + +/* + * Class: org_forstdb_RocksDBExceptionTest + * Method: raiseExceptionWithStatusCodeState + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksDBExceptionTest_raiseExceptionWithStatusCodeState + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksEnv.h b/java/include/org_forstdb_RocksEnv.h new file mode 100644 index 000000000..6c9bc74c3 --- /dev/null +++ b/java/include/org_forstdb_RocksEnv.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_RocksEnv */ + +#ifndef _Included_org_forstdb_RocksEnv +#define _Included_org_forstdb_RocksEnv +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RocksEnv + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksEnv_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksIterator.h b/java/include/org_forstdb_RocksIterator.h new file mode 100644 index 000000000..f89e51591 --- /dev/null +++ b/java/include/org_forstdb_RocksIterator.h @@ -0,0 +1,173 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ 
+#include +/* Header for class org_forstdb_RocksIterator */ + +#ifndef _Included_org_forstdb_RocksIterator +#define _Included_org_forstdb_RocksIterator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RocksIterator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: isValid0 + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_RocksIterator_isValid0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekToFirst0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekToFirst0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekToLast0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekToLast0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: next0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_next0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: prev0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_prev0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: refresh0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_refresh0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: seek0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seek0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekForPrev0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekForPrev0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekForPrevDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekForPrevDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: seekForPrevByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_seekForPrevByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: status0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksIterator_status0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: key0 + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksIterator_key0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: value0 + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_RocksIterator_value0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_RocksIterator + * Method: keyDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)I + */ +JNIEXPORT jint JNICALL 
Java_org_forstdb_RocksIterator_keyDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: keyByteArray0 + * Signature: (J[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksIterator_keyByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: valueDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksIterator_valueDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_RocksIterator + * Method: valueByteArray0 + * Signature: (J[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_RocksIterator_valueByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_RocksMemEnv.h b/java/include/org_forstdb_RocksMemEnv.h new file mode 100644 index 000000000..b4a080847 --- /dev/null +++ b/java/include/org_forstdb_RocksMemEnv.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_RocksMemEnv */ + +#ifndef _Included_org_forstdb_RocksMemEnv +#define _Included_org_forstdb_RocksMemEnv +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_RocksMemEnv + * Method: createMemEnv + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_RocksMemEnv_createMemEnv + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_RocksMemEnv + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_RocksMemEnv_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SkipListMemTableConfig.h b/java/include/org_forstdb_SkipListMemTableConfig.h new file mode 100644 index 000000000..43a6f1946 --- /dev/null +++ b/java/include/org_forstdb_SkipListMemTableConfig.h @@ -0,0 +1,23 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_SkipListMemTableConfig */ + +#ifndef _Included_org_forstdb_SkipListMemTableConfig +#define _Included_org_forstdb_SkipListMemTableConfig +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_SkipListMemTableConfig_DEFAULT_LOOKAHEAD +#define org_forstdb_SkipListMemTableConfig_DEFAULT_LOOKAHEAD 0LL +/* + * Class: org_forstdb_SkipListMemTableConfig + * Method: newMemTableFactoryHandle0 + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SkipListMemTableConfig_newMemTableFactoryHandle0 + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Slice.h b/java/include/org_forstdb_Slice.h new file mode 100644 index 000000000..45fae672a --- /dev/null +++ b/java/include/org_forstdb_Slice.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Slice */ + +#ifndef _Included_org_forstdb_Slice +#define _Included_org_forstdb_Slice +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Slice + * Method: data0 + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_Slice_data0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Slice + * Method: createNewSlice0 + * Signature: ([BI)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Slice_createNewSlice0 + (JNIEnv *, jclass, jbyteArray, jint); + +/* + * Class: org_forstdb_Slice + * Method: createNewSlice1 + * Signature: ([B)J + */ +JNIEXPORT jlong JNICALL 
Java_org_forstdb_Slice_createNewSlice1 + (JNIEnv *, jclass, jbyteArray); + +/* + * Class: org_forstdb_Slice + * Method: clear0 + * Signature: (JZJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Slice_clear0 + (JNIEnv *, jobject, jlong, jboolean, jlong); + +/* + * Class: org_forstdb_Slice + * Method: removePrefix0 + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Slice_removePrefix0 + (JNIEnv *, jobject, jlong, jint); + +/* + * Class: org_forstdb_Slice + * Method: disposeInternalBuf + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Slice_disposeInternalBuf + (JNIEnv *, jobject, jlong, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Snapshot.h b/java/include/org_forstdb_Snapshot.h new file mode 100644 index 000000000..595a18e68 --- /dev/null +++ b/java/include/org_forstdb_Snapshot.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Snapshot */ + +#ifndef _Included_org_forstdb_Snapshot +#define _Included_org_forstdb_Snapshot +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Snapshot + * Method: getSequenceNumber + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Snapshot_getSequenceNumber + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SstFileManager.h b/java/include/org_forstdb_SstFileManager.h new file mode 100644 index 000000000..25fe9e0db --- /dev/null +++ b/java/include/org_forstdb_SstFileManager.h @@ -0,0 +1,117 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_SstFileManager */ + +#ifndef _Included_org_forstdb_SstFileManager +#define _Included_org_forstdb_SstFileManager +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_SstFileManager_RATE_BYTES_PER_SEC_DEFAULT +#define org_forstdb_SstFileManager_RATE_BYTES_PER_SEC_DEFAULT 0LL +#undef org_forstdb_SstFileManager_DELETE_EXISTING_TRASH_DEFAULT +#define org_forstdb_SstFileManager_DELETE_EXISTING_TRASH_DEFAULT 1L +#undef org_forstdb_SstFileManager_MAX_TRASH_DB_RATION_DEFAULT +#define org_forstdb_SstFileManager_MAX_TRASH_DB_RATION_DEFAULT 0.25 +#undef org_forstdb_SstFileManager_BYTES_MAX_DELETE_CHUNK_DEFAULT +#define org_forstdb_SstFileManager_BYTES_MAX_DELETE_CHUNK_DEFAULT 67108864LL +/* + * Class: org_forstdb_SstFileManager + * Method: newSstFileManager + * Signature: (JJJDJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileManager_newSstFileManager + (JNIEnv *, jclass, jlong, jlong, jlong, jdouble, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: setMaxAllowedSpaceUsage + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileManager_setMaxAllowedSpaceUsage + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: setCompactionBufferSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileManager_setCompactionBufferSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: isMaxAllowedSpaceReached + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_SstFileManager_isMaxAllowedSpaceReached + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: isMaxAllowedSpaceReachedIncludingCompactions + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_SstFileManager_isMaxAllowedSpaceReachedIncludingCompactions + (JNIEnv *, jobject, jlong); + +/* + * Class: 
org_forstdb_SstFileManager + * Method: getTotalSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileManager_getTotalSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: getTrackedFiles + * Signature: (J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_SstFileManager_getTrackedFiles + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: getDeleteRateBytesPerSecond + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileManager_getDeleteRateBytesPerSecond + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: setDeleteRateBytesPerSecond + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileManager_setDeleteRateBytesPerSecond + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: getMaxTrashDBRatio + * Signature: (J)D + */ +JNIEXPORT jdouble JNICALL Java_org_forstdb_SstFileManager_getMaxTrashDBRatio + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileManager + * Method: setMaxTrashDBRatio + * Signature: (JD)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileManager_setMaxTrashDBRatio + (JNIEnv *, jobject, jlong, jdouble); + +/* + * Class: org_forstdb_SstFileManager + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileManager_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SstFileReader.h b/java/include/org_forstdb_SstFileReader.h new file mode 100644 index 000000000..688f87a4e --- /dev/null +++ b/java/include/org_forstdb_SstFileReader.h @@ -0,0 +1,61 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_SstFileReader */ + +#ifndef _Included_org_forstdb_SstFileReader +#define _Included_org_forstdb_SstFileReader +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_SstFileReader + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReader_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReader + * Method: newIterator + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileReader_newIterator + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_SstFileReader + * Method: open + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReader_open + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_SstFileReader + * Method: newSstFileReader + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileReader_newSstFileReader + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_SstFileReader + * Method: verifyChecksum + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReader_verifyChecksum + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReader + * Method: getTableProperties + * Signature: (J)Lorg/forstdb/TableProperties; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_SstFileReader_getTableProperties + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SstFileReaderIterator.h b/java/include/org_forstdb_SstFileReaderIterator.h new file mode 100644 index 000000000..e8fde1efb --- /dev/null +++ b/java/include/org_forstdb_SstFileReaderIterator.h @@ -0,0 +1,173 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ 
+#include +/* Header for class org_forstdb_SstFileReaderIterator */ + +#ifndef _Included_org_forstdb_SstFileReaderIterator +#define _Included_org_forstdb_SstFileReaderIterator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: isValid0 + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_SstFileReaderIterator_isValid0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekToFirst0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekToFirst0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekToLast0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekToLast0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: next0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_next0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: prev0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_prev0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: refresh0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_refresh0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seek0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seek0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekForPrev0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekForPrev0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: status0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_status0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekForPrevDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekForPrevDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: seekForPrevByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileReaderIterator_seekForPrevByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: key0 + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_SstFileReaderIterator_key0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: value0 + * Signature: 
(J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_SstFileReaderIterator_value0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: keyDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_SstFileReaderIterator_keyDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: keyByteArray0 + * Signature: (J[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_SstFileReaderIterator_keyByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: valueDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_SstFileReaderIterator_valueDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_SstFileReaderIterator + * Method: valueByteArray0 + * Signature: (J[BII)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_SstFileReaderIterator_valueByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SstFileWriter.h b/java/include/org_forstdb_SstFileWriter.h new file mode 100644 index 000000000..58af1dd58 --- /dev/null +++ b/java/include/org_forstdb_SstFileWriter.h @@ -0,0 +1,117 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_SstFileWriter */ + +#ifndef _Included_org_forstdb_SstFileWriter +#define _Included_org_forstdb_SstFileWriter +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_SstFileWriter + * Method: newSstFileWriter + * Signature: (JJJB)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileWriter_newSstFileWriter__JJJB + (JNIEnv *, jclass, jlong, jlong, jlong, jbyte); + +/* + * Class: org_forstdb_SstFileWriter + * Method: newSstFileWriter + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileWriter_newSstFileWriter__JJ + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: open + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_open + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_SstFileWriter + * Method: put + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_put__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: put + * Signature: (J[B[B)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_put__J_3B_3B + (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); + +/* + * Class: org_forstdb_SstFileWriter + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_putDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint); + +/* + * Class: org_forstdb_SstFileWriter + * Method: fileSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstFileWriter_fileSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: merge + * Signature: (JJJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_merge__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: merge + * Signature: (J[B[B)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_merge__J_3B_3B + (JNIEnv *, jobject, jlong, jbyteArray, jbyteArray); + +/* + * Class: 
org_forstdb_SstFileWriter + * Method: delete + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_delete__JJ + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: delete + * Signature: (J[B)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_delete__J_3B + (JNIEnv *, jobject, jlong, jbyteArray); + +/* + * Class: org_forstdb_SstFileWriter + * Method: finish + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_finish + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_SstFileWriter + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstFileWriter_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_SstPartitionerFixedPrefixFactory.h b/java/include/org_forstdb_SstPartitionerFixedPrefixFactory.h new file mode 100644 index 000000000..13b7db72e --- /dev/null +++ b/java/include/org_forstdb_SstPartitionerFixedPrefixFactory.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_SstPartitionerFixedPrefixFactory */ + +#ifndef _Included_org_forstdb_SstPartitionerFixedPrefixFactory +#define _Included_org_forstdb_SstPartitionerFixedPrefixFactory +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_SstPartitionerFixedPrefixFactory + * Method: newSstPartitionerFixedPrefixFactory0 + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_SstPartitionerFixedPrefixFactory_newSstPartitionerFixedPrefixFactory0 + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_SstPartitionerFixedPrefixFactory + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_SstPartitionerFixedPrefixFactory_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Statistics.h b/java/include/org_forstdb_Statistics.h new file mode 100644 index 000000000..de20acdc6 --- /dev/null +++ b/java/include/org_forstdb_Statistics.h @@ -0,0 +1,117 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Statistics */ + +#ifndef _Included_org_forstdb_Statistics +#define _Included_org_forstdb_Statistics +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Statistics + * Method: newStatistics + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_newStatistics__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_Statistics + * Method: newStatistics + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_newStatistics__J + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_Statistics + * Method: newStatistics + * Signature: ([B)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_newStatistics___3B + (JNIEnv *, jclass, jbyteArray); + +/* + * Class: org_forstdb_Statistics + * Method: newStatistics + * Signature: ([BJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_newStatistics___3BJ + (JNIEnv *, jclass, jbyteArray, jlong); + +/* + * Class: org_forstdb_Statistics + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Statistics_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Statistics + * Method: statsLevel + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Statistics_statsLevel + (JNIEnv *, jobject, jlong); + +/* + * Class: 
org_forstdb_Statistics + * Method: setStatsLevel + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Statistics_setStatsLevel + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Statistics + * Method: getTickerCount + * Signature: (JB)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_getTickerCount + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Statistics + * Method: getAndResetTickerCount + * Signature: (JB)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Statistics_getAndResetTickerCount + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Statistics + * Method: getHistogramData + * Signature: (JB)Lorg/forstdb/HistogramData; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_Statistics_getHistogramData + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Statistics + * Method: getHistogramString + * Signature: (JB)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Statistics_getHistogramString + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_Statistics + * Method: reset + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Statistics_reset + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Statistics + * Method: toString + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Statistics_toString + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_StringAppendOperator.h b/java/include/org_forstdb_StringAppendOperator.h new file mode 100644 index 000000000..b4a7fa77c --- /dev/null +++ b/java/include/org_forstdb_StringAppendOperator.h @@ -0,0 +1,37 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_StringAppendOperator */ + +#ifndef _Included_org_forstdb_StringAppendOperator +#define _Included_org_forstdb_StringAppendOperator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_StringAppendOperator + * Method: newSharedStringAppendOperator + * Signature: (C)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_StringAppendOperator_newSharedStringAppendOperator__C + (JNIEnv *, jclass, jchar); + +/* + * Class: org_forstdb_StringAppendOperator + * Method: newSharedStringAppendOperator + * Signature: (Ljava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_StringAppendOperator_newSharedStringAppendOperator__Ljava_lang_String_2 + (JNIEnv *, jclass, jstring); + +/* + * Class: org_forstdb_StringAppendOperator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_StringAppendOperator_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_ThreadStatus.h b/java/include/org_forstdb_ThreadStatus.h new file mode 100644 index 000000000..6c358e4e2 --- /dev/null +++ b/java/include/org_forstdb_ThreadStatus.h @@ -0,0 +1,69 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_ThreadStatus */ + +#ifndef _Included_org_forstdb_ThreadStatus +#define _Included_org_forstdb_ThreadStatus +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_ThreadStatus + * Method: getThreadTypeName + * Signature: (B)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ThreadStatus_getThreadTypeName + (JNIEnv *, jclass, jbyte); + +/* + * Class: org_forstdb_ThreadStatus + * Method: getOperationName + * Signature: (B)Ljava/lang/String; + */ +JNIEXPORT jstring 
JNICALL Java_org_forstdb_ThreadStatus_getOperationName + (JNIEnv *, jclass, jbyte); + +/* + * Class: org_forstdb_ThreadStatus + * Method: microsToStringNative + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ThreadStatus_microsToStringNative + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_ThreadStatus + * Method: getOperationStageName + * Signature: (B)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ThreadStatus_getOperationStageName + (JNIEnv *, jclass, jbyte); + +/* + * Class: org_forstdb_ThreadStatus + * Method: getOperationPropertyName + * Signature: (BI)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ThreadStatus_getOperationPropertyName + (JNIEnv *, jclass, jbyte, jint); + +/* + * Class: org_forstdb_ThreadStatus + * Method: interpretOperationProperties + * Signature: (B[J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_ThreadStatus_interpretOperationProperties + (JNIEnv *, jclass, jbyte, jlongArray); + +/* + * Class: org_forstdb_ThreadStatus + * Method: getStateName + * Signature: (B)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_ThreadStatus_getStateName + (JNIEnv *, jclass, jbyte); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TimedEnv.h b/java/include/org_forstdb_TimedEnv.h new file mode 100644 index 000000000..9fbc7ae94 --- /dev/null +++ b/java/include/org_forstdb_TimedEnv.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TimedEnv */ + +#ifndef _Included_org_forstdb_TimedEnv +#define _Included_org_forstdb_TimedEnv +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_TimedEnv + * Method: createTimedEnv + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TimedEnv_createTimedEnv + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_TimedEnv + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TimedEnv_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_Transaction.h b/java/include/org_forstdb_Transaction.h new file mode 100644 index 000000000..eeb9dc73e --- /dev/null +++ b/java/include/org_forstdb_Transaction.h @@ -0,0 +1,613 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_Transaction */ + +#ifndef _Included_org_forstdb_Transaction +#define _Included_org_forstdb_Transaction +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_Transaction + * Method: setSnapshot + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setSnapshotOnNextOperation + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setSnapshotOnNextOperation__J + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setSnapshotOnNextOperation + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setSnapshotOnNextOperation__JJ + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getSnapshot + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: clearSnapshot + * Signature: (J)V + */ +JNIEXPORT void JNICALL 
Java_org_forstdb_Transaction_clearSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: prepare + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_prepare + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: commit + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_commit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: rollback + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_rollback + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setSavePoint + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setSavePoint + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: rollbackToSavePoint + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_rollbackToSavePoint + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: get + * Signature: (JJ[BIIJ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_Transaction_get__JJ_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: get + * Signature: (JJ[BII[BIIJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Transaction_get__JJ_3BII_3BIIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getDirect + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Transaction_getDirect + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: multiGet + * Signature: (JJ[[B[J)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_Transaction_multiGet__JJ_3_3B_3J + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Transaction + * Method: multiGet + * Signature: (JJ[[B)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_Transaction_multiGet__JJ_3_3B + (JNIEnv *, jobject, jlong, jlong, jobjectArray); + +/* + * Class: org_forstdb_Transaction + * Method: getForUpdate + * Signature: (JJ[BIIJZZ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_Transaction_getForUpdate__JJ_3BIIJZZ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jlong, jboolean, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: getForUpdate + * Signature: (JJ[BII[BIIJZZ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Transaction_getForUpdate__JJ_3BII_3BIIJZZ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong, jboolean, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: getDirectForUpdate + * Signature: (JJLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZZ)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_Transaction_getDirectForUpdate + (JNIEnv *, jobject, jlong, jlong, jobject, jint, jint, jobject, jint, jint, jlong, jboolean, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: multiGetForUpdate + * Signature: (JJ[[B[J)[[B + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_Transaction_multiGetForUpdate__JJ_3_3B_3J + (JNIEnv *, jobject, jlong, jlong, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_Transaction + * Method: multiGetForUpdate + * Signature: (JJ[[B)[[B + */ +JNIEXPORT jobjectArray JNICALL 
Java_org_forstdb_Transaction_multiGetForUpdate__JJ_3_3B + (JNIEnv *, jobject, jlong, jlong, jobjectArray); + +/* + * Class: org_forstdb_Transaction + * Method: getIterator + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getIterator + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: put + * Signature: (J[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_put__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_Transaction + * Method: put + * Signature: (J[BII[BIIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_put__J_3BII_3BIIJZ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: put + * Signature: (J[[BI[[BIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_put__J_3_3BI_3_3BIJZ + (JNIEnv *, jobject, jlong, jobjectArray, jint, jobjectArray, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: put + * Signature: (J[[BI[[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_put__J_3_3BI_3_3BI + (JNIEnv *, jobject, jlong, jobjectArray, jint, jobjectArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint); + +/* + * Class: org_forstdb_Transaction + * Method: merge + * Signature: (J[BII[BIIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_merge__J_3BII_3BIIJZ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: mergeDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2IIJZ + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: mergeDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_mergeDirect__JLjava_nio_ByteBuffer_2IILjava_nio_ByteBuffer_2II + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint); + +/* + * Class: org_forstdb_Transaction + * Method: merge + * Signature: (J[BII[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_merge__J_3BII_3BII + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_Transaction + * Method: delete + * Signature: (J[BIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_delete__J_3BIJZ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: delete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_delete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + 
+/* + * Class: org_forstdb_Transaction + * Method: delete + * Signature: (J[[BIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_delete__J_3_3BIJZ + (JNIEnv *, jobject, jlong, jobjectArray, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: delete + * Signature: (J[[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_delete__J_3_3BI + (JNIEnv *, jobject, jlong, jobjectArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: singleDelete + * Signature: (J[BIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_singleDelete__J_3BIJZ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: singleDelete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_singleDelete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: singleDelete + * Signature: (J[[BIJZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_singleDelete__J_3_3BIJZ + (JNIEnv *, jobject, jlong, jobjectArray, jint, jlong, jboolean); + +/* + * Class: org_forstdb_Transaction + * Method: singleDelete + * Signature: (J[[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_singleDelete__J_3_3BI + (JNIEnv *, jobject, jlong, jobjectArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: putUntracked + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putUntracked__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: putUntracked + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putUntracked__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: putUntracked + * Signature: (J[[BI[[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putUntracked__J_3_3BI_3_3BIJ + (JNIEnv *, jobject, jlong, jobjectArray, jint, jobjectArray, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: putUntracked + * Signature: (J[[BI[[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putUntracked__J_3_3BI_3_3BI + (JNIEnv *, jobject, jlong, jobjectArray, jint, jobjectArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: mergeUntracked + * Signature: (J[BII[BIIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_mergeUntracked + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint, jbyteArray, jint, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: mergeUntrackedDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_mergeUntrackedDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: deleteUntracked + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_deleteUntracked__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: deleteUntracked + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_deleteUntracked__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: deleteUntracked + * Signature: (J[[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_deleteUntracked__J_3_3BIJ + (JNIEnv *, jobject, 
jlong, jobjectArray, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: deleteUntracked + * Signature: (J[[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_deleteUntracked__J_3_3BI + (JNIEnv *, jobject, jlong, jobjectArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: putLogData + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_putLogData + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: disableIndexing + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_disableIndexing + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: enableIndexing + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_enableIndexing + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getNumKeys + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getNumKeys + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getNumPuts + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getNumPuts + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getNumDeletes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getNumDeletes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getNumMerges + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getNumMerges + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getElapsedTime + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getElapsedTime + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getWriteBatch + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getWriteBatch + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setLockTimeout + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setLockTimeout + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getWriteOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getWriteOptions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setWriteOptions + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setWriteOptions + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: undoGetForUpdate + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_undoGetForUpdate__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: undoGetForUpdate + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_undoGetForUpdate__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_Transaction + * Method: rebuildFromWriteBatch + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_rebuildFromWriteBatch + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getCommitTimeWriteBatch + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getCommitTimeWriteBatch + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setLogNumber + * Signature: (JJ)V + */ +JNIEXPORT 
void JNICALL Java_org_forstdb_Transaction_setLogNumber + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getLogNumber + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getLogNumber + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: setName + * Signature: (JLjava/lang/String;)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_setName + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_Transaction + * Method: getName + * Signature: (J)Ljava/lang/String; + */ +JNIEXPORT jstring JNICALL Java_org_forstdb_Transaction_getName + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getID + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getID + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: isDeadlockDetect + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_Transaction_isDeadlockDetect + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getWaitingTxns + * Signature: (J)Lorg/forstdb/Transaction/WaitingTransactions; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_Transaction_getWaitingTxns + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getState + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_Transaction_getState + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: getId + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_Transaction_getId + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_Transaction + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_Transaction_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TransactionDB.h b/java/include/org_forstdb_TransactionDB.h new file mode 100644 index 000000000..6e71740dd --- /dev/null +++ b/java/include/org_forstdb_TransactionDB.h @@ -0,0 +1,119 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TransactionDB */ + +#ifndef _Included_org_forstdb_TransactionDB +#define _Included_org_forstdb_TransactionDB +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_TransactionDB_NOT_FOUND +#define org_forstdb_TransactionDB_NOT_FOUND -1L +/* + * Class: org_forstdb_TransactionDB + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDB_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: open + * Signature: (JJLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_open__JJLjava_lang_String_2 + (JNIEnv *, jclass, jlong, jlong, jstring); + +/* + * Class: org_forstdb_TransactionDB + * Method: open + * Signature: (JJLjava/lang/String;[[B[J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_TransactionDB_open__JJLjava_lang_String_2_3_3B_3J + (JNIEnv *, jclass, jlong, jlong, jstring, jobjectArray, jlongArray); + +/* + * Class: org_forstdb_TransactionDB + * Method: closeDatabase + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDB_closeDatabase + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: beginTransaction + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_beginTransaction__JJ + (JNIEnv *, jobject, jlong, 
jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: beginTransaction + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_beginTransaction__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: beginTransaction_withOld + * Signature: (JJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_beginTransaction_1withOld__JJJ + (JNIEnv *, jobject, jlong, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: beginTransaction_withOld + * Signature: (JJJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_beginTransaction_1withOld__JJJJ + (JNIEnv *, jobject, jlong, jlong, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: getTransactionByName + * Signature: (JLjava/lang/String;)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDB_getTransactionByName + (JNIEnv *, jobject, jlong, jstring); + +/* + * Class: org_forstdb_TransactionDB + * Method: getAllPreparedTransactions + * Signature: (J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_TransactionDB_getAllPreparedTransactions + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: getLockStatusData + * Signature: (J)Ljava/util/Map; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_TransactionDB_getLockStatusData + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: getDeadlockInfoBuffer + * Signature: (J)[Lorg/forstdb/TransactionDB/DeadlockPath; + */ +JNIEXPORT jobjectArray JNICALL Java_org_forstdb_TransactionDB_getDeadlockInfoBuffer + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDB + * Method: setDeadlockInfoBufferSize + * Signature: (JI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDB_setDeadlockInfoBufferSize + (JNIEnv *, jobject, jlong, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TransactionDBOptions.h b/java/include/org_forstdb_TransactionDBOptions.h new file mode 100644 index 000000000..2fd6def68 --- /dev/null +++ b/java/include/org_forstdb_TransactionDBOptions.h @@ -0,0 +1,109 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TransactionDBOptions */ + +#ifndef _Included_org_forstdb_TransactionDBOptions +#define _Included_org_forstdb_TransactionDBOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_TransactionDBOptions + * Method: newTransactionDBOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDBOptions_newTransactionDBOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: getMaxNumLocks + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDBOptions_getMaxNumLocks + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: setMaxNumLocks + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_setMaxNumLocks + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: getNumStripes + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDBOptions_getNumStripes + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: setNumStripes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_setNumStripes + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * 
Method: getTransactionLockTimeout + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDBOptions_getTransactionLockTimeout + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: setTransactionLockTimeout + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_setTransactionLockTimeout + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: getDefaultLockTimeout + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionDBOptions_getDefaultLockTimeout + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: setDefaultLockTimeout + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_setDefaultLockTimeout + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: getWritePolicy + * Signature: (J)B + */ +JNIEXPORT jbyte JNICALL Java_org_forstdb_TransactionDBOptions_getWritePolicy + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: setWritePolicy + * Signature: (JB)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_setWritePolicy + (JNIEnv *, jobject, jlong, jbyte); + +/* + * Class: org_forstdb_TransactionDBOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionDBOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TransactionLogIterator.h b/java/include/org_forstdb_TransactionLogIterator.h new file mode 100644 index 000000000..ee8c79d99 --- /dev/null +++ b/java/include/org_forstdb_TransactionLogIterator.h @@ -0,0 +1,53 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TransactionLogIterator */ + +#ifndef _Included_org_forstdb_TransactionLogIterator +#define _Included_org_forstdb_TransactionLogIterator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_TransactionLogIterator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionLogIterator_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionLogIterator + * Method: isValid + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_TransactionLogIterator_isValid + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionLogIterator + * Method: next + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionLogIterator_next + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionLogIterator + * Method: status + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionLogIterator_status + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionLogIterator + * Method: getBatch + * Signature: (J)Lorg/forstdb/TransactionLogIterator/BatchResult; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_TransactionLogIterator_getBatch + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TransactionOptions.h b/java/include/org_forstdb_TransactionOptions.h new file mode 100644 index 000000000..673a41c5f --- /dev/null +++ b/java/include/org_forstdb_TransactionOptions.h @@ -0,0 +1,125 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TransactionOptions */ + +#ifndef 
_Included_org_forstdb_TransactionOptions +#define _Included_org_forstdb_TransactionOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_TransactionOptions + * Method: newTransactionOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionOptions_newTransactionOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_TransactionOptions + * Method: isSetSnapshot + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_TransactionOptions_isSetSnapshot + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setSetSnapshot + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setSetSnapshot + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_TransactionOptions + * Method: isDeadlockDetect + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_TransactionOptions_isDeadlockDetect + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setDeadlockDetect + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setDeadlockDetect + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_TransactionOptions + * Method: getLockTimeout + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionOptions_getLockTimeout + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setLockTimeout + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setLockTimeout + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: getExpiration + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionOptions_getExpiration + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setExpiration + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setExpiration + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: getDeadlockDetectDepth + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionOptions_getDeadlockDetectDepth + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setDeadlockDetectDepth + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setDeadlockDetectDepth + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: getMaxWriteBatchSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TransactionOptions_getMaxWriteBatchSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: setMaxWriteBatchSize + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_setMaxWriteBatchSize + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_TransactionOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TransactionOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_TtlDB.h b/java/include/org_forstdb_TtlDB.h new file mode 100644 index 000000000..9f77960ed --- /dev/null +++ b/java/include/org_forstdb_TtlDB.h @@ -0,0 +1,55 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_TtlDB */ + +#ifndef _Included_org_forstdb_TtlDB +#define 
_Included_org_forstdb_TtlDB +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_TtlDB_NOT_FOUND +#define org_forstdb_TtlDB_NOT_FOUND -1L +/* + * Class: org_forstdb_TtlDB + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TtlDB_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_TtlDB + * Method: open + * Signature: (JLjava/lang/String;IZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TtlDB_open + (JNIEnv *, jclass, jlong, jstring, jint, jboolean); + +/* + * Class: org_forstdb_TtlDB + * Method: openCF + * Signature: (JLjava/lang/String;[[B[J[IZ)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_TtlDB_openCF + (JNIEnv *, jclass, jlong, jstring, jobjectArray, jlongArray, jintArray, jboolean); + +/* + * Class: org_forstdb_TtlDB + * Method: createColumnFamilyWithTtl + * Signature: (J[BJI)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_TtlDB_createColumnFamilyWithTtl + (JNIEnv *, jobject, jlong, jbyteArray, jlong, jint); + +/* + * Class: org_forstdb_TtlDB + * Method: closeDatabase + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_TtlDB_closeDatabase + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_UInt64AddOperator.h b/java/include/org_forstdb_UInt64AddOperator.h new file mode 100644 index 000000000..930b61362 --- /dev/null +++ b/java/include/org_forstdb_UInt64AddOperator.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_UInt64AddOperator */ + +#ifndef _Included_org_forstdb_UInt64AddOperator +#define _Included_org_forstdb_UInt64AddOperator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_UInt64AddOperator + * Method: newSharedUInt64AddOperator + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_UInt64AddOperator_newSharedUInt64AddOperator + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_UInt64AddOperator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_UInt64AddOperator_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_VectorMemTableConfig.h b/java/include/org_forstdb_VectorMemTableConfig.h new file mode 100644 index 000000000..b25ed0fbb --- /dev/null +++ b/java/include/org_forstdb_VectorMemTableConfig.h @@ -0,0 +1,23 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_VectorMemTableConfig */ + +#ifndef _Included_org_forstdb_VectorMemTableConfig +#define _Included_org_forstdb_VectorMemTableConfig +#ifdef __cplusplus +extern "C" { +#endif +#undef org_forstdb_VectorMemTableConfig_DEFAULT_RESERVED_SIZE +#define org_forstdb_VectorMemTableConfig_DEFAULT_RESERVED_SIZE 0L +/* + * Class: org_forstdb_VectorMemTableConfig + * Method: newMemTableFactoryHandle + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_VectorMemTableConfig_newMemTableFactoryHandle + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WBWIRocksIterator.h b/java/include/org_forstdb_WBWIRocksIterator.h new file mode 100644 index 000000000..d42e5b6b8 --- /dev/null +++ b/java/include/org_forstdb_WBWIRocksIterator.h @@ -0,0 +1,133 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WBWIRocksIterator */ + +#ifndef _Included_org_forstdb_WBWIRocksIterator +#define 
_Included_org_forstdb_WBWIRocksIterator +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: isValid0 + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WBWIRocksIterator_isValid0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekToFirst0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekToFirst0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekToLast0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekToLast0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: next0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_next0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: prev0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_prev0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: refresh0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_refresh0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seek0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seek0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekForPrev0 + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekForPrev0 + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: status0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_status0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekForPrevDirect0 + * Signature: (JLjava/nio/ByteBuffer;II)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekForPrevDirect0 + (JNIEnv *, jobject, jlong, jobject, jint, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: seekForPrevByteArray0 + * Signature: (J[BII)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WBWIRocksIterator_seekForPrevByteArray0 + (JNIEnv *, jobject, jlong, jbyteArray, jint, jint); + +/* + * Class: org_forstdb_WBWIRocksIterator + * Method: entry1 + * Signature: (J)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_forstdb_WBWIRocksIterator_entry1 + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBatch.h b/java/include/org_forstdb_WriteBatch.h new file mode 100644 index 000000000..b485ce83a --- /dev/null +++ b/java/include/org_forstdb_WriteBatch.h @@ -0,0 +1,301 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for 
class org_forstdb_WriteBatch */ + +#ifndef _Included_org_forstdb_WriteBatch +#define _Included_org_forstdb_WriteBatch +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteBatch + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: count0 + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_WriteBatch_count0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: put + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_put__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: put + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_put__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_putDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: merge + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_merge__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: merge + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_merge__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: delete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_delete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: delete + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_delete__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: singleDelete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_singleDelete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: singleDelete + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_singleDelete__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: deleteDirect + * Signature: (JLjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_deleteDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: deleteRange + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_deleteRange__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: deleteRange + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_deleteRange__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: putLogData + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_putLogData + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: clear0 + * Signature: (J)V + */ 
+JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_clear0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: setSavePoint0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_setSavePoint0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: rollbackToSavePoint0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_rollbackToSavePoint0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: popSavePoint + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_popSavePoint + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: setMaxBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_setMaxBytes + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: newWriteBatch + * Signature: (I)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatch_newWriteBatch__I + (JNIEnv *, jclass, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: newWriteBatch + * Signature: ([BI)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatch_newWriteBatch___3BI + (JNIEnv *, jclass, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatch + * Method: iterate + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_iterate + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: data + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatch_data + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: getDataSize + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatch_getDataSize + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasPut + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasPut + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasDelete + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasDelete + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasSingleDelete + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasSingleDelete + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasDeleteRange + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasDeleteRange + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasMerge + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasMerge + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasBeginPrepare + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasBeginPrepare + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasEndPrepare + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasEndPrepare + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasCommit + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasCommit + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: hasRollback + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteBatch_hasRollback + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: markWalTerminationPoint + 
* Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatch_markWalTerminationPoint + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatch + * Method: getWalTerminationPoint + * Signature: (J)Lorg/forstdb/WriteBatch/SavePoint; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_WriteBatch_getWalTerminationPoint + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBatchTest.h b/java/include/org_forstdb_WriteBatchTest.h new file mode 100644 index 000000000..2bb6651d4 --- /dev/null +++ b/java/include/org_forstdb_WriteBatchTest.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteBatchTest */ + +#ifndef _Included_org_forstdb_WriteBatchTest +#define _Included_org_forstdb_WriteBatchTest +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteBatchTest + * Method: getContents + * Signature: (J)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatchTest_getContents + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBatchTestInternalHelper.h b/java/include/org_forstdb_WriteBatchTestInternalHelper.h new file mode 100644 index 000000000..15d6e041f --- /dev/null +++ b/java/include/org_forstdb_WriteBatchTestInternalHelper.h @@ -0,0 +1,37 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteBatchTestInternalHelper */ + +#ifndef _Included_org_forstdb_WriteBatchTestInternalHelper +#define _Included_org_forstdb_WriteBatchTestInternalHelper +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteBatchTestInternalHelper + * Method: setSequence + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchTestInternalHelper_setSequence + (JNIEnv *, jclass, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatchTestInternalHelper + * Method: sequence + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchTestInternalHelper_sequence + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_WriteBatchTestInternalHelper + * Method: append + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchTestInternalHelper_append + (JNIEnv *, jclass, jlong, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBatchWithIndex.h b/java/include/org_forstdb_WriteBatchWithIndex.h new file mode 100644 index 000000000..a39427580 --- /dev/null +++ b/java/include/org_forstdb_WriteBatchWithIndex.h @@ -0,0 +1,261 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteBatchWithIndex */ + +#ifndef _Included_org_forstdb_WriteBatchWithIndex +#define _Included_org_forstdb_WriteBatchWithIndex +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: count0 + * Signature: (J)I + */ +JNIEXPORT jint JNICALL Java_org_forstdb_WriteBatchWithIndex_count0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: put + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_put__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: 
org_forstdb_WriteBatchWithIndex + * Method: put + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_put__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: putDirect + * Signature: (JLjava/nio/ByteBuffer;IILjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_putDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: merge + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_merge__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: merge + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_merge__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: delete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_delete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: delete + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_delete__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: singleDelete + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_singleDelete__J_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: singleDelete + * Signature: (J[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_singleDelete__J_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: deleteDirect + * Signature: (JLjava/nio/ByteBuffer;IIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_deleteDirect + (JNIEnv *, jobject, jlong, jobject, jint, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: deleteRange + * Signature: (J[BI[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_deleteRange__J_3BI_3BI + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: deleteRange + * Signature: (J[BI[BIJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_deleteRange__J_3BI_3BIJ + (JNIEnv *, jobject, jlong, jbyteArray, jint, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: putLogData + * Signature: (J[BI)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_putLogData + (JNIEnv *, jobject, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: clear0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_clear0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: setSavePoint0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_setSavePoint0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: rollbackToSavePoint0 + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_rollbackToSavePoint0 + (JNIEnv *, jobject, jlong); + +/* + * 
Class: org_forstdb_WriteBatchWithIndex + * Method: popSavePoint + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_popSavePoint + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: setMaxBytes + * Signature: (JJ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBatchWithIndex_setMaxBytes + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: getWriteBatch + * Signature: (J)Lorg/forstdb/WriteBatch; + */ +JNIEXPORT jobject JNICALL Java_org_forstdb_WriteBatchWithIndex_getWriteBatch + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: newWriteBatchWithIndex + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__ + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: newWriteBatchWithIndex + * Signature: (Z)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__Z + (JNIEnv *, jclass, jboolean); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: newWriteBatchWithIndex + * Signature: (JBIZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_newWriteBatchWithIndex__JBIZ + (JNIEnv *, jclass, jlong, jbyte, jint, jboolean); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: iterator0 + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_iterator0 + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: iterator1 + * Signature: (JJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_iterator1 + (JNIEnv *, jobject, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: iteratorWithBase + * Signature: (JJJJ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatchWithIndex_iteratorWithBase + (JNIEnv *, jobject, jlong, jlong, jlong, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: getFromBatch + * Signature: (JJ[BI)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatchWithIndex_getFromBatch__JJ_3BI + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: getFromBatch + * Signature: (JJ[BIJ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatchWithIndex_getFromBatch__JJ_3BIJ + (JNIEnv *, jobject, jlong, jlong, jbyteArray, jint, jlong); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: getFromBatchAndDB + * Signature: (JJJ[BI)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BI + (JNIEnv *, jobject, jlong, jlong, jlong, jbyteArray, jint); + +/* + * Class: org_forstdb_WriteBatchWithIndex + * Method: getFromBatchAndDB + * Signature: (JJJ[BIJ)[B + */ +JNIEXPORT jbyteArray JNICALL Java_org_forstdb_WriteBatchWithIndex_getFromBatchAndDB__JJJ_3BIJ + (JNIEnv *, jobject, jlong, jlong, jlong, jbyteArray, jint, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBatch_Handler.h b/java/include/org_forstdb_WriteBatch_Handler.h new file mode 100644 index 000000000..1015031f2 --- /dev/null +++ b/java/include/org_forstdb_WriteBatch_Handler.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteBatch_Handler */ + +#ifndef _Included_org_forstdb_WriteBatch_Handler +#define _Included_org_forstdb_WriteBatch_Handler +#ifdef __cplusplus +extern "C" { 
+#endif +/* + * Class: org_forstdb_WriteBatch_Handler + * Method: createNewHandler0 + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBatch_00024Handler_createNewHandler0 + (JNIEnv *, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteBufferManager.h b/java/include/org_forstdb_WriteBufferManager.h new file mode 100644 index 000000000..0af6a74bd --- /dev/null +++ b/java/include/org_forstdb_WriteBufferManager.h @@ -0,0 +1,29 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteBufferManager */ + +#ifndef _Included_org_forstdb_WriteBufferManager +#define _Included_org_forstdb_WriteBufferManager +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteBufferManager + * Method: newWriteBufferManager + * Signature: (JJZ)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteBufferManager_newWriteBufferManager + (JNIEnv *, jclass, jlong, jlong, jboolean); + +/* + * Class: org_forstdb_WriteBufferManager + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteBufferManager_disposeInternal + (JNIEnv *, jobject, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_WriteOptions.h b/java/include/org_forstdb_WriteOptions.h new file mode 100644 index 000000000..01ecfa9df --- /dev/null +++ b/java/include/org_forstdb_WriteOptions.h @@ -0,0 +1,133 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_WriteOptions */ + +#ifndef _Included_org_forstdb_WriteOptions +#define _Included_org_forstdb_WriteOptions +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_WriteOptions + * Method: newWriteOptions + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteOptions_newWriteOptions + (JNIEnv *, jclass); + +/* + * Class: org_forstdb_WriteOptions + * Method: copyWriteOptions + * Signature: (J)J + */ +JNIEXPORT jlong JNICALL Java_org_forstdb_WriteOptions_copyWriteOptions + (JNIEnv *, jclass, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: disposeInternal + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_disposeInternal + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: setSync + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setSync + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_WriteOptions + * Method: sync + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_sync + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: setDisableWAL + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setDisableWAL + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_WriteOptions + * Method: disableWAL + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_disableWAL + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: setIgnoreMissingColumnFamilies + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setIgnoreMissingColumnFamilies + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_WriteOptions + * Method: ignoreMissingColumnFamilies + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_ignoreMissingColumnFamilies + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * 
Method: setNoSlowdown + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setNoSlowdown + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_WriteOptions + * Method: noSlowdown + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_noSlowdown + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: setLowPri + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setLowPri + (JNIEnv *, jobject, jlong, jboolean); + +/* + * Class: org_forstdb_WriteOptions + * Method: lowPri + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_lowPri + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: memtableInsertHintPerBatch + * Signature: (J)Z + */ +JNIEXPORT jboolean JNICALL Java_org_forstdb_WriteOptions_memtableInsertHintPerBatch + (JNIEnv *, jobject, jlong); + +/* + * Class: org_forstdb_WriteOptions + * Method: setMemtableInsertHintPerBatch + * Signature: (JZ)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_WriteOptions_setMemtableInsertHintPerBatch + (JNIEnv *, jobject, jlong, jboolean); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/include/org_forstdb_test_TestableEventListener.h b/java/include/org_forstdb_test_TestableEventListener.h new file mode 100644 index 000000000..4e9d36df5 --- /dev/null +++ b/java/include/org_forstdb_test_TestableEventListener.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_forstdb_test_TestableEventListener */ + +#ifndef _Included_org_forstdb_test_TestableEventListener +#define _Included_org_forstdb_test_TestableEventListener +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_forstdb_test_TestableEventListener + * Method: invokeAllCallbacks + * Signature: (J)V + */ +JNIEXPORT void JNICALL Java_org_forstdb_test_TestableEventListener_invokeAllCallbacks + (JNIEnv *, jclass, jlong); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/java/jmh/pom.xml b/java/jmh/pom.xml index 3016aefa7..6c606f6bd 100644 --- a/java/jmh/pom.xml +++ b/java/jmh/pom.xml @@ -4,7 +4,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.rocksdb + org.forstdb rocksdbjni-jmh 1.0-SNAPSHOT @@ -48,7 +48,7 @@ - org.rocksdb + org.forstdb rocksdbjni 7.9.0-SNAPSHOT diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java index 1973b5487..e0c9a437c 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java @@ -4,20 +4,20 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). 
*/ -package org.rocksdb.jmh; +package org.forstdb.jmh; import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.BytewiseComparator; -import org.rocksdb.util.FileUtils; -import org.rocksdb.util.ReverseBytewiseComparator; +import org.forstdb.*; +import org.forstdb.util.BytewiseComparator; +import org.forstdb.util.FileUtils; +import org.forstdb.util.ReverseBytewiseComparator; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicInteger; -import static org.rocksdb.util.KVUtils.ba; +import static org.forstdb.util.KVUtils.ba; @State(Scope.Benchmark) public class ComparatorBenchmarks { diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java index 1c4329b3a..6155585e2 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java @@ -4,9 +4,9 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). */ -package org.rocksdb.jmh; +package org.forstdb.jmh; -import static org.rocksdb.util.KVUtils.ba; +import static org.forstdb.util.KVUtils.ba; import java.io.IOException; import java.nio.ByteBuffer; @@ -17,8 +17,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; +import org.forstdb.*; +import org.forstdb.util.FileUtils; @State(Scope.Benchmark) public class GetBenchmarks { diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java index d37447716..933906cde 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java @@ -4,10 +4,10 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). */ -package org.rocksdb.jmh; +package org.forstdb.jmh; -import static org.rocksdb.util.KVUtils.ba; -import static org.rocksdb.util.KVUtils.keys; +import static org.forstdb.util.KVUtils.ba; +import static org.forstdb.util.KVUtils.keys; import java.io.IOException; import java.nio.ByteBuffer; @@ -21,8 +21,8 @@ import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.OptionsBuilder; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; +import org.forstdb.*; +import org.forstdb.util.FileUtils; @State(Scope.Thread) public class MultiGetBenchmarks { diff --git a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java b/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java index cf82401c1..705e57fb8 100644 --- a/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java +++ b/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java @@ -4,9 +4,9 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). 
*/ -package org.rocksdb.jmh; +package org.forstdb.jmh; -import static org.rocksdb.util.KVUtils.ba; +import static org.forstdb.util.KVUtils.ba; import java.io.IOException; import java.nio.ByteBuffer; @@ -17,8 +17,8 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.openjdk.jmh.annotations.*; -import org.rocksdb.*; -import org.rocksdb.util.FileUtils; +import org.forstdb.*; +import org.forstdb.util.FileUtils; @State(Scope.Benchmark) public class PutBenchmarks { diff --git a/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java b/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java index 63744a14f..6c66f0c13 100644 --- a/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java +++ b/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java @@ -4,7 +4,7 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). */ -package org.rocksdb.util; +package org.forstdb.util; import java.io.IOException; import java.nio.file.FileVisitResult; diff --git a/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java b/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java index 5077291c8..a419ba78c 100644 --- a/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java +++ b/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java @@ -4,7 +4,7 @@ * COPYING file in the root directory) and Apache 2.0 License * (found in the LICENSE.Apache file in the root directory). */ -package org.rocksdb.util; +package org.forstdb.util; import static java.nio.charset.StandardCharsets.UTF_8; diff --git a/java/samples/src/main/java/OptimisticTransactionSample.java b/java/samples/src/main/java/OptimisticTransactionSample.java index 7e7a22e94..63c09d23b 100644 --- a/java/samples/src/main/java/OptimisticTransactionSample.java +++ b/java/samples/src/main/java/OptimisticTransactionSample.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -import org.rocksdb.*; +import org.forstdb.*; import static java.nio.charset.StandardCharsets.UTF_8; diff --git a/java/samples/src/main/java/RocksDBColumnFamilySample.java b/java/samples/src/main/java/RocksDBColumnFamilySample.java index 72f5731a1..8aaa8a793 100644 --- a/java/samples/src/main/java/RocksDBColumnFamilySample.java +++ b/java/samples/src/main/java/RocksDBColumnFamilySample.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -import org.rocksdb.*; +import org.forstdb.*; import java.util.ArrayList; import java.util.List; diff --git a/java/samples/src/main/java/RocksDBSample.java b/java/samples/src/main/java/RocksDBSample.java index 8ab9b2de3..3f8960093 100644 --- a/java/samples/src/main/java/RocksDBSample.java +++ b/java/samples/src/main/java/RocksDBSample.java @@ -9,8 +9,8 @@ import java.util.Map; import java.util.ArrayList; -import org.rocksdb.*; -import org.rocksdb.util.SizeUnit; +import org.forstdb.*; +import org.forstdb.util.SizeUnit; public class RocksDBSample { static { diff --git a/java/samples/src/main/java/TransactionSample.java b/java/samples/src/main/java/TransactionSample.java index b88a68f12..81102d972 100644 --- a/java/samples/src/main/java/TransactionSample.java +++ b/java/samples/src/main/java/TransactionSample.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-import org.rocksdb.*; +import org.forstdb.*; import static java.nio.charset.StandardCharsets.UTF_8; diff --git a/java/spotbugs-exclude.xml b/java/spotbugs-exclude.xml index bc3d5ea9a..0c8d44929 100644 --- a/java/spotbugs-exclude.xml +++ b/java/spotbugs-exclude.xml @@ -3,136 +3,136 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -146,6 +146,6 @@ - + \ No newline at end of file diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java b/java/src/main/java/org/forstdb/AbstractCompactionFilter.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractCompactionFilter.java rename to java/src/main/java/org/forstdb/AbstractCompactionFilter.java index fd7eef4d4..fc401252e 100644 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java +++ b/java/src/main/java/org/forstdb/AbstractCompactionFilter.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * A CompactionFilter allows an application to modify/delete a key-value at diff --git a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java b/java/src/main/java/org/forstdb/AbstractCompactionFilterFactory.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java rename to java/src/main/java/org/forstdb/AbstractCompactionFilterFactory.java index 728cda8c1..0fbfcb839 100644 --- a/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java +++ b/java/src/main/java/org/forstdb/AbstractCompactionFilterFactory.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Each compaction will create a new {@link AbstractCompactionFilter} diff --git a/java/src/main/java/org/rocksdb/AbstractComparator.java b/java/src/main/java/org/forstdb/AbstractComparator.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractComparator.java rename to java/src/main/java/org/forstdb/AbstractComparator.java index 83e0f0676..f66a663ce 100644 --- a/java/src/main/java/org/rocksdb/AbstractComparator.java +++ b/java/src/main/java/org/forstdb/AbstractComparator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java b/java/src/main/java/org/forstdb/AbstractComparatorJniBridge.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java rename to java/src/main/java/org/forstdb/AbstractComparatorJniBridge.java index d0ceef93d..c7e1fa1df 100644 --- a/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java +++ b/java/src/main/java/org/forstdb/AbstractComparatorJniBridge.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -15,7 +15,7 @@ * * Placing these bridge methods in this * class keeps the API of the - * {@link org.rocksdb.AbstractComparator} clean. 
+ * {@link org.forstdb.AbstractComparator} clean. */ class AbstractComparatorJniBridge { /** diff --git a/java/src/main/java/org/rocksdb/AbstractEventListener.java b/java/src/main/java/org/forstdb/AbstractEventListener.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractEventListener.java rename to java/src/main/java/org/forstdb/AbstractEventListener.java index c9371c45e..d03a599be 100644 --- a/java/src/main/java/org/rocksdb/AbstractEventListener.java +++ b/java/src/main/java/org/forstdb/AbstractEventListener.java @@ -3,9 +3,9 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; -import static org.rocksdb.AbstractEventListener.EnabledEventCallback.*; +import static org.forstdb.AbstractEventListener.EnabledEventCallback.*; /** * Base class for Event Listeners. diff --git a/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java b/java/src/main/java/org/forstdb/AbstractImmutableNativeReference.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java rename to java/src/main/java/org/forstdb/AbstractImmutableNativeReference.java index 173d63e90..7e667bbea 100644 --- a/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java +++ b/java/src/main/java/org/forstdb/AbstractImmutableNativeReference.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.concurrent.atomic.AtomicBoolean; diff --git a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java b/java/src/main/java/org/forstdb/AbstractMutableOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractMutableOptions.java rename to java/src/main/java/org/forstdb/AbstractMutableOptions.java index ff9b8569f..802ca7c81 100644 --- a/java/src/main/java/org/rocksdb/AbstractMutableOptions.java +++ b/java/src/main/java/org/forstdb/AbstractMutableOptions.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import java.util.*; diff --git a/java/src/main/java/org/rocksdb/AbstractNativeReference.java b/java/src/main/java/org/forstdb/AbstractNativeReference.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractNativeReference.java rename to java/src/main/java/org/forstdb/AbstractNativeReference.java index 1ce54fcba..b0cc585d1 100644 --- a/java/src/main/java/org/rocksdb/AbstractNativeReference.java +++ b/java/src/main/java/org/forstdb/AbstractNativeReference.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * AbstractNativeReference is the base-class of all RocksDB classes that have diff --git a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java b/java/src/main/java/org/forstdb/AbstractRocksIterator.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractRocksIterator.java rename to java/src/main/java/org/forstdb/AbstractRocksIterator.java index 1aade1b89..a68b63157 100644 --- a/java/src/main/java/org/rocksdb/AbstractRocksIterator.java +++ b/java/src/main/java/org/forstdb/AbstractRocksIterator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -19,7 +19,7 @@ * @param
<P>
    The type of the Parent Object from which the Rocks Iterator was * created. This is used by disposeInternal to avoid double-free * issues with the underlying C++ object. - * @see org.rocksdb.RocksObject + * @see org.forstdb.RocksObject */ public abstract class AbstractRocksIterator
<P extends RocksObject>
    extends RocksObject implements RocksIteratorInterface { diff --git a/java/src/main/java/org/rocksdb/AbstractSlice.java b/java/src/main/java/org/forstdb/AbstractSlice.java similarity index 93% rename from java/src/main/java/org/rocksdb/AbstractSlice.java rename to java/src/main/java/org/forstdb/AbstractSlice.java index f321b9910..afc6b3e70 100644 --- a/java/src/main/java/org/rocksdb/AbstractSlice.java +++ b/java/src/main/java/org/forstdb/AbstractSlice.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Slices are used by RocksDB to provide @@ -11,18 +11,18 @@ *
<p>
    * This class is package private, implementers * should extend either of the public abstract classes: - * @see org.rocksdb.Slice - * @see org.rocksdb.DirectSlice + * @see org.forstdb.Slice + * @see org.forstdb.DirectSlice * * Regards the lifecycle of Java Slices in RocksDB: * At present when you configure a Comparator from Java, it creates an * instance of a C++ BaseComparatorJniCallback subclass and * passes that to RocksDB as the comparator. That subclass of * BaseComparatorJniCallback creates the Java - * @see org.rocksdb.AbstractSlice subclass Objects. When you dispose - * the Java @see org.rocksdb.AbstractComparator subclass, it disposes the + * @see org.forstdb.AbstractSlice subclass Objects. When you dispose + * the Java @see org.forstdb.AbstractComparator subclass, it disposes the * C++ BaseComparatorJniCallback subclass, which in turn destroys the - * Java @see org.rocksdb.AbstractSlice subclass Objects. + * Java @see org.forstdb.AbstractSlice subclass Objects. */ public abstract class AbstractSlice extends RocksMutableObject { @@ -39,7 +39,7 @@ protected AbstractSlice(final long nativeHandle) { * * @return The slice data. Note, the type of access is * determined by the subclass - * @see org.rocksdb.AbstractSlice#data0(long) + * @see org.forstdb.AbstractSlice#data0(long) */ public T data() { return data0(getNativeHandle()); diff --git a/java/src/main/java/org/rocksdb/AbstractTableFilter.java b/java/src/main/java/org/forstdb/AbstractTableFilter.java similarity index 95% rename from java/src/main/java/org/rocksdb/AbstractTableFilter.java rename to java/src/main/java/org/forstdb/AbstractTableFilter.java index c696c3e13..d83a9839f 100644 --- a/java/src/main/java/org/rocksdb/AbstractTableFilter.java +++ b/java/src/main/java/org/forstdb/AbstractTableFilter.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * Base class for Table Filters. diff --git a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java b/java/src/main/java/org/forstdb/AbstractTraceWriter.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractTraceWriter.java rename to java/src/main/java/org/forstdb/AbstractTraceWriter.java index e235c9296..aee94cf94 100644 --- a/java/src/main/java/org/rocksdb/AbstractTraceWriter.java +++ b/java/src/main/java/org/forstdb/AbstractTraceWriter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Base class for TraceWriters. diff --git a/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java b/java/src/main/java/org/forstdb/AbstractTransactionNotifier.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java rename to java/src/main/java/org/forstdb/AbstractTransactionNotifier.java index b117e5cc2..d0c98eab2 100644 --- a/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java +++ b/java/src/main/java/org/forstdb/AbstractTransactionNotifier.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * Provides notification to the caller of SetSnapshotOnNextOperation when diff --git a/java/src/main/java/org/rocksdb/AbstractWalFilter.java b/java/src/main/java/org/forstdb/AbstractWalFilter.java similarity index 98% rename from java/src/main/java/org/rocksdb/AbstractWalFilter.java rename to java/src/main/java/org/forstdb/AbstractWalFilter.java index 92180f90e..2a1cb9095 100644 --- a/java/src/main/java/org/rocksdb/AbstractWalFilter.java +++ b/java/src/main/java/org/forstdb/AbstractWalFilter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Base class for WAL Filters. diff --git a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java b/java/src/main/java/org/forstdb/AbstractWriteBatch.java similarity index 99% rename from java/src/main/java/org/rocksdb/AbstractWriteBatch.java rename to java/src/main/java/org/forstdb/AbstractWriteBatch.java index 41d967f53..2bb2ec324 100644 --- a/java/src/main/java/org/rocksdb/AbstractWriteBatch.java +++ b/java/src/main/java/org/forstdb/AbstractWriteBatch.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/AccessHint.java b/java/src/main/java/org/forstdb/AccessHint.java similarity index 98% rename from java/src/main/java/org/rocksdb/AccessHint.java rename to java/src/main/java/org/forstdb/AccessHint.java index b7ccadd84..a70968f7c 100644 --- a/java/src/main/java/org/rocksdb/AccessHint.java +++ b/java/src/main/java/org/forstdb/AccessHint.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * File access pattern once a compaction has started diff --git a/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java b/java/src/main/java/org/forstdb/AdvancedColumnFamilyOptionsInterface.java similarity index 98% rename from java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java rename to java/src/main/java/org/forstdb/AdvancedColumnFamilyOptionsInterface.java index d1d1123dd..27c5f9f47 100644 --- a/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/forstdb/AdvancedColumnFamilyOptionsInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; @@ -192,7 +192,7 @@ T setInplaceUpdateSupport( *
<p>
    Default: empty
</p>
    * * @param compressionLevels list of - * {@link org.rocksdb.CompressionType} instances. + * {@link org.forstdb.CompressionType} instances. * * @return the reference to the current options. */ @@ -200,12 +200,12 @@ T setCompressionPerLevel( List compressionLevels); /** - *
<p>
    Return the currently set {@link org.rocksdb.CompressionType} + *
<p>
    Return the currently set {@link org.forstdb.CompressionType} * per instances.
</p>
    * *
<p>
    See: {@link #setCompressionPerLevel(java.util.List)}
</p>
    * - * @return list of {@link org.rocksdb.CompressionType} + * @return list of {@link org.forstdb.CompressionType} * instances. */ List compressionPerLevel(); diff --git a/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java b/java/src/main/java/org/forstdb/AdvancedMutableColumnFamilyOptionsInterface.java similarity index 99% rename from java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java rename to java/src/main/java/org/forstdb/AdvancedMutableColumnFamilyOptionsInterface.java index c8fc84173..1b6717e19 100644 --- a/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/forstdb/AdvancedMutableColumnFamilyOptionsInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Advanced Column Family Options which are mutable diff --git a/java/src/main/java/org/rocksdb/BackgroundErrorReason.java b/java/src/main/java/org/forstdb/BackgroundErrorReason.java similarity index 98% rename from java/src/main/java/org/rocksdb/BackgroundErrorReason.java rename to java/src/main/java/org/forstdb/BackgroundErrorReason.java index eec593d35..11d6431af 100644 --- a/java/src/main/java/org/rocksdb/BackgroundErrorReason.java +++ b/java/src/main/java/org/forstdb/BackgroundErrorReason.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum BackgroundErrorReason { FLUSH((byte) 0x0), diff --git a/java/src/main/java/org/rocksdb/BackupEngine.java b/java/src/main/java/org/forstdb/BackupEngine.java similarity index 99% rename from java/src/main/java/org/rocksdb/BackupEngine.java rename to java/src/main/java/org/forstdb/BackupEngine.java index 3ab220683..7f4298d1d 100644 --- a/java/src/main/java/org/rocksdb/BackupEngine.java +++ b/java/src/main/java/org/forstdb/BackupEngine.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/BackupEngineOptions.java b/java/src/main/java/org/forstdb/BackupEngineOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/BackupEngineOptions.java rename to java/src/main/java/org/forstdb/BackupEngineOptions.java index 7747b944f..8f5bf6f0f 100644 --- a/java/src/main/java/org/rocksdb/BackupEngineOptions.java +++ b/java/src/main/java/org/forstdb/BackupEngineOptions.java @@ -3,18 +3,18 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.io.File; /** *
<p>
    BackupEngineOptions controls the behavior of a - * {@link org.rocksdb.BackupEngine}. + * {@link org.forstdb.BackupEngine}. *
</p>
    *
<p>
    Note that dispose() must be called before an Options instance * become out-of-scope to release the allocated memory in c++.
</p>
    * - * @see org.rocksdb.BackupEngine + * @see org.forstdb.BackupEngine */ public class BackupEngineOptions extends RocksObject { private Env backupEnv = null; diff --git a/java/src/main/java/org/rocksdb/BackupInfo.java b/java/src/main/java/org/forstdb/BackupInfo.java similarity index 93% rename from java/src/main/java/org/rocksdb/BackupInfo.java rename to java/src/main/java/org/forstdb/BackupInfo.java index 9581b098f..24179123c 100644 --- a/java/src/main/java/org/rocksdb/BackupInfo.java +++ b/java/src/main/java/org/forstdb/BackupInfo.java @@ -2,17 +2,17 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Instances of this class describe a Backup made by - * {@link org.rocksdb.BackupEngine}. + * {@link org.forstdb.BackupEngine}. */ public class BackupInfo { /** * Package private constructor used to create instances - * of BackupInfo by {@link org.rocksdb.BackupEngine} + * of BackupInfo by {@link org.forstdb.BackupEngine} * * @param backupId id of backup * @param timestamp timestamp of backup diff --git a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java b/java/src/main/java/org/forstdb/BlockBasedTableConfig.java similarity index 98% rename from java/src/main/java/org/rocksdb/BlockBasedTableConfig.java rename to java/src/main/java/org/forstdb/BlockBasedTableConfig.java index c82c3ea10..9723eb3d9 100644 --- a/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java +++ b/java/src/main/java/org/forstdb/BlockBasedTableConfig.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The config for block based table sst format. @@ -205,7 +205,7 @@ public IndexType indexType() { /** * Sets the index type to used with this table. * - * @param indexType {@link org.rocksdb.IndexType} value + * @param indexType {@link org.forstdb.IndexType} value * @return the reference to the current option. */ public BlockBasedTableConfig setIndexType( @@ -226,7 +226,7 @@ public DataBlockIndexType dataBlockIndexType() { /** * Sets the data block index type to used with this table. * - * @param dataBlockIndexType {@link org.rocksdb.DataBlockIndexType} value + * @param dataBlockIndexType {@link org.forstdb.DataBlockIndexType} value * @return the reference to the current option. */ public BlockBasedTableConfig setDataBlockIndexType( @@ -270,7 +270,7 @@ public ChecksumType checksumType() { /** * Sets * - * @param checksumType {@link org.rocksdb.ChecksumType} value. + * @param checksumType {@link org.forstdb.ChecksumType} value. * @return the reference to the current option. */ public BlockBasedTableConfig setChecksumType( @@ -307,13 +307,13 @@ public BlockBasedTableConfig setNoBlockCache(final boolean noBlockCache) { * Use the specified cache for blocks. * When not null this take precedence even if the user sets a block cache size. *
<p>
    - * {@link org.rocksdb.Cache} should not be disposed before options instances + * {@link org.forstdb.Cache} should not be disposed before options instances * using this cache is disposed. *
<p>
    - * {@link org.rocksdb.Cache} instance can be re-used in multiple options + * {@link org.forstdb.Cache} instance can be re-used in multiple options * instances. * - * @param blockCache {@link org.rocksdb.Cache} Cache java instance + * @param blockCache {@link org.forstdb.Cache} Cache java instance * (e.g. LRUCache). * * @return the reference to the current config. @@ -571,13 +571,13 @@ public Filter filterPolicy() { /** * Use the specified filter policy to reduce disk reads. *
<p>
    - * {@link org.rocksdb.Filter} should not be closed before options instances + * {@link org.forstdb.Filter} should not be closed before options instances * using this filter are closed. *
<p>
    - * {@link org.rocksdb.Filter} instance can be re-used in multiple options + * {@link org.forstdb.Filter} instance can be re-used in multiple options * instances. * - * @param filterPolicy {@link org.rocksdb.Filter} Filter Policy java instance. + * @param filterPolicy {@link org.forstdb.Filter} Filter Policy java instance. * @return the reference to the current config. */ public BlockBasedTableConfig setFilterPolicy( diff --git a/java/src/main/java/org/rocksdb/BloomFilter.java b/java/src/main/java/org/forstdb/BloomFilter.java similarity index 99% rename from java/src/main/java/org/rocksdb/BloomFilter.java rename to java/src/main/java/org/forstdb/BloomFilter.java index c08966c0e..ea1ebdce5 100644 --- a/java/src/main/java/org/rocksdb/BloomFilter.java +++ b/java/src/main/java/org/forstdb/BloomFilter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/BuiltinComparator.java b/java/src/main/java/org/forstdb/BuiltinComparator.java similarity index 96% rename from java/src/main/java/org/rocksdb/BuiltinComparator.java rename to java/src/main/java/org/forstdb/BuiltinComparator.java index 2c89bf218..89faa1611 100644 --- a/java/src/main/java/org/rocksdb/BuiltinComparator.java +++ b/java/src/main/java/org/forstdb/BuiltinComparator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Builtin RocksDB comparators diff --git a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java b/java/src/main/java/org/forstdb/ByteBufferGetStatus.java similarity index 98% rename from java/src/main/java/org/rocksdb/ByteBufferGetStatus.java rename to java/src/main/java/org/forstdb/ByteBufferGetStatus.java index 4ab9e8475..4ead43026 100644 --- a/java/src/main/java/org/rocksdb/ByteBufferGetStatus.java +++ b/java/src/main/java/org/forstdb/ByteBufferGetStatus.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/Cache.java b/java/src/main/java/org/forstdb/Cache.java similarity index 97% rename from java/src/main/java/org/rocksdb/Cache.java rename to java/src/main/java/org/forstdb/Cache.java index 04bd3fcaa..3db3ef10f 100644 --- a/java/src/main/java/org/rocksdb/Cache.java +++ b/java/src/main/java/org/forstdb/Cache.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public abstract class Cache extends RocksObject { diff --git a/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java b/java/src/main/java/org/forstdb/CassandraCompactionFilter.java similarity index 97% rename from java/src/main/java/org/rocksdb/CassandraCompactionFilter.java rename to java/src/main/java/org/forstdb/CassandraCompactionFilter.java index 12854c510..58fc0c7f2 100644 --- a/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java +++ b/java/src/main/java/org/forstdb/CassandraCompactionFilter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * Just a Java wrapper around CassandraCompactionFilter implemented in C++ diff --git a/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java b/java/src/main/java/org/forstdb/CassandraValueMergeOperator.java similarity index 97% rename from java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java rename to java/src/main/java/org/forstdb/CassandraValueMergeOperator.java index 732faee20..202a59c55 100644 --- a/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java +++ b/java/src/main/java/org/forstdb/CassandraValueMergeOperator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * CassandraValueMergeOperator is a merge operator that merges two cassandra wide column diff --git a/java/src/main/java/org/rocksdb/Checkpoint.java b/java/src/main/java/org/forstdb/Checkpoint.java similarity index 99% rename from java/src/main/java/org/rocksdb/Checkpoint.java rename to java/src/main/java/org/forstdb/Checkpoint.java index 347221df6..47bc74294 100644 --- a/java/src/main/java/org/rocksdb/Checkpoint.java +++ b/java/src/main/java/org/forstdb/Checkpoint.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Provides Checkpoint functionality. Checkpoints diff --git a/java/src/main/java/org/rocksdb/ChecksumType.java b/java/src/main/java/org/forstdb/ChecksumType.java similarity index 97% rename from java/src/main/java/org/rocksdb/ChecksumType.java rename to java/src/main/java/org/forstdb/ChecksumType.java index 5b3d22492..8623f2541 100644 --- a/java/src/main/java/org/rocksdb/ChecksumType.java +++ b/java/src/main/java/org/forstdb/ChecksumType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Checksum types used in conjunction with BlockBasedTable. diff --git a/java/src/main/java/org/rocksdb/ClockCache.java b/java/src/main/java/org/forstdb/ClockCache.java similarity index 99% rename from java/src/main/java/org/rocksdb/ClockCache.java rename to java/src/main/java/org/forstdb/ClockCache.java index f9f6da74c..4ed6f7077 100644 --- a/java/src/main/java/org/rocksdb/ClockCache.java +++ b/java/src/main/java/org/forstdb/ClockCache.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Similar to {@link LRUCache}, but based on the CLOCK algorithm with diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java b/java/src/main/java/org/forstdb/ColumnFamilyDescriptor.java similarity index 99% rename from java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java rename to java/src/main/java/org/forstdb/ColumnFamilyDescriptor.java index dd9567829..ec2c99fa0 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java +++ b/java/src/main/java/org/forstdb/ColumnFamilyDescriptor.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.util.Arrays; diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java b/java/src/main/java/org/forstdb/ColumnFamilyHandle.java similarity index 99% rename from java/src/main/java/org/rocksdb/ColumnFamilyHandle.java rename to java/src/main/java/org/forstdb/ColumnFamilyHandle.java index 9fd63e768..4f07375bd 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java +++ b/java/src/main/java/org/forstdb/ColumnFamilyHandle.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java b/java/src/main/java/org/forstdb/ColumnFamilyMetaData.java similarity index 98% rename from java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java rename to java/src/main/java/org/forstdb/ColumnFamilyMetaData.java index 9b6d1a70c..7c2734ec4 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java +++ b/java/src/main/java/org/forstdb/ColumnFamilyMetaData.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java b/java/src/main/java/org/forstdb/ColumnFamilyOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/ColumnFamilyOptions.java rename to java/src/main/java/org/forstdb/ColumnFamilyOptions.java index 607a17936..a7105806b 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java +++ b/java/src/main/java/org/forstdb/ColumnFamilyOptions.java @@ -3,14 +3,14 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.file.Paths; import java.util.*; /** * ColumnFamilyOptions to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). + * during the creation of a {@link org.forstdb.RocksDB} (i.e., RocksDB.open()). *
<p>
    * As a descendant of {@link AbstractNativeReference}, this class is {@link AutoCloseable} * and will be automatically released if opened in the preamble of a try with resources block. @@ -85,7 +85,7 @@ public ColumnFamilyOptions(final Options options) { * * @param properties {@link java.util.Properties} instance. * - * @return {@link org.rocksdb.ColumnFamilyOptions instance} + * @return {@link org.forstdb.ColumnFamilyOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty @@ -116,7 +116,7 @@ public static ColumnFamilyOptions getColumnFamilyOptionsFromProps( * @param cfgOpts ConfigOptions controlling how the properties are parsed. * @param properties {@link java.util.Properties} instance. * - * @return {@link org.rocksdb.ColumnFamilyOptions instance} + * @return {@link org.forstdb.ColumnFamilyOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty diff --git a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java b/java/src/main/java/org/forstdb/ColumnFamilyOptionsInterface.java similarity index 99% rename from java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java rename to java/src/main/java/org/forstdb/ColumnFamilyOptionsInterface.java index 4776773bd..06db0ffeb 100644 --- a/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/forstdb/ColumnFamilyOptionsInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Collection; import java.util.List; @@ -370,7 +370,7 @@ T setMaxTableFilesSizeFIFO( * Memtable format can be set using setTableFormatConfig. * * @return the name of the currently-used memtable factory. - * @see #setTableFormatConfig(org.rocksdb.TableFormatConfig) + * @see #setTableFormatConfig(org.forstdb.TableFormatConfig) */ String memTableFactoryName(); diff --git a/java/src/main/java/org/rocksdb/CompactRangeOptions.java b/java/src/main/java/org/forstdb/CompactRangeOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactRangeOptions.java rename to java/src/main/java/org/forstdb/CompactRangeOptions.java index 616a77572..823be4e1f 100644 --- a/java/src/main/java/org/rocksdb/CompactRangeOptions.java +++ b/java/src/main/java/org/forstdb/CompactRangeOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/CompactionJobInfo.java b/java/src/main/java/org/forstdb/CompactionJobInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionJobInfo.java rename to java/src/main/java/org/forstdb/CompactionJobInfo.java index cf04bde24..2a5c8c4f5 100644 --- a/java/src/main/java/org/rocksdb/CompactionJobInfo.java +++ b/java/src/main/java/org/forstdb/CompactionJobInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/CompactionJobStats.java b/java/src/main/java/org/forstdb/CompactionJobStats.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionJobStats.java rename to java/src/main/java/org/forstdb/CompactionJobStats.java index 3d53b5565..a10998a3a 100644 --- a/java/src/main/java/org/rocksdb/CompactionJobStats.java +++ b/java/src/main/java/org/forstdb/CompactionJobStats.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class CompactionJobStats extends RocksObject { diff --git a/java/src/main/java/org/rocksdb/CompactionOptions.java b/java/src/main/java/org/forstdb/CompactionOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionOptions.java rename to java/src/main/java/org/forstdb/CompactionOptions.java index 2c7e391fb..69b11eb87 100644 --- a/java/src/main/java/org/rocksdb/CompactionOptions.java +++ b/java/src/main/java/org/forstdb/CompactionOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java b/java/src/main/java/org/forstdb/CompactionOptionsFIFO.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java rename to java/src/main/java/org/forstdb/CompactionOptionsFIFO.java index 92b21fc50..0b321a32e 100644 --- a/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java +++ b/java/src/main/java/org/forstdb/CompactionOptionsFIFO.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Options for FIFO Compaction diff --git a/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java b/java/src/main/java/org/forstdb/CompactionOptionsUniversal.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java rename to java/src/main/java/org/forstdb/CompactionOptionsUniversal.java index 4d2ebdb1f..10e974e43 100644 --- a/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java +++ b/java/src/main/java/org/forstdb/CompactionOptionsUniversal.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Options for Universal Compaction diff --git a/java/src/main/java/org/rocksdb/CompactionPriority.java b/java/src/main/java/org/forstdb/CompactionPriority.java similarity index 96% rename from java/src/main/java/org/rocksdb/CompactionPriority.java rename to java/src/main/java/org/forstdb/CompactionPriority.java index eda05942e..1ba172dfc 100644 --- a/java/src/main/java/org/rocksdb/CompactionPriority.java +++ b/java/src/main/java/org/forstdb/CompactionPriority.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Compaction Priorities @@ -64,7 +64,7 @@ public byte getValue() { * * @param value byte representation of CompactionPriority. 
* - * @return {@link org.rocksdb.CompactionPriority} instance or null. + * @return {@link org.forstdb.CompactionPriority} instance or null. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/CompactionReason.java b/java/src/main/java/org/forstdb/CompactionReason.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionReason.java rename to java/src/main/java/org/forstdb/CompactionReason.java index 46ec33f3f..4e6b19860 100644 --- a/java/src/main/java/org/rocksdb/CompactionReason.java +++ b/java/src/main/java/org/forstdb/CompactionReason.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum CompactionReason { kUnknown((byte)0x0), diff --git a/java/src/main/java/org/rocksdb/CompactionStopStyle.java b/java/src/main/java/org/forstdb/CompactionStopStyle.java similarity index 93% rename from java/src/main/java/org/rocksdb/CompactionStopStyle.java rename to java/src/main/java/org/forstdb/CompactionStopStyle.java index f6e63209c..fe1abf2a0 100644 --- a/java/src/main/java/org/rocksdb/CompactionStopStyle.java +++ b/java/src/main/java/org/forstdb/CompactionStopStyle.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * Algorithm used to make a compaction request stop picking new files @@ -38,7 +38,7 @@ public byte getValue() { * * @param value byte representation of CompactionStopStyle. * - * @return {@link org.rocksdb.CompactionStopStyle} instance or null. + * @return {@link org.forstdb.CompactionStopStyle} instance or null. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/CompactionStyle.java b/java/src/main/java/org/forstdb/CompactionStyle.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompactionStyle.java rename to java/src/main/java/org/forstdb/CompactionStyle.java index 7b955a7a2..cf6047e26 100644 --- a/java/src/main/java/org/rocksdb/CompactionStyle.java +++ b/java/src/main/java/org/forstdb/CompactionStyle.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Enum CompactionStyle diff --git a/java/src/main/java/org/rocksdb/ComparatorOptions.java b/java/src/main/java/org/forstdb/ComparatorOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/ComparatorOptions.java rename to java/src/main/java/org/forstdb/ComparatorOptions.java index ee5beb8f6..d14ffc095 100644 --- a/java/src/main/java/org/rocksdb/ComparatorOptions.java +++ b/java/src/main/java/org/forstdb/ComparatorOptions.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * This class controls the behaviour diff --git a/java/src/main/java/org/rocksdb/ComparatorType.java b/java/src/main/java/org/forstdb/ComparatorType.java similarity index 98% rename from java/src/main/java/org/rocksdb/ComparatorType.java rename to java/src/main/java/org/forstdb/ComparatorType.java index 199980b6e..a2585ecfc 100644 --- a/java/src/main/java/org/rocksdb/ComparatorType.java +++ b/java/src/main/java/org/forstdb/ComparatorType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; enum ComparatorType { JAVA_COMPARATOR((byte)0x0), diff --git a/java/src/main/java/org/rocksdb/CompressionOptions.java b/java/src/main/java/org/forstdb/CompressionOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompressionOptions.java rename to java/src/main/java/org/forstdb/CompressionOptions.java index 2e1ee5731..2cce1622a 100644 --- a/java/src/main/java/org/rocksdb/CompressionOptions.java +++ b/java/src/main/java/org/forstdb/CompressionOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Options for Compression diff --git a/java/src/main/java/org/rocksdb/CompressionType.java b/java/src/main/java/org/forstdb/CompressionType.java similarity index 99% rename from java/src/main/java/org/rocksdb/CompressionType.java rename to java/src/main/java/org/forstdb/CompressionType.java index d1ecf0ac8..52003386d 100644 --- a/java/src/main/java/org/rocksdb/CompressionType.java +++ b/java/src/main/java/org/forstdb/CompressionType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Enum CompressionType diff --git a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java b/java/src/main/java/org/forstdb/ConcurrentTaskLimiter.java similarity index 98% rename from java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java rename to java/src/main/java/org/forstdb/ConcurrentTaskLimiter.java index b4e34303b..ee3d854c5 100644 --- a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiter.java +++ b/java/src/main/java/org/forstdb/ConcurrentTaskLimiter.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public abstract class ConcurrentTaskLimiter extends RocksObject { protected ConcurrentTaskLimiter(final long nativeHandle) { diff --git a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java b/java/src/main/java/org/forstdb/ConcurrentTaskLimiterImpl.java similarity index 98% rename from java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java rename to java/src/main/java/org/forstdb/ConcurrentTaskLimiterImpl.java index d28b9060a..b41f5e1d1 100644 --- a/java/src/main/java/org/rocksdb/ConcurrentTaskLimiterImpl.java +++ b/java/src/main/java/org/forstdb/ConcurrentTaskLimiterImpl.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; public class ConcurrentTaskLimiterImpl extends ConcurrentTaskLimiter { public ConcurrentTaskLimiterImpl(final String name, final int maxOutstandingTask) { diff --git a/java/src/main/java/org/rocksdb/ConfigOptions.java b/java/src/main/java/org/forstdb/ConfigOptions.java similarity index 98% rename from java/src/main/java/org/rocksdb/ConfigOptions.java rename to java/src/main/java/org/forstdb/ConfigOptions.java index b3b5423c8..0ea711e3d 100644 --- a/java/src/main/java/org/rocksdb/ConfigOptions.java +++ b/java/src/main/java/org/forstdb/ConfigOptions.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class ConfigOptions extends RocksObject { /** diff --git a/java/src/main/java/org/rocksdb/DBOptions.java b/java/src/main/java/org/forstdb/DBOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/DBOptions.java rename to java/src/main/java/org/forstdb/DBOptions.java index de10c0585..dd2722cdc 100644 --- a/java/src/main/java/org/rocksdb/DBOptions.java +++ b/java/src/main/java/org/forstdb/DBOptions.java @@ -3,14 +3,14 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.file.Paths; import java.util.*; /** * DBOptions to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). + * during the creation of a {@link org.forstdb.RocksDB} (i.e., RocksDB.open()). *
<p>
    * As a descendent of {@link AbstractNativeReference}, this class is {@link AutoCloseable} * and will be automatically released if opened in the preamble of a try with resources block. @@ -70,7 +70,7 @@ public DBOptions(final Options options) { * @param cfgOpts The ConfigOptions to control how the string is processed. * @param properties {@link java.util.Properties} instance. * - * @return {@link org.rocksdb.DBOptions instance} + * @return {@link org.forstdb.DBOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty @@ -100,7 +100,7 @@ public static DBOptions getDBOptionsFromProps( * * @param properties {@link java.util.Properties} instance. * - * @return {@link org.rocksdb.DBOptions instance} + * @return {@link org.forstdb.DBOptions instance} * or null. * * @throws java.lang.IllegalArgumentException if null or empty diff --git a/java/src/main/java/org/rocksdb/DBOptionsInterface.java b/java/src/main/java/org/forstdb/DBOptionsInterface.java similarity index 99% rename from java/src/main/java/org/rocksdb/DBOptionsInterface.java rename to java/src/main/java/org/forstdb/DBOptionsInterface.java index 084a399cd..648502cc7 100644 --- a/java/src/main/java/org/rocksdb/DBOptionsInterface.java +++ b/java/src/main/java/org/forstdb/DBOptionsInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Collection; import java.util.List; @@ -55,10 +55,10 @@ public interface DBOptionsInterface> { * Default: false * * @param flag a flag indicating whether to create a database the - * specified database in {@link RocksDB#open(org.rocksdb.Options, String)} operation + * specified database in {@link RocksDB#open(org.forstdb.Options, String)} operation * is missing. * @return the instance of the current Options - * @see RocksDB#open(org.rocksdb.Options, String) + * @see RocksDB#open(org.forstdb.Options, String) */ T setCreateIfMissing(boolean flag); @@ -101,7 +101,7 @@ public interface DBOptionsInterface> { * @param errorIfExists if true, an exception will be thrown * during {@code RocksDB.open()} if the database already exists. * @return the reference to the current option. - * @see RocksDB#open(org.rocksdb.Options, String) + * @see RocksDB#open(org.forstdb.Options, String) */ T setErrorIfExists(boolean errorIfExists); @@ -150,7 +150,7 @@ public interface DBOptionsInterface> { * priority than compaction. Rate limiting is disabled if nullptr. * Default: nullptr * - * @param rateLimiter {@link org.rocksdb.RateLimiter} instance. + * @param rateLimiter {@link org.forstdb.RateLimiter} instance. * @return the instance of the current object. * * @since 3.10.0 @@ -200,7 +200,7 @@ public interface DBOptionsInterface> { /** *
<p>
    Returns currently set log level.
</p>
    - * @return {@link org.rocksdb.InfoLogLevel} instance. + * @return {@link org.forstdb.InfoLogLevel} instance. */ InfoLogLevel infoLogLevel(); @@ -238,7 +238,7 @@ public interface DBOptionsInterface> { * * @return the instance of the current object. * - * @see RocksDB#open(org.rocksdb.Options, String) + * @see RocksDB#open(org.forstdb.Options, String) */ T setStatistics(final Statistics statistics); diff --git a/java/src/main/java/org/rocksdb/DataBlockIndexType.java b/java/src/main/java/org/forstdb/DataBlockIndexType.java similarity index 96% rename from java/src/main/java/org/rocksdb/DataBlockIndexType.java rename to java/src/main/java/org/forstdb/DataBlockIndexType.java index 513e5b429..446ee21e8 100644 --- a/java/src/main/java/org/rocksdb/DataBlockIndexType.java +++ b/java/src/main/java/org/forstdb/DataBlockIndexType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** diff --git a/java/src/main/java/org/rocksdb/DbPath.java b/java/src/main/java/org/forstdb/DbPath.java similarity index 98% rename from java/src/main/java/org/rocksdb/DbPath.java rename to java/src/main/java/org/forstdb/DbPath.java index 3f0b67557..b7d511206 100644 --- a/java/src/main/java/org/rocksdb/DbPath.java +++ b/java/src/main/java/org/forstdb/DbPath.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.file.Path; diff --git a/java/src/main/java/org/rocksdb/DirectSlice.java b/java/src/main/java/org/forstdb/DirectSlice.java similarity index 98% rename from java/src/main/java/org/rocksdb/DirectSlice.java rename to java/src/main/java/org/forstdb/DirectSlice.java index 5aa0866ff..af76395ef 100644 --- a/java/src/main/java/org/rocksdb/DirectSlice.java +++ b/java/src/main/java/org/forstdb/DirectSlice.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -13,7 +13,7 @@ *
<p>
    * ByteBuffer backed slices typically perform better with * larger keys and values. When using smaller keys and - * values consider using @see org.rocksdb.Slice + * values consider using @see org.forstdb.Slice */ public class DirectSlice extends AbstractSlice { public static final DirectSlice NONE = new DirectSlice(); diff --git a/java/src/main/java/org/rocksdb/EncodingType.java b/java/src/main/java/org/forstdb/EncodingType.java similarity index 98% rename from java/src/main/java/org/rocksdb/EncodingType.java rename to java/src/main/java/org/forstdb/EncodingType.java index e93ffcc23..77244d63d 100644 --- a/java/src/main/java/org/rocksdb/EncodingType.java +++ b/java/src/main/java/org/forstdb/EncodingType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * EncodingType diff --git a/java/src/main/java/org/rocksdb/Env.java b/java/src/main/java/org/forstdb/Env.java similarity index 98% rename from java/src/main/java/org/rocksdb/Env.java rename to java/src/main/java/org/forstdb/Env.java index 6783d8158..15b99fd7a 100644 --- a/java/src/main/java/org/rocksdb/Env.java +++ b/java/src/main/java/org/forstdb/Env.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.List; @@ -24,7 +24,7 @@ public abstract class Env extends RocksObject { * have the ownership of its c++ resource, and calling its dispose()/close() * will be no-op.
</p>
    * - * @return the default {@link org.rocksdb.RocksEnv} instance. + * @return the default {@link org.forstdb.RocksEnv} instance. */ @SuppressWarnings({"PMD.CloseResource", "PMD.AssignmentInOperand"}) public static Env getDefault() { diff --git a/java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java b/java/src/main/java/org/forstdb/EnvFlinkTestSuite.java similarity index 98% rename from java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java rename to java/src/main/java/org/forstdb/EnvFlinkTestSuite.java index 92e503509..469528059 100644 --- a/java/src/main/java/org/rocksdb/EnvFlinkTestSuite.java +++ b/java/src/main/java/org/forstdb/EnvFlinkTestSuite.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.rocksdb; +package org.forstdb; /** * The test suite used for flink-env interfaces testing. You could define and implement test diff --git a/java/src/main/java/org/rocksdb/EnvOptions.java b/java/src/main/java/org/forstdb/EnvOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/EnvOptions.java rename to java/src/main/java/org/forstdb/EnvOptions.java index fd56bc49e..99c3005b8 100644 --- a/java/src/main/java/org/rocksdb/EnvOptions.java +++ b/java/src/main/java/org/forstdb/EnvOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Options while opening a file to read/write diff --git a/java/src/main/java/org/rocksdb/EventListener.java b/java/src/main/java/org/forstdb/EventListener.java similarity index 99% rename from java/src/main/java/org/rocksdb/EventListener.java rename to java/src/main/java/org/forstdb/EventListener.java index a26325806..48244331b 100644 --- a/java/src/main/java/org/rocksdb/EventListener.java +++ b/java/src/main/java/org/forstdb/EventListener.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * EventListener class contains a set of callback functions that will diff --git a/java/src/main/java/org/rocksdb/Experimental.java b/java/src/main/java/org/forstdb/Experimental.java similarity index 97% rename from java/src/main/java/org/rocksdb/Experimental.java rename to java/src/main/java/org/forstdb/Experimental.java index 64b404d6f..b39272d3e 100644 --- a/java/src/main/java/org/rocksdb/Experimental.java +++ b/java/src/main/java/org/forstdb/Experimental.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.lang.annotation.ElementType; import java.lang.annotation.Documented; diff --git a/java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java b/java/src/main/java/org/forstdb/ExportImportFilesMetaData.java similarity index 96% rename from java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java rename to java/src/main/java/org/forstdb/ExportImportFilesMetaData.java index 1589f631c..a2bac70c9 100644 --- a/java/src/main/java/org/rocksdb/ExportImportFilesMetaData.java +++ b/java/src/main/java/org/forstdb/ExportImportFilesMetaData.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The metadata that describes a column family. 
diff --git a/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java b/java/src/main/java/org/forstdb/ExternalFileIngestionInfo.java
similarity index 99%
rename from java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java
rename to java/src/main/java/org/forstdb/ExternalFileIngestionInfo.java
index 7a99dd6bf..ab92c64d5 100644
--- a/java/src/main/java/org/rocksdb/ExternalFileIngestionInfo.java
+++ b/java/src/main/java/org/forstdb/ExternalFileIngestionInfo.java
@@ -3,7 +3,7 @@
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
 
-package org.rocksdb;
+package org.forstdb;
 
 import java.util.Objects;
 
diff --git a/java/src/main/java/org/rocksdb/FileOperationInfo.java b/java/src/main/java/org/forstdb/FileOperationInfo.java
similarity index 99%
rename from java/src/main/java/org/rocksdb/FileOperationInfo.java
rename to java/src/main/java/org/forstdb/FileOperationInfo.java
index fae9cd5de..9f3f8d50a 100644
--- a/java/src/main/java/org/rocksdb/FileOperationInfo.java
+++ b/java/src/main/java/org/forstdb/FileOperationInfo.java
@@ -3,7 +3,7 @@
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
 
-package org.rocksdb;
+package org.forstdb;
 
 import java.util.Objects;
 
diff --git a/java/src/main/java/org/rocksdb/Filter.java b/java/src/main/java/org/forstdb/Filter.java
similarity index 98%
rename from java/src/main/java/org/rocksdb/Filter.java
rename to java/src/main/java/org/forstdb/Filter.java
index 7f490cf59..94cd7e527 100644
--- a/java/src/main/java/org/rocksdb/Filter.java
+++ b/java/src/main/java/org/forstdb/Filter.java
@@ -3,7 +3,7 @@
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
 
-package org.rocksdb;
+package org.forstdb;
 
 /**
  * Filters are stored in rocksdb and are consulted automatically
diff --git a/java/src/main/java/org/rocksdb/FilterPolicyType.java b/java/src/main/java/org/forstdb/FilterPolicyType.java
similarity index 98%
rename from java/src/main/java/org/rocksdb/FilterPolicyType.java
rename to java/src/main/java/org/forstdb/FilterPolicyType.java
index 6a693ee40..36621e152 100644
--- a/java/src/main/java/org/rocksdb/FilterPolicyType.java
+++ b/java/src/main/java/org/forstdb/FilterPolicyType.java
@@ -3,7 +3,7 @@
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
 
-package org.rocksdb;
+package org.forstdb;
 
 /**
  * IndexType used in conjunction with BlockBasedTable.
diff --git a/java/src/main/java/org/rocksdb/FlinkCompactionFilter.java b/java/src/main/java/org/forstdb/FlinkCompactionFilter.java
similarity index 99%
rename from java/src/main/java/org/rocksdb/FlinkCompactionFilter.java
rename to java/src/main/java/org/forstdb/FlinkCompactionFilter.java
index ee575d5ba..40e867b23 100644
--- a/java/src/main/java/org/rocksdb/FlinkCompactionFilter.java
+++ b/java/src/main/java/org/forstdb/FlinkCompactionFilter.java
@@ -3,7 +3,7 @@
 // COPYING file in the root directory) and Apache 2.0 License
 // (found in the LICENSE.Apache file in the root directory).
 
-package org.rocksdb;
+package org.forstdb;
 
 /**
  * Just a Java wrapper around FlinkCompactionFilter implemented in C++.
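The Filter class renamed just above is what a BloomFilter plugs into. Below is a minimal sketch of enabling a Bloom filter through BlockBasedTableConfig, assuming the org.forstdb packages from this patch and the current upstream RocksJava API (setFilterPolicy is the non-deprecated setter in recent releases); the class name FilterExample and the /tmp path are placeholders.

import org.forstdb.BlockBasedTableConfig;
import org.forstdb.BloomFilter;
import org.forstdb.Options;
import org.forstdb.RocksDB;

public class FilterExample {
  public static void main(final String[] args) throws Exception {
    RocksDB.loadLibrary();
    try (final BloomFilter bloomFilter = new BloomFilter(10);  // roughly 10 bits per key
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setFilterPolicy(bloomFilter));
         final RocksDB db = RocksDB.open(options, "/tmp/forst-filter-example")) {
      db.put("key".getBytes(), "value".getBytes());
      // Point lookups consult the filter automatically, as the javadoc above describes.
      System.out.println(new String(db.get("key".getBytes())));
    }
  }
}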
diff --git a/java/src/main/java/org/rocksdb/FlinkEnv.java b/java/src/main/java/org/forstdb/FlinkEnv.java similarity index 98% rename from java/src/main/java/org/rocksdb/FlinkEnv.java rename to java/src/main/java/org/forstdb/FlinkEnv.java index 91e6d46b6..758e72952 100644 --- a/java/src/main/java/org/rocksdb/FlinkEnv.java +++ b/java/src/main/java/org/forstdb/FlinkEnv.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.rocksdb; +package org.forstdb; /** * Flink Env which proxy all filesystem access to Flink FileSystem. diff --git a/java/src/main/java/org/rocksdb/FlushJobInfo.java b/java/src/main/java/org/forstdb/FlushJobInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/FlushJobInfo.java rename to java/src/main/java/org/forstdb/FlushJobInfo.java index 414d3a2f3..c58f5d3f0 100644 --- a/java/src/main/java/org/rocksdb/FlushJobInfo.java +++ b/java/src/main/java/org/forstdb/FlushJobInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/FlushOptions.java b/java/src/main/java/org/forstdb/FlushOptions.java similarity index 98% rename from java/src/main/java/org/rocksdb/FlushOptions.java rename to java/src/main/java/org/forstdb/FlushOptions.java index be8c4bc94..27de9dfef 100644 --- a/java/src/main/java/org/rocksdb/FlushOptions.java +++ b/java/src/main/java/org/forstdb/FlushOptions.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * FlushOptions to be passed to flush operations of - * {@link org.rocksdb.RocksDB}. + * {@link org.forstdb.RocksDB}. */ public class FlushOptions extends RocksObject { /** diff --git a/java/src/main/java/org/rocksdb/FlushReason.java b/java/src/main/java/org/forstdb/FlushReason.java similarity index 98% rename from java/src/main/java/org/rocksdb/FlushReason.java rename to java/src/main/java/org/forstdb/FlushReason.java index 9d486cda1..093b97b12 100644 --- a/java/src/main/java/org/rocksdb/FlushReason.java +++ b/java/src/main/java/org/forstdb/FlushReason.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum FlushReason { OTHERS((byte) 0x00), diff --git a/java/src/main/java/org/rocksdb/GetStatus.java b/java/src/main/java/org/forstdb/GetStatus.java similarity index 98% rename from java/src/main/java/org/rocksdb/GetStatus.java rename to java/src/main/java/org/forstdb/GetStatus.java index a2afafe39..2e82c13e1 100644 --- a/java/src/main/java/org/rocksdb/GetStatus.java +++ b/java/src/main/java/org/forstdb/GetStatus.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * The result for a fetch diff --git a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java b/java/src/main/java/org/forstdb/HashLinkedListMemTableConfig.java similarity index 99% rename from java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java rename to java/src/main/java/org/forstdb/HashLinkedListMemTableConfig.java index a9868df57..0acb02a89 100644 --- a/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java +++ b/java/src/main/java/org/forstdb/HashLinkedListMemTableConfig.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * The config for hash linked list memtable representation diff --git a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java b/java/src/main/java/org/forstdb/HashSkipListMemTableConfig.java similarity index 99% rename from java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java rename to java/src/main/java/org/forstdb/HashSkipListMemTableConfig.java index 80d6b7115..cc2680121 100644 --- a/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java +++ b/java/src/main/java/org/forstdb/HashSkipListMemTableConfig.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * The config for hash skip-list mem-table representation. diff --git a/java/src/main/java/org/rocksdb/HistogramData.java b/java/src/main/java/org/forstdb/HistogramData.java similarity index 98% rename from java/src/main/java/org/rocksdb/HistogramData.java rename to java/src/main/java/org/forstdb/HistogramData.java index 81d890883..439f89a19 100644 --- a/java/src/main/java/org/rocksdb/HistogramData.java +++ b/java/src/main/java/org/forstdb/HistogramData.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class HistogramData { private final double median_; diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/forstdb/HistogramType.java similarity index 98% rename from java/src/main/java/org/rocksdb/HistogramType.java rename to java/src/main/java/org/forstdb/HistogramType.java index 41fe241ad..342c44991 100644 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ b/java/src/main/java/org/forstdb/HistogramType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum HistogramType { @@ -208,7 +208,7 @@ public byte getValue() { * * @param value byte representation of HistogramType. * - * @return {@link org.rocksdb.HistogramType} instance. + * @return {@link org.forstdb.HistogramType} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/Holder.java b/java/src/main/java/org/forstdb/Holder.java similarity index 97% rename from java/src/main/java/org/rocksdb/Holder.java rename to java/src/main/java/org/forstdb/Holder.java index 716a0bda0..ffe1759a4 100644 --- a/java/src/main/java/org/rocksdb/Holder.java +++ b/java/src/main/java/org/forstdb/Holder.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * Simple instance reference wrapper. diff --git a/java/src/main/java/org/rocksdb/HyperClockCache.java b/java/src/main/java/org/forstdb/HyperClockCache.java similarity index 96% rename from java/src/main/java/org/rocksdb/HyperClockCache.java rename to java/src/main/java/org/forstdb/HyperClockCache.java index f8fe42be7..2ad072e39 100644 --- a/java/src/main/java/org/rocksdb/HyperClockCache.java +++ b/java/src/main/java/org/forstdb/HyperClockCache.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * HyperClockCache - A lock-free Cache alternative for RocksDB block cache diff --git a/java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java b/java/src/main/java/org/forstdb/ImportColumnFamilyOptions.java similarity index 98% rename from java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java rename to java/src/main/java/org/forstdb/ImportColumnFamilyOptions.java index 652bd19dc..26e1c8db3 100644 --- a/java/src/main/java/org/rocksdb/ImportColumnFamilyOptions.java +++ b/java/src/main/java/org/forstdb/ImportColumnFamilyOptions.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * ImportColumnFamilyOptions is used by diff --git a/java/src/main/java/org/rocksdb/IndexShorteningMode.java b/java/src/main/java/org/forstdb/IndexShorteningMode.java similarity index 99% rename from java/src/main/java/org/rocksdb/IndexShorteningMode.java rename to java/src/main/java/org/forstdb/IndexShorteningMode.java index a68346c38..bc2c79b83 100644 --- a/java/src/main/java/org/rocksdb/IndexShorteningMode.java +++ b/java/src/main/java/org/forstdb/IndexShorteningMode.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * This enum allows trading off increased index size for improved iterator diff --git a/java/src/main/java/org/rocksdb/IndexType.java b/java/src/main/java/org/forstdb/IndexType.java similarity index 98% rename from java/src/main/java/org/rocksdb/IndexType.java rename to java/src/main/java/org/forstdb/IndexType.java index 5615e929b..0e838fe51 100644 --- a/java/src/main/java/org/rocksdb/IndexType.java +++ b/java/src/main/java/org/forstdb/IndexType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * IndexType used in conjunction with BlockBasedTable. diff --git a/java/src/main/java/org/rocksdb/InfoLogLevel.java b/java/src/main/java/org/forstdb/InfoLogLevel.java similarity index 93% rename from java/src/main/java/org/rocksdb/InfoLogLevel.java rename to java/src/main/java/org/forstdb/InfoLogLevel.java index 197bd89da..3edbc5602 100644 --- a/java/src/main/java/org/rocksdb/InfoLogLevel.java +++ b/java/src/main/java/org/forstdb/InfoLogLevel.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * RocksDB log levels. @@ -33,7 +33,7 @@ public byte getValue() { * * @param value byte representation of InfoLogLevel. 
* - * @return {@link org.rocksdb.InfoLogLevel} instance. + * @return {@link org.forstdb.InfoLogLevel} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java b/java/src/main/java/org/forstdb/IngestExternalFileOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/IngestExternalFileOptions.java rename to java/src/main/java/org/forstdb/IngestExternalFileOptions.java index 1a6a5fccd..7718c2082 100644 --- a/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java +++ b/java/src/main/java/org/forstdb/IngestExternalFileOptions.java @@ -1,4 +1,4 @@ -package org.rocksdb; +package org.forstdb; // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License diff --git a/java/src/main/java/org/rocksdb/KeyMayExist.java b/java/src/main/java/org/forstdb/KeyMayExist.java similarity index 97% rename from java/src/main/java/org/rocksdb/KeyMayExist.java rename to java/src/main/java/org/forstdb/KeyMayExist.java index 6149b8529..31edabb99 100644 --- a/java/src/main/java/org/rocksdb/KeyMayExist.java +++ b/java/src/main/java/org/forstdb/KeyMayExist.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/LRUCache.java b/java/src/main/java/org/forstdb/LRUCache.java similarity index 99% rename from java/src/main/java/org/rocksdb/LRUCache.java rename to java/src/main/java/org/forstdb/LRUCache.java index 0a9d02e87..1799c2bfc 100644 --- a/java/src/main/java/org/rocksdb/LRUCache.java +++ b/java/src/main/java/org/forstdb/LRUCache.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Least Recently Used Cache diff --git a/java/src/main/java/org/rocksdb/LevelMetaData.java b/java/src/main/java/org/forstdb/LevelMetaData.java similarity index 98% rename from java/src/main/java/org/rocksdb/LevelMetaData.java rename to java/src/main/java/org/forstdb/LevelMetaData.java index 424bcb026..28a0d3a89 100644 --- a/java/src/main/java/org/rocksdb/LevelMetaData.java +++ b/java/src/main/java/org/forstdb/LevelMetaData.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/LiveFileMetaData.java b/java/src/main/java/org/forstdb/LiveFileMetaData.java similarity index 99% rename from java/src/main/java/org/rocksdb/LiveFileMetaData.java rename to java/src/main/java/org/forstdb/LiveFileMetaData.java index 5242496a3..0b2af8b12 100644 --- a/java/src/main/java/org/rocksdb/LiveFileMetaData.java +++ b/java/src/main/java/org/forstdb/LiveFileMetaData.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The full set of metadata associated with each SST file. 
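Since LRUCache is among the classes renamed above, here is a minimal sketch of sharing one LRUCache instance as the block cache, again assuming the org.forstdb packages from this patch and unchanged RocksJava method names; CacheExample, the 64 MiB capacity and the /tmp path are illustrative.

import org.forstdb.BlockBasedTableConfig;
import org.forstdb.LRUCache;
import org.forstdb.Options;
import org.forstdb.RocksDB;

public class CacheExample {
  public static void main(final String[] args) throws Exception {
    RocksDB.loadLibrary();
    // One cache instance can back the table config of several databases or
    // column families, giving them a shared memory budget.
    try (final LRUCache blockCache = new LRUCache(64L * 1024 * 1024);
         final Options options = new Options()
             .setCreateIfMissing(true)
             .setTableFormatConfig(new BlockBasedTableConfig().setBlockCache(blockCache));
         final RocksDB db = RocksDB.open(options, "/tmp/forst-cache-example")) {
      db.put("key".getBytes(), "value".getBytes());
    }
  }
}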
diff --git a/java/src/main/java/org/rocksdb/LogFile.java b/java/src/main/java/org/forstdb/LogFile.java similarity index 98% rename from java/src/main/java/org/rocksdb/LogFile.java rename to java/src/main/java/org/forstdb/LogFile.java index 5ee2c9fcc..7a1503b77 100644 --- a/java/src/main/java/org/rocksdb/LogFile.java +++ b/java/src/main/java/org/forstdb/LogFile.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; @SuppressWarnings("PMD.MissingStaticMethodInNonInstantiatableClass") public class LogFile { diff --git a/java/src/main/java/org/rocksdb/Logger.java b/java/src/main/java/org/forstdb/Logger.java similarity index 84% rename from java/src/main/java/org/rocksdb/Logger.java rename to java/src/main/java/org/forstdb/Logger.java index 614a7fa50..deea5d740 100644 --- a/java/src/main/java/org/rocksdb/Logger.java +++ b/java/src/main/java/org/forstdb/Logger.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** *

    This class provides a custom logger functionality @@ -20,7 +20,7 @@ *

    * *

    - * A log level can be set using {@link org.rocksdb.Options} or + * A log level can be set using {@link org.forstdb.Options} or * {@link Logger#setInfoLogLevel(InfoLogLevel)}. The set log level * influences the underlying native code. Each log message is * checked against the set log level and if the log level is more @@ -31,8 +31,8 @@ *

    Every log message which will be emitted by native code will * trigger expensive native to Java transitions. So the preferred * setting for production use is either - * {@link org.rocksdb.InfoLogLevel#ERROR_LEVEL} or - * {@link org.rocksdb.InfoLogLevel#FATAL_LEVEL}. + * {@link org.forstdb.InfoLogLevel#ERROR_LEVEL} or + * {@link org.forstdb.InfoLogLevel#FATAL_LEVEL}. *

    */ public abstract class Logger extends RocksCallbackObject { @@ -43,10 +43,10 @@ public abstract class Logger extends RocksCallbackObject { *

    AbstractLogger constructor.

    * *

    Important: the log level set within - * the {@link org.rocksdb.Options} instance will be used as + * the {@link org.forstdb.Options} instance will be used as * maximum log level of RocksDB.

    * - * @param options {@link org.rocksdb.Options} instance. + * @param options {@link org.forstdb.Options} instance. */ public Logger(final Options options) { super(options.nativeHandle_, WITH_OPTIONS); @@ -57,10 +57,10 @@ public Logger(final Options options) { *

    AbstractLogger constructor.

    * *

    Important: the log level set within - * the {@link org.rocksdb.DBOptions} instance will be used + * the {@link org.forstdb.DBOptions} instance will be used * as maximum log level of RocksDB.

    * - * @param dboptions {@link org.rocksdb.DBOptions} instance. + * @param dboptions {@link org.forstdb.DBOptions} instance. */ public Logger(final DBOptions dboptions) { super(dboptions.nativeHandle_, WITH_DBOPTIONS); @@ -78,9 +78,9 @@ protected long initializeNative(final long... nativeParameterHandles) { } /** - * Set {@link org.rocksdb.InfoLogLevel} to AbstractLogger. + * Set {@link org.forstdb.InfoLogLevel} to AbstractLogger. * - * @param infoLogLevel {@link org.rocksdb.InfoLogLevel} instance. + * @param infoLogLevel {@link org.forstdb.InfoLogLevel} instance. */ public void setInfoLogLevel(final InfoLogLevel infoLogLevel) { setInfoLogLevel(nativeHandle_, infoLogLevel.getValue()); @@ -89,7 +89,7 @@ public void setInfoLogLevel(final InfoLogLevel infoLogLevel) { /** * Return the loggers log level. * - * @return {@link org.rocksdb.InfoLogLevel} instance. + * @return {@link org.forstdb.InfoLogLevel} instance. */ public InfoLogLevel infoLogLevel() { return InfoLogLevel.getInfoLogLevel( diff --git a/java/src/main/java/org/rocksdb/MemTableConfig.java b/java/src/main/java/org/forstdb/MemTableConfig.java similarity index 98% rename from java/src/main/java/org/rocksdb/MemTableConfig.java rename to java/src/main/java/org/forstdb/MemTableConfig.java index 17033d251..a9076b5aa 100644 --- a/java/src/main/java/org/rocksdb/MemTableConfig.java +++ b/java/src/main/java/org/forstdb/MemTableConfig.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * MemTableConfig is used to config the internal mem-table of a RocksDB. diff --git a/java/src/main/java/org/rocksdb/MemTableInfo.java b/java/src/main/java/org/forstdb/MemTableInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/MemTableInfo.java rename to java/src/main/java/org/forstdb/MemTableInfo.java index 3d429035a..8c738afe3 100644 --- a/java/src/main/java/org/rocksdb/MemTableInfo.java +++ b/java/src/main/java/org/forstdb/MemTableInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/MemoryUsageType.java b/java/src/main/java/org/forstdb/MemoryUsageType.java similarity index 98% rename from java/src/main/java/org/rocksdb/MemoryUsageType.java rename to java/src/main/java/org/forstdb/MemoryUsageType.java index 40e6d1716..18c707ac3 100644 --- a/java/src/main/java/org/rocksdb/MemoryUsageType.java +++ b/java/src/main/java/org/forstdb/MemoryUsageType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * MemoryUsageType diff --git a/java/src/main/java/org/rocksdb/MemoryUtil.java b/java/src/main/java/org/forstdb/MemoryUtil.java similarity index 99% rename from java/src/main/java/org/rocksdb/MemoryUtil.java rename to java/src/main/java/org/forstdb/MemoryUtil.java index dac6d9b84..01a87dada 100644 --- a/java/src/main/java/org/rocksdb/MemoryUtil.java +++ b/java/src/main/java/org/forstdb/MemoryUtil.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.util.*; diff --git a/java/src/main/java/org/rocksdb/MergeOperator.java b/java/src/main/java/org/forstdb/MergeOperator.java similarity index 96% rename from java/src/main/java/org/rocksdb/MergeOperator.java rename to java/src/main/java/org/forstdb/MergeOperator.java index c299f6221..ea0430594 100644 --- a/java/src/main/java/org/rocksdb/MergeOperator.java +++ b/java/src/main/java/org/forstdb/MergeOperator.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * MergeOperator holds an operator to be applied when compacting diff --git a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java b/java/src/main/java/org/forstdb/MutableColumnFamilyOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java rename to java/src/main/java/org/forstdb/MutableColumnFamilyOptions.java index e54db7171..50b0fe8b1 100644 --- a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java +++ b/java/src/main/java/org/forstdb/MutableColumnFamilyOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.*; diff --git a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java b/java/src/main/java/org/forstdb/MutableColumnFamilyOptionsInterface.java similarity index 99% rename from java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java rename to java/src/main/java/org/forstdb/MutableColumnFamilyOptionsInterface.java index 729b0e882..59a5c5dfa 100644 --- a/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java +++ b/java/src/main/java/org/forstdb/MutableColumnFamilyOptionsInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public interface MutableColumnFamilyOptionsInterface< T extends MutableColumnFamilyOptionsInterface> diff --git a/java/src/main/java/org/rocksdb/MutableDBOptions.java b/java/src/main/java/org/forstdb/MutableDBOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/MutableDBOptions.java rename to java/src/main/java/org/forstdb/MutableDBOptions.java index 927e80522..051303e6c 100644 --- a/java/src/main/java/org/rocksdb/MutableDBOptions.java +++ b/java/src/main/java/org/forstdb/MutableDBOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.HashMap; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java b/java/src/main/java/org/forstdb/MutableDBOptionsInterface.java similarity index 99% rename from java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java rename to java/src/main/java/org/forstdb/MutableDBOptionsInterface.java index 1521fb4d0..f8bb3f5b5 100644 --- a/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java +++ b/java/src/main/java/org/forstdb/MutableDBOptionsInterface.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-package org.rocksdb; +package org.forstdb; public interface MutableDBOptionsInterface> { /** diff --git a/java/src/main/java/org/rocksdb/MutableOptionKey.java b/java/src/main/java/org/forstdb/MutableOptionKey.java similarity index 92% rename from java/src/main/java/org/rocksdb/MutableOptionKey.java rename to java/src/main/java/org/forstdb/MutableOptionKey.java index ec1b9ff3b..315f78c43 100644 --- a/java/src/main/java/org/rocksdb/MutableOptionKey.java +++ b/java/src/main/java/org/forstdb/MutableOptionKey.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; public interface MutableOptionKey { enum ValueType { diff --git a/java/src/main/java/org/rocksdb/MutableOptionValue.java b/java/src/main/java/org/forstdb/MutableOptionValue.java similarity index 99% rename from java/src/main/java/org/rocksdb/MutableOptionValue.java rename to java/src/main/java/org/forstdb/MutableOptionValue.java index fe689b5d0..b2bdca0f1 100644 --- a/java/src/main/java/org/rocksdb/MutableOptionValue.java +++ b/java/src/main/java/org/forstdb/MutableOptionValue.java @@ -1,7 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; -import static org.rocksdb.AbstractMutableOptions.INT_ARRAY_INT_SEPARATOR; +import static org.forstdb.AbstractMutableOptions.INT_ARRAY_INT_SEPARATOR; public abstract class MutableOptionValue { diff --git a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java b/java/src/main/java/org/forstdb/NativeComparatorWrapper.java similarity index 98% rename from java/src/main/java/org/rocksdb/NativeComparatorWrapper.java rename to java/src/main/java/org/forstdb/NativeComparatorWrapper.java index b270b8d36..1db25332b 100644 --- a/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java +++ b/java/src/main/java/org/forstdb/NativeComparatorWrapper.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java b/java/src/main/java/org/forstdb/NativeLibraryLoader.java similarity index 97% rename from java/src/main/java/org/rocksdb/NativeLibraryLoader.java rename to java/src/main/java/org/forstdb/NativeLibraryLoader.java index 6fe97994d..955ddc6bb 100644 --- a/java/src/main/java/org/rocksdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/forstdb/NativeLibraryLoader.java @@ -1,11 +1,11 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import java.io.*; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import org.rocksdb.util.Environment; +import org.forstdb.util.Environment; /** * This class is used to load the RocksDB shared library from within the jar. @@ -43,7 +43,7 @@ public static NativeLibraryLoader getInstance() { * Firstly attempts to load the library from java.library.path, * if that fails then it falls back to extracting * the library from the classpath - * {@link org.rocksdb.NativeLibraryLoader#loadLibraryFromJar(java.lang.String)} + * {@link org.forstdb.NativeLibraryLoader#loadLibraryFromJar(java.lang.String)} * * @param tmpDir A temporary directory to use * to copy the native library to when loading from the classpath. 
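Because NativeLibraryLoader, renamed above, is the piece most users hit indirectly, a short sketch of the two loading paths follows. It assumes the org.forstdb package from this patch; the temporary-directory handling is illustrative.

import java.nio.file.Files;
import org.forstdb.NativeLibraryLoader;
import org.forstdb.RocksDB;

public class LoadLibraryExample {
  public static void main(final String[] args) throws Exception {
    // Most applications just call loadLibrary(), which typically delegates to
    // NativeLibraryLoader: it tries java.library.path first and then falls back
    // to extracting the bundled library from the jar, as the javadoc above states.
    RocksDB.loadLibrary();

    // The loader can also be driven directly with an explicit temporary
    // directory, useful when the default temp location is not writable.
    final String tmpDir = Files.createTempDirectory("forst-native").toString();
    NativeLibraryLoader.getInstance().loadLibrary(tmpDir);
  }
}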
diff --git a/java/src/main/java/org/rocksdb/OperationStage.java b/java/src/main/java/org/forstdb/OperationStage.java similarity index 98% rename from java/src/main/java/org/rocksdb/OperationStage.java rename to java/src/main/java/org/forstdb/OperationStage.java index 6ac0a15a2..10c49c8ce 100644 --- a/java/src/main/java/org/rocksdb/OperationStage.java +++ b/java/src/main/java/org/forstdb/OperationStage.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The operation stage. diff --git a/java/src/main/java/org/rocksdb/OperationType.java b/java/src/main/java/org/forstdb/OperationType.java similarity index 98% rename from java/src/main/java/org/rocksdb/OperationType.java rename to java/src/main/java/org/forstdb/OperationType.java index bf7353468..9227427d7 100644 --- a/java/src/main/java/org/rocksdb/OperationType.java +++ b/java/src/main/java/org/forstdb/OperationType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The type used to refer to a thread operation. diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java b/java/src/main/java/org/forstdb/OptimisticTransactionDB.java similarity index 98% rename from java/src/main/java/org/rocksdb/OptimisticTransactionDB.java rename to java/src/main/java/org/forstdb/OptimisticTransactionDB.java index 283f19a31..4ae26d3d9 100644 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java +++ b/java/src/main/java/org/forstdb/OptimisticTransactionDB.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; @@ -27,7 +27,7 @@ private OptimisticTransactionDB(final long nativeHandle) { * Open an OptimisticTransactionDB similar to * {@link RocksDB#open(Options, String)}. * - * @param options {@link org.rocksdb.Options} instance. + * @param options {@link org.forstdb.Options} instance. * @param path the path to the rocksdb. * * @return a {@link OptimisticTransactionDB} instance on success, null if the @@ -52,7 +52,7 @@ public static OptimisticTransactionDB open(final Options options, * Open an OptimisticTransactionDB similar to * {@link RocksDB#open(DBOptions, String, List, List)}. * - * @param dbOptions {@link org.rocksdb.DBOptions} instance. + * @param dbOptions {@link org.forstdb.DBOptions} instance. * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances diff --git a/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java b/java/src/main/java/org/forstdb/OptimisticTransactionOptions.java similarity index 98% rename from java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java rename to java/src/main/java/org/forstdb/OptimisticTransactionOptions.java index a2f5d85ab..f1740105d 100644 --- a/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java +++ b/java/src/main/java/org/forstdb/OptimisticTransactionOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; public class OptimisticTransactionOptions extends RocksObject implements TransactionalOptions { diff --git a/java/src/main/java/org/rocksdb/OptionString.java b/java/src/main/java/org/forstdb/OptionString.java similarity index 99% rename from java/src/main/java/org/rocksdb/OptionString.java rename to java/src/main/java/org/forstdb/OptionString.java index bcbf1d152..f26b72cc9 100644 --- a/java/src/main/java/org/rocksdb/OptionString.java +++ b/java/src/main/java/org/forstdb/OptionString.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.ArrayList; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/Options.java b/java/src/main/java/org/forstdb/Options.java similarity index 99% rename from java/src/main/java/org/rocksdb/Options.java rename to java/src/main/java/org/forstdb/Options.java index 29f5e8e0d..c65978324 100644 --- a/java/src/main/java/org/rocksdb/Options.java +++ b/java/src/main/java/org/forstdb/Options.java @@ -3,14 +3,14 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.file.Paths; import java.util.*; /** * Options to control the behavior of a database. It will be used - * during the creation of a {@link org.rocksdb.RocksDB} (i.e., RocksDB.open()). + * during the creation of a {@link org.forstdb.RocksDB} (i.e., RocksDB.open()). *

    * As a descendent of {@link AbstractNativeReference}, this class is {@link AutoCloseable} * and will be automatically released if opened in the preamble of a try with resources block. @@ -52,8 +52,8 @@ public Options() { * Construct options for opening a RocksDB. Reusing database options * and column family options. * - * @param dbOptions {@link org.rocksdb.DBOptions} instance - * @param columnFamilyOptions {@link org.rocksdb.ColumnFamilyOptions} + * @param dbOptions {@link org.forstdb.DBOptions} instance + * @param columnFamilyOptions {@link org.forstdb.ColumnFamilyOptions} * instance */ public Options(final DBOptions dbOptions, diff --git a/java/src/main/java/org/rocksdb/OptionsUtil.java b/java/src/main/java/org/forstdb/OptionsUtil.java similarity index 90% rename from java/src/main/java/org/rocksdb/OptionsUtil.java rename to java/src/main/java/org/forstdb/OptionsUtil.java index 4168921f2..b41d45049 100644 --- a/java/src/main/java/org/rocksdb/OptionsUtil.java +++ b/java/src/main/java/org/forstdb/OptionsUtil.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; @@ -35,10 +35,10 @@ public class OptionsUtil { * BlockBasedTableOptions and making necessary changes. * * @param dbPath the path to the RocksDB. - * @param configOptions {@link org.rocksdb.ConfigOptions} instance. - * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be + * @param configOptions {@link org.forstdb.ConfigOptions} instance. + * @param dbOptions {@link org.forstdb.DBOptions} instance. This will be * filled and returned. - * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be + * @param cfDescs A list of {@link org.forstdb.ColumnFamilyDescriptor}'s be * returned. * @throws RocksDBException thrown if error happens in underlying * native library. @@ -56,10 +56,10 @@ public static void loadLatestOptions(final ConfigOptions configOptions, final St * See LoadLatestOptions above. * * @param optionsFileName the RocksDB options file path. - * @param configOptions {@link org.rocksdb.ConfigOptions} instance. - * @param dbOptions {@link org.rocksdb.DBOptions} instance. This will be + * @param configOptions {@link org.forstdb.ConfigOptions} instance. + * @param dbOptions {@link org.forstdb.DBOptions} instance. This will be * filled and returned. - * @param cfDescs A list of {@link org.rocksdb.ColumnFamilyDescriptor}'s be + * @param cfDescs A list of {@link org.forstdb.ColumnFamilyDescriptor}'s be * returned. * @throws RocksDBException thrown if error happens in underlying * native library. @@ -76,7 +76,7 @@ public static void loadOptionsFromFile(final ConfigOptions configOptions, * Returns the latest options file name under the specified RocksDB path. * * @param dbPath the path to the RocksDB. - * @param env {@link org.rocksdb.Env} instance. + * @param env {@link org.forstdb.Env} instance. * @return the latest options file name under the db path. 
* * @throws RocksDBException thrown if error happens in underlying diff --git a/java/src/main/java/org/rocksdb/PerfContext.java b/java/src/main/java/org/forstdb/PerfContext.java similarity index 99% rename from java/src/main/java/org/rocksdb/PerfContext.java rename to java/src/main/java/org/forstdb/PerfContext.java index 3934e4115..2b7ba6750 100644 --- a/java/src/main/java/org/rocksdb/PerfContext.java +++ b/java/src/main/java/org/forstdb/PerfContext.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class PerfContext extends RocksObject { protected PerfContext(final long nativeHandle) { diff --git a/java/src/main/java/org/rocksdb/PerfLevel.java b/java/src/main/java/org/forstdb/PerfLevel.java similarity index 98% rename from java/src/main/java/org/rocksdb/PerfLevel.java rename to java/src/main/java/org/forstdb/PerfLevel.java index 332e6d7d9..2d90366e6 100644 --- a/java/src/main/java/org/rocksdb/PerfLevel.java +++ b/java/src/main/java/org/forstdb/PerfLevel.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum PerfLevel { /** diff --git a/java/src/main/java/org/rocksdb/PersistentCache.java b/java/src/main/java/org/forstdb/PersistentCache.java similarity index 97% rename from java/src/main/java/org/rocksdb/PersistentCache.java rename to java/src/main/java/org/forstdb/PersistentCache.java index 5297111e6..a349a9461 100644 --- a/java/src/main/java/org/rocksdb/PersistentCache.java +++ b/java/src/main/java/org/forstdb/PersistentCache.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Persistent cache for caching IO pages on a persistent medium. The diff --git a/java/src/main/java/org/rocksdb/PlainTableConfig.java b/java/src/main/java/org/forstdb/PlainTableConfig.java similarity index 98% rename from java/src/main/java/org/rocksdb/PlainTableConfig.java rename to java/src/main/java/org/forstdb/PlainTableConfig.java index 46077ba56..6c62e589a 100644 --- a/java/src/main/java/org/rocksdb/PlainTableConfig.java +++ b/java/src/main/java/org/forstdb/PlainTableConfig.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The config for plain table sst format. @@ -163,7 +163,7 @@ public int hugePageTlbSize() { * different encoding types can co-exist in the same DB and * can be read.

    * - * @param encodingType {@link org.rocksdb.EncodingType} value. + * @param encodingType {@link org.forstdb.EncodingType} value. * @return the reference to the current config. */ public PlainTableConfig setEncodingType(final EncodingType encodingType) { diff --git a/java/src/main/java/org/rocksdb/PrepopulateBlobCache.java b/java/src/main/java/org/forstdb/PrepopulateBlobCache.java similarity index 99% rename from java/src/main/java/org/rocksdb/PrepopulateBlobCache.java rename to java/src/main/java/org/forstdb/PrepopulateBlobCache.java index f1237aa7c..0a4329289 100644 --- a/java/src/main/java/org/rocksdb/PrepopulateBlobCache.java +++ b/java/src/main/java/org/forstdb/PrepopulateBlobCache.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Enum PrepopulateBlobCache diff --git a/java/src/main/java/org/rocksdb/Priority.java b/java/src/main/java/org/forstdb/Priority.java similarity index 93% rename from java/src/main/java/org/rocksdb/Priority.java rename to java/src/main/java/org/forstdb/Priority.java index 34a56edcb..2077739e8 100644 --- a/java/src/main/java/org/rocksdb/Priority.java +++ b/java/src/main/java/org/forstdb/Priority.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The Thread Pool priority. @@ -34,7 +34,7 @@ byte getValue() { * * @param value byte representation of Priority. * - * @return {@link org.rocksdb.Priority} instance. + * @return {@link org.forstdb.Priority} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/Range.java b/java/src/main/java/org/forstdb/Range.java similarity index 95% rename from java/src/main/java/org/rocksdb/Range.java rename to java/src/main/java/org/forstdb/Range.java index 74c85e5f0..48f32a1ad 100644 --- a/java/src/main/java/org/rocksdb/Range.java +++ b/java/src/main/java/org/forstdb/Range.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Range from start to limit. diff --git a/java/src/main/java/org/rocksdb/RateLimiter.java b/java/src/main/java/org/forstdb/RateLimiter.java similarity index 99% rename from java/src/main/java/org/rocksdb/RateLimiter.java rename to java/src/main/java/org/forstdb/RateLimiter.java index c2b8a0fd9..ace2b9500 100644 --- a/java/src/main/java/org/rocksdb/RateLimiter.java +++ b/java/src/main/java/org/forstdb/RateLimiter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * RateLimiter, which is used to control write rate of flush and diff --git a/java/src/main/java/org/rocksdb/RateLimiterMode.java b/java/src/main/java/org/forstdb/RateLimiterMode.java similarity index 98% rename from java/src/main/java/org/rocksdb/RateLimiterMode.java rename to java/src/main/java/org/forstdb/RateLimiterMode.java index 4b029d816..87615fa10 100644 --- a/java/src/main/java/org/rocksdb/RateLimiterMode.java +++ b/java/src/main/java/org/forstdb/RateLimiterMode.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * Mode for {@link RateLimiter#RateLimiter(long, long, int, RateLimiterMode)}. diff --git a/java/src/main/java/org/rocksdb/ReadOptions.java b/java/src/main/java/org/forstdb/ReadOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/ReadOptions.java rename to java/src/main/java/org/forstdb/ReadOptions.java index c444ae167..5da35981c 100644 --- a/java/src/main/java/org/rocksdb/ReadOptions.java +++ b/java/src/main/java/org/forstdb/ReadOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The class that controls the get behavior. diff --git a/java/src/main/java/org/rocksdb/ReadTier.java b/java/src/main/java/org/forstdb/ReadTier.java similarity index 93% rename from java/src/main/java/org/rocksdb/ReadTier.java rename to java/src/main/java/org/forstdb/ReadTier.java index 78f83f6ad..7970918cd 100644 --- a/java/src/main/java/org/rocksdb/ReadTier.java +++ b/java/src/main/java/org/forstdb/ReadTier.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * RocksDB {@link ReadOptions} read tiers. @@ -34,7 +34,7 @@ public byte getValue() { * * @param value byte representation of ReadTier. * - * @return {@link org.rocksdb.ReadTier} instance or null. + * @return {@link org.forstdb.ReadTier} instance or null. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java b/java/src/main/java/org/forstdb/RemoveEmptyValueCompactionFilter.java similarity index 96% rename from java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java rename to java/src/main/java/org/forstdb/RemoveEmptyValueCompactionFilter.java index e96694313..20e187205 100644 --- a/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java +++ b/java/src/main/java/org/forstdb/RemoveEmptyValueCompactionFilter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Just a Java wrapper around EmptyValueCompactionFilter implemented in C++ diff --git a/java/src/main/java/org/rocksdb/RestoreOptions.java b/java/src/main/java/org/forstdb/RestoreOptions.java similarity index 98% rename from java/src/main/java/org/rocksdb/RestoreOptions.java rename to java/src/main/java/org/forstdb/RestoreOptions.java index a6b43d476..6c60981a6 100644 --- a/java/src/main/java/org/rocksdb/RestoreOptions.java +++ b/java/src/main/java/org/forstdb/RestoreOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * RestoreOptions to control the behavior of restore. 
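ReadOptions and ReadTier, both renamed above, control per-read behaviour. A minimal sketch, assuming the org.forstdb packages from this patch and the standard RocksJava setters; ReadOptionsExample and the /tmp path are placeholders.

import org.forstdb.Options;
import org.forstdb.ReadOptions;
import org.forstdb.RocksDB;

public class ReadOptionsExample {
  public static void main(final String[] args) throws Exception {
    RocksDB.loadLibrary();
    try (final Options options = new Options().setCreateIfMissing(true);
         final RocksDB db = RocksDB.open(options, "/tmp/forst-read-example")) {
      db.put("key".getBytes(), "value".getBytes());
      // Verify block checksums on this read and keep one-off reads from
      // displacing hot entries in the block cache.
      try (final ReadOptions readOptions =
               new ReadOptions().setVerifyChecksums(true).setFillCache(false)) {
        final byte[] value = db.get(readOptions, "key".getBytes());
        System.out.println(value == null ? "<not found>" : new String(value));
      }
    }
  }
}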
diff --git a/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java b/java/src/main/java/org/forstdb/ReusedSynchronisationType.java similarity index 95% rename from java/src/main/java/org/rocksdb/ReusedSynchronisationType.java rename to java/src/main/java/org/forstdb/ReusedSynchronisationType.java index 2709a5d59..a3590fabc 100644 --- a/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java +++ b/java/src/main/java/org/forstdb/ReusedSynchronisationType.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Determines the type of synchronisation primitive used @@ -47,7 +47,7 @@ public byte getValue() { * * @param value byte representation of ReusedSynchronisationType. * - * @return {@link org.rocksdb.ReusedSynchronisationType} instance. + * @return {@link org.forstdb.ReusedSynchronisationType} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/RocksCallbackObject.java b/java/src/main/java/org/forstdb/RocksCallbackObject.java similarity index 99% rename from java/src/main/java/org/rocksdb/RocksCallbackObject.java rename to java/src/main/java/org/forstdb/RocksCallbackObject.java index 2c4547b12..4daf14277 100644 --- a/java/src/main/java/org/rocksdb/RocksCallbackObject.java +++ b/java/src/main/java/org/forstdb/RocksCallbackObject.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/RocksDB.java b/java/src/main/java/org/forstdb/RocksDB.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksDB.java rename to java/src/main/java/org/forstdb/RocksDB.java index 839d01877..76f74609a 100644 --- a/java/src/main/java/org/rocksdb/RocksDB.java +++ b/java/src/main/java/org/forstdb/RocksDB.java @@ -3,16 +3,16 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.rocksdb.util.BufferUtil.CheckBounds; +import static org.forstdb.util.BufferUtil.CheckBounds; import java.io.IOException; import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.atomic.AtomicReference; -import org.rocksdb.util.Environment; +import org.forstdb.util.Environment; /** * A RocksDB is a persistent ordered map from keys to values. It is safe for @@ -241,7 +241,7 @@ public static RocksDB open(final String path, * with new Options instance as underlying native statistics instance does not * use any locks to prevent concurrent updates.

    * - * @param options {@link org.rocksdb.Options} instance. + * @param options {@link org.forstdb.Options} instance. * @param path the path to the rocksdb. * @return a {@link RocksDB} instance on success, null if the specified * {@link RocksDB} can not be opened. @@ -285,7 +285,7 @@ public static RocksDB open(final Options options, final String path) * ColumnFamily handles are disposed when the RocksDB instance is disposed. *

    * - * @param options {@link org.rocksdb.DBOptions} instance. + * @param options {@link org.forstdb.DBOptions} instance. * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances @@ -697,7 +697,7 @@ public static List listColumnFamilies(final Options options, * The ColumnFamilyHandle is automatically disposed with DB disposal. * * @param columnFamilyDescriptor column family to be created. - * @return {@link org.rocksdb.ColumnFamilyHandle} instance. + * @return {@link org.forstdb.ColumnFamilyHandle} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. @@ -781,7 +781,7 @@ public List createColumnFamilies( * The ColumnFamilyHandle is automatically disposed with DB disposal. * * @param columnFamilyDescriptor column family to be created. - * @return {@link org.rocksdb.ColumnFamilyHandle} instance. + * @return {@link org.forstdb.ColumnFamilyHandle} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. @@ -819,7 +819,7 @@ public ColumnFamilyHandle createColumnFamilyWithImport( * only records a drop record in the manifest and prevents the column * family from flushing and compacting. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * * @throws RocksDBException thrown if error happens in underlying @@ -904,7 +904,7 @@ public void put(final byte[] key, final int offset, final int len, * Set the database entry for "key" to "value" in the specified * column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the specified key to be inserted. * @param value the value associated with the specified key. @@ -924,7 +924,7 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, * Set the database entry for "key" to "value" in the specified * column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must @@ -954,7 +954,7 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, /** * Set the database entry for "key" to "value". * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. + * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key the specified key to be inserted. * @param value the value associated with the specified key. * @@ -970,7 +970,7 @@ public void put(final WriteOptions writeOpts, final byte[] key, /** * Set the database entry for "key" to "value". * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. + * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length @@ -1000,9 +1000,9 @@ public void put(final WriteOptions writeOpts, * Set the database entry for "key" to "value" for the specified * column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. 
+ * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key the specified key to be inserted. * @param value the value associated with the specified key. *

    @@ -1023,9 +1023,9 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, * Set the database entry for "key" to "value" for the specified * column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. + * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key the specified key to be inserted. Position and limit is used. * Supports direct buffer only. * @param value the value associated with the specified key. Position and limit is used. @@ -1058,7 +1058,7 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final WriteOptions /** * Set the database entry for "key" to "value". * - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. + * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key the specified key to be inserted. Position and limit is used. * Supports direct buffer only. * @param value the value associated with the specified key. Position and limit is used. @@ -1092,9 +1092,9 @@ public void put(final WriteOptions writeOpts, final ByteBuffer key, final ByteBu * Set the database entry for "key" to "value" for the specified * column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param writeOpts {@link org.rocksdb.WriteOptions} instance. + * @param writeOpts {@link org.forstdb.WriteOptions} instance. * @param key The specified key to be inserted * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length @@ -1159,7 +1159,7 @@ public void delete(final byte[] key, final int offset, final int len) * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key Key to delete within database * @@ -1176,7 +1176,7 @@ public void delete(final ColumnFamilyHandle columnFamilyHandle, * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key Key to delete within database * @param offset the offset of the "key" array to be used, @@ -1234,7 +1234,7 @@ public void delete(final WriteOptions writeOpt, final byte[] key, * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database @@ -1254,7 +1254,7 @@ public void delete(final ColumnFamilyHandle columnFamilyHandle, * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. 
* - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database @@ -1276,7 +1276,7 @@ public void delete(final ColumnFamilyHandle columnFamilyHandle, /** * Get the value associated with the specified key within column family. * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. * @param value the out-value to receive the retrieved value. @@ -1316,9 +1316,9 @@ public int get(final ReadOptions opt, final ByteBuffer key, final ByteBuffer val /** * Get the value associated with the specified key within column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. * @param value the out-value to receive the retrieved value. @@ -1493,7 +1493,7 @@ public void deleteRange(final byte[] beginKey, final byte[] endKey) * non-OK status on error. It is not an error if "key" did not exist in the * database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance * @param beginKey First key to delete within database (inclusive) * @param endKey Last key to delete within database (exclusive) * @@ -1537,7 +1537,7 @@ public void deleteRange(final WriteOptions writeOpt, final byte[] beginKey, * non-OK status on error. It is not an error if "key" did not exist in the * database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance * @param writeOpt WriteOptions to be used with delete operation * @param beginKey First key to delete within database (included) * @param endKey Last key to delete within database (excluded) @@ -1748,7 +1748,7 @@ public void delete(final WriteOptions writeOpt, final ByteBuffer key) throws Roc * success, and a non-OK status on error. It is not an error if "key" * did not exist in the database. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param writeOpt WriteOptions to be used with delete operation * @param key Key to delete within database. It is using position and limit. @@ -1904,7 +1904,7 @@ public int get(final byte[] key, final int offset, final int len, /** * Get the value associated with the specified key within column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. @@ -1927,7 +1927,7 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, /** * Get the value associated with the specified key within column family. 
* - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be @@ -1962,7 +1962,7 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, final byte[] key, /** * Get the value associated with the specified key. * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. * @return The size of the actual value that matches the specified @@ -1984,7 +1984,7 @@ public int get(final ReadOptions opt, final byte[] key, /** * Get the value associated with the specified key. * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length @@ -2017,9 +2017,9 @@ public int get(final ReadOptions opt, final byte[] key, final int offset, /** * Get the value associated with the specified key within column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. * @return The size of the actual value that matches the specified @@ -2042,9 +2042,9 @@ public int get(final ColumnFamilyHandle columnFamilyHandle, /** * Get the value associated with the specified key within column family. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param offset the offset of the "key" array to be used, must be * non-negative and no larger than "key".length @@ -2118,7 +2118,7 @@ public byte[] get(final byte[] key, final int offset, * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @return a byte array storing the value associated with the input key if @@ -2138,7 +2138,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @param offset the offset of the "key" array to be used, must be @@ -2205,7 +2205,7 @@ public byte[] get(final ReadOptions opt, final byte[] key, final int offset, * the value associated with the specified input key if any. null will be * returned if the specified key is not found. 
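A short sketch of the lookup and delete calls documented above, assuming an open database; the key strings and the GetDeleteSketch class name are illustrative:

    import org.forstdb.ReadOptions;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;

    // Illustrative helper; get() returns null when a key is absent, and
    // deleting a missing key is not an error.
    public final class GetDeleteSketch {
      static void getAndDelete(final RocksDB db) throws RocksDBException {
        final byte[] key = "user:1".getBytes();
        final byte[] value = db.get(key);
        if (value != null) {
          db.delete(key);
        }
        // Range delete: removes keys in ["a", "z"); the end key is excluded.
        db.deleteRange("a".getBytes(), "z".getBytes());
        try (final ReadOptions readOpts = new ReadOptions().setVerifyChecksums(false)) {
          System.out.println("after delete: "
              + (db.get(readOpts, key) == null ? "absent" : "present"));
        }
      }
    }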
* - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @param opt Read options. @@ -2226,7 +2226,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, * the value associated with the specified input key if any. null will be * returned if the specified key is not found. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key retrieve the value. * @param offset the offset of the "key" array to be used, must be @@ -2283,7 +2283,7 @@ public List multiGetAsList(final List keys) *

    * * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys List of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. @@ -2357,7 +2357,7 @@ public List multiGetAsList(final ReadOptions opt, * * @param opt Read options. * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * @return List of values for the given list of keys. List will contain * null for keys which could not be found. @@ -2440,7 +2440,7 @@ public List multiGetByteBuffers(final ReadOptions readOptio *
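The multiGetAsList() overloads above take a list of column family handles parallel to the key list; a hedged sketch, assuming an open database and two existing handles (all names illustrative):

    import java.util.Arrays;
    import java.util.List;
    import org.forstdb.ColumnFamilyHandle;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;

    // Illustrative helper; the returned list is parallel to the key list and
    // contains null for keys that were not found.
    public final class MultiGetSketch {
      static List<byte[]> lookupBatch(final RocksDB db, final ColumnFamilyHandle usersCf,
          final ColumnFamilyHandle ordersCf) throws RocksDBException {
        final List<ColumnFamilyHandle> cfs = Arrays.asList(usersCf, ordersCf);
        final List<byte[]> keys = Arrays.asList("user:1".getBytes(), "order:7".getBytes());
        return db.multiGetAsList(cfs, keys);
      }
    }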

    * * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys list of keys for which values need to be retrieved. * @param values list of buffers to return retrieved values in * @throws RocksDBException if error happens in underlying native library. @@ -2465,7 +2465,7 @@ public List multiGetByteBuffers( * * @param readOptions Read options * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys list of keys for which values need to be retrieved. * @param values list of buffers to return retrieved values in * @throws RocksDBException if error happens in underlying native library. @@ -3272,7 +3272,7 @@ public RocksIterator newIterator(final ReadOptions readOptions) { * The returned iterator should be closed before this db is closed. *

    * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @return instance of iterator object. */ @@ -3293,7 +3293,7 @@ public RocksIterator newIterator( * The returned iterator should be closed before this db is closed. *
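To make the iterator contract above concrete (the iterator must be closed before the database), a minimal scan over one column family; the IteratorSketch name and handle are illustrative:

    import org.forstdb.ColumnFamilyHandle;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksIterator;

    // Illustrative helper; try-with-resources closes the iterator before
    // the caller eventually closes the database.
    public final class IteratorSketch {
      static void scan(final RocksDB db, final ColumnFamilyHandle cfHandle) {
        try (final RocksIterator it = db.newIterator(cfHandle)) {
          for (it.seekToFirst(); it.isValid(); it.next()) {
            System.out.printf("%s -> %s%n", new String(it.key()), new String(it.value()));
          }
        }
      }
    }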

    * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param readOptions {@link ReadOptions} instance. * @return instance of iterator object. @@ -3310,8 +3310,8 @@ public RocksIterator newIterator(final ColumnFamilyHandle columnFamilyHandle, * before the db is deleted * * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. - * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} + * {@link org.forstdb.ColumnFamilyHandle} instances. + * @return {@link java.util.List} containing {@link org.forstdb.RocksIterator} * instances * * @throws RocksDBException thrown if error happens in underlying @@ -3329,9 +3329,9 @@ public List newIterators( * before the db is deleted * * @param columnFamilyHandleList {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param readOptions {@link ReadOptions} instance. - * @return {@link java.util.List} containing {@link org.rocksdb.RocksIterator} + * @return {@link java.util.List} containing {@link org.forstdb.RocksIterator} * instances * * @throws RocksDBException thrown if error happens in underlying @@ -3409,7 +3409,7 @@ public void releaseSnapshot(final Snapshot snapshot) { * of the sstables that make up the db contents. * * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param property to be fetched. See above for examples * @return property value @@ -3470,7 +3470,7 @@ public Map getMapProperty(final String property) /** * Gets a property map. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param property to be fetched. * @@ -3528,7 +3528,7 @@ public long getLongProperty(final String property) throws RocksDBException { *

    Java 8: In Java 8 the value should be treated as * unsigned long using provided methods of type {@link Long}.
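A brief sketch of the property getters documented above; the property names are standard rocksdb.* strings and, per the Java 8 note, the long value is treated as unsigned (class name illustrative, database assumed open):

    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;

    // Illustrative helper; both getters throw RocksDBException on native errors.
    public final class PropertySketch {
      static void dumpProperties(final RocksDB db) throws RocksDBException {
        final String numKeys = db.getProperty("rocksdb.estimate-num-keys");
        final long memtables = db.getLongProperty("rocksdb.cur-size-all-mem-tables");
        // Treat the value as unsigned, as the Javadoc above recommends for Java 8+.
        System.out.println("estimated keys: " + numKeys
            + ", memtable bytes: " + Long.toUnsignedString(memtables));
      }
    }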

    * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family * @param property to be fetched. * @@ -3594,7 +3594,7 @@ public long getAggregatedLongProperty(final String property) * should include the recently written data in the mem-tables (if * the mem-table type supports it), data serialized to disk, or both. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family * @param ranges the ranges over which to approximate sizes * @param sizeApproximationFlags flags to determine what to include in the @@ -3656,7 +3656,7 @@ public CountAndSize(final long count, final long size) { * {@link #getApproximateSizes(ColumnFamilyHandle, List, SizeApproximationFlag...)}, * except that it returns approximate number of records and size in memtables. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family * @param range the ranges over which to get the memtable stats * @@ -3717,7 +3717,7 @@ public void compactRange() throws RocksDBException { * * * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * * @throws RocksDBException thrown if an error occurs within the native @@ -3763,7 +3763,7 @@ public void compactRange(final byte[] begin, final byte[] end) *
  • {@link #compactRange(ColumnFamilyHandle)}
  • * * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param begin start of key range (included in range) * @param end end of key range (excluded from range) @@ -3786,7 +3786,7 @@ public void compactRange( * all data will have been pushed down to the last level containing * any data.
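For the compactRange() overloads cross-referenced above, a hedged sketch assuming an open database and a column family handle; the key bounds are illustrative:

    import org.forstdb.ColumnFamilyHandle;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;

    // Illustrative helper; after compaction, data is pushed down to the
    // last level containing any data, as the Javadoc above states.
    public final class CompactSketch {
      static void compactExamples(final RocksDB db, final ColumnFamilyHandle cfHandle)
          throws RocksDBException {
        // Full compaction of the default column family.
        db.compactRange();
        // Compact only ["k000", "k999") in an explicit column family (end key excluded).
        db.compactRange(cfHandle, "k000".getBytes(), "k999".getBytes());
      }
    }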

    * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance. * @param begin start of key range (included in range) * @param end end of key range (excluded from range) * @param compactRangeOptions options for the compaction @@ -3811,7 +3811,7 @@ public void compactRange( * Any entries outside this range will be completely deleted (including * tombstones). * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance * @param beginKey First key to clip within database (inclusive) * @param endKey Last key to clip within database (exclusive) * @@ -3828,7 +3828,7 @@ public void clipColumnFamily(final ColumnFamilyHandle columnFamilyHandle, final /** * Change the options for the column family handle. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * @param mutableColumnFamilyOptions the options. * @@ -3879,7 +3879,7 @@ public PerfContext getPerfContext() { /** * Get the options for the column family handle * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance, or null for the default column family. * * @return the options parsed from the options string return by RocksDB @@ -4175,7 +4175,7 @@ public Env getEnv() { * is not GC'ed before this method finishes. If the wait parameter is * set to false, flush processing is asynchronous.

    * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. + * @param flushOptions {@link org.forstdb.FlushOptions} instance. * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @@ -4191,8 +4191,8 @@ public void flush(final FlushOptions flushOptions) * is not GC'ed before this method finishes. If the wait parameter is * set to false, flush processing is asynchronous.
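A minimal sketch of the flush() variants above; setWaitForFlush(true) makes the call block until the flush completes (names illustrative, database assumed open):

    import org.forstdb.ColumnFamilyHandle;
    import org.forstdb.FlushOptions;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;

    // Illustrative helper; the FlushOptions object is owned and closed here.
    public final class FlushSketch {
      static void flushAll(final RocksDB db, final ColumnFamilyHandle cfHandle)
          throws RocksDBException {
        try (final FlushOptions flushOpts = new FlushOptions().setWaitForFlush(true)) {
          db.flush(flushOpts);            // default column family
          db.flush(flushOpts, cfHandle);  // a specific column family
        }
      }
    }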

    * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance. + * @param flushOptions {@link org.forstdb.FlushOptions} instance. + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance. * @throws RocksDBException thrown if an error occurs within the native * part of the library. */ @@ -4213,7 +4213,7 @@ public void flush(final FlushOptions flushOptions, * specified up to the latest sequence number at the time when flush is * requested. * - * @param flushOptions {@link org.rocksdb.FlushOptions} instance. + * @param flushOptions {@link org.forstdb.FlushOptions} instance. * @param columnFamilyHandles column family handles. * @throws RocksDBException thrown if an error occurs within the native * part of the library. @@ -4394,9 +4394,9 @@ public List getSortedWalFiles() throws RocksDBException { * * @param sequenceNumber sequence number offset * - * @return {@link org.rocksdb.TransactionLogIterator} instance. + * @return {@link org.forstdb.TransactionLogIterator} instance. * - * @throws org.rocksdb.RocksDBException if iterator cannot be retrieved + * @throws org.forstdb.RocksDBException if iterator cannot be retrieved * from native-side. */ public TransactionLogIterator getUpdatesSince(final long sequenceNumber) @@ -4737,7 +4737,7 @@ public void deleteFilesInRanges(final ColumnFamilyHandle columnFamily, final Lis * Be very careful using this method. * * @param path the path to the Rocksdb database. - * @param options {@link org.rocksdb.Options} instance. + * @param options {@link org.forstdb.Options} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. diff --git a/java/src/main/java/org/rocksdb/RocksDBException.java b/java/src/main/java/org/forstdb/RocksDBException.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksDBException.java rename to java/src/main/java/org/forstdb/RocksDBException.java index 9df411d12..c1f698d5a 100644 --- a/java/src/main/java/org/rocksdb/RocksDBException.java +++ b/java/src/main/java/org/forstdb/RocksDBException.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * A RocksDBException encapsulates the error of an operation. This exception diff --git a/java/src/main/java/org/rocksdb/RocksEnv.java b/java/src/main/java/org/forstdb/RocksEnv.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksEnv.java rename to java/src/main/java/org/forstdb/RocksEnv.java index ca010c9f9..5d6d1a639 100644 --- a/java/src/main/java/org/rocksdb/RocksEnv.java +++ b/java/src/main/java/org/forstdb/RocksEnv.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** *

    A RocksEnv is an interface used by the rocksdb implementation to access diff --git a/java/src/main/java/org/rocksdb/RocksIterator.java b/java/src/main/java/org/forstdb/RocksIterator.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksIterator.java rename to java/src/main/java/org/forstdb/RocksIterator.java index b35dea2af..8127ff157 100644 --- a/java/src/main/java/org/rocksdb/RocksIterator.java +++ b/java/src/main/java/org/forstdb/RocksIterator.java @@ -3,9 +3,9 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; -import static org.rocksdb.util.BufferUtil.CheckBounds; +import static org.forstdb.util.BufferUtil.CheckBounds; import java.nio.ByteBuffer; @@ -20,7 +20,7 @@ * non-const method, all threads accessing the same RocksIterator must use * external synchronization.

    * - * @see org.rocksdb.RocksObject + * @see org.forstdb.RocksObject */ public class RocksIterator extends AbstractRocksIterator { protected RocksIterator(final RocksDB rocksDB, final long nativeHandle) { diff --git a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java b/java/src/main/java/org/forstdb/RocksIteratorInterface.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksIteratorInterface.java rename to java/src/main/java/org/forstdb/RocksIteratorInterface.java index 819c21c2c..9d344c22b 100644 --- a/java/src/main/java/org/rocksdb/RocksIteratorInterface.java +++ b/java/src/main/java/org/forstdb/RocksIteratorInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -18,7 +18,7 @@ * non-const method, all threads accessing the same RocksIterator must use * external synchronization.

    * - * @see org.rocksdb.RocksObject + * @see org.forstdb.RocksObject */ public interface RocksIteratorInterface { diff --git a/java/src/main/java/org/rocksdb/RocksMemEnv.java b/java/src/main/java/org/forstdb/RocksMemEnv.java similarity index 97% rename from java/src/main/java/org/rocksdb/RocksMemEnv.java rename to java/src/main/java/org/forstdb/RocksMemEnv.java index 39a6f6e1c..05db05900 100644 --- a/java/src/main/java/org/rocksdb/RocksMemEnv.java +++ b/java/src/main/java/org/forstdb/RocksMemEnv.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Memory environment. diff --git a/java/src/main/java/org/rocksdb/RocksMutableObject.java b/java/src/main/java/org/forstdb/RocksMutableObject.java similarity index 99% rename from java/src/main/java/org/rocksdb/RocksMutableObject.java rename to java/src/main/java/org/forstdb/RocksMutableObject.java index eb3215290..7840cc14b 100644 --- a/java/src/main/java/org/rocksdb/RocksMutableObject.java +++ b/java/src/main/java/org/forstdb/RocksMutableObject.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * RocksMutableObject is an implementation of {@link AbstractNativeReference} diff --git a/java/src/main/java/org/rocksdb/RocksObject.java b/java/src/main/java/org/forstdb/RocksObject.java similarity index 98% rename from java/src/main/java/org/rocksdb/RocksObject.java rename to java/src/main/java/org/forstdb/RocksObject.java index f07e1018a..7abd061fa 100644 --- a/java/src/main/java/org/rocksdb/RocksObject.java +++ b/java/src/main/java/org/forstdb/RocksObject.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * RocksObject is an implementation of {@link AbstractNativeReference} which diff --git a/java/src/main/java/org/rocksdb/SanityLevel.java b/java/src/main/java/org/forstdb/SanityLevel.java similarity index 98% rename from java/src/main/java/org/rocksdb/SanityLevel.java rename to java/src/main/java/org/forstdb/SanityLevel.java index 30568c363..4487d7d59 100644 --- a/java/src/main/java/org/rocksdb/SanityLevel.java +++ b/java/src/main/java/org/forstdb/SanityLevel.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum SanityLevel { NONE((byte) 0x0), diff --git a/java/src/main/java/org/rocksdb/SizeApproximationFlag.java b/java/src/main/java/org/forstdb/SizeApproximationFlag.java similarity index 96% rename from java/src/main/java/org/rocksdb/SizeApproximationFlag.java rename to java/src/main/java/org/forstdb/SizeApproximationFlag.java index fe3c2dd05..c39824925 100644 --- a/java/src/main/java/org/rocksdb/SizeApproximationFlag.java +++ b/java/src/main/java/org/forstdb/SizeApproximationFlag.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-package org.rocksdb; +package org.forstdb; import java.util.List; diff --git a/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java b/java/src/main/java/org/forstdb/SkipListMemTableConfig.java similarity index 98% rename from java/src/main/java/org/rocksdb/SkipListMemTableConfig.java rename to java/src/main/java/org/forstdb/SkipListMemTableConfig.java index e2c1b97d8..5e6f7090c 100644 --- a/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java +++ b/java/src/main/java/org/forstdb/SkipListMemTableConfig.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * The config for skip-list memtable representation. diff --git a/java/src/main/java/org/rocksdb/Slice.java b/java/src/main/java/org/forstdb/Slice.java similarity index 96% rename from java/src/main/java/org/rocksdb/Slice.java rename to java/src/main/java/org/forstdb/Slice.java index 6a01374d6..386ab4740 100644 --- a/java/src/main/java/org/rocksdb/Slice.java +++ b/java/src/main/java/org/forstdb/Slice.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** *

    Base class for slices which will receive @@ -11,7 +11,7 @@ * *

    byte[] backed slices typically perform better with * small keys and values. When using larger keys and - * values consider using {@link org.rocksdb.DirectSlice}

    + * values consider using {@link org.forstdb.DirectSlice}

    */ public class Slice extends AbstractSlice { @@ -27,7 +27,7 @@ public class Slice extends AbstractSlice { * at creation time.

    * *

    Note: You should be aware that - * {@see org.rocksdb.RocksObject#disOwnNativeHandle()} is intentionally + * {@see org.forstdb.RocksObject#disOwnNativeHandle()} is intentionally * called from the default Slice constructor, and that it is marked as * private. This is so that developers cannot construct their own default * Slice objects (at present). As developers cannot construct their own diff --git a/java/src/main/java/org/rocksdb/Snapshot.java b/java/src/main/java/org/forstdb/Snapshot.java similarity index 98% rename from java/src/main/java/org/rocksdb/Snapshot.java rename to java/src/main/java/org/forstdb/Snapshot.java index 1f471bd31..af5bb1ef8 100644 --- a/java/src/main/java/org/rocksdb/Snapshot.java +++ b/java/src/main/java/org/forstdb/Snapshot.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Snapshot of database diff --git a/java/src/main/java/org/rocksdb/SstFileManager.java b/java/src/main/java/org/forstdb/SstFileManager.java similarity index 99% rename from java/src/main/java/org/rocksdb/SstFileManager.java rename to java/src/main/java/org/forstdb/SstFileManager.java index 0b9a60061..ad51d753f 100644 --- a/java/src/main/java/org/rocksdb/SstFileManager.java +++ b/java/src/main/java/org/forstdb/SstFileManager.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Map; diff --git a/java/src/main/java/org/rocksdb/SstFileMetaData.java b/java/src/main/java/org/forstdb/SstFileMetaData.java similarity index 99% rename from java/src/main/java/org/rocksdb/SstFileMetaData.java rename to java/src/main/java/org/forstdb/SstFileMetaData.java index 6025d0b42..4fa210a9a 100644 --- a/java/src/main/java/org/rocksdb/SstFileMetaData.java +++ b/java/src/main/java/org/forstdb/SstFileMetaData.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The metadata that describes a SST file. diff --git a/java/src/main/java/org/rocksdb/SstFileReader.java b/java/src/main/java/org/forstdb/SstFileReader.java similarity index 99% rename from java/src/main/java/org/rocksdb/SstFileReader.java rename to java/src/main/java/org/forstdb/SstFileReader.java index 939d39375..2134f3d24 100644 --- a/java/src/main/java/org/rocksdb/SstFileReader.java +++ b/java/src/main/java/org/forstdb/SstFileReader.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class SstFileReader extends RocksObject { public SstFileReader(final Options options) { diff --git a/java/src/main/java/org/rocksdb/SstFileReaderIterator.java b/java/src/main/java/org/forstdb/SstFileReaderIterator.java similarity index 99% rename from java/src/main/java/org/rocksdb/SstFileReaderIterator.java rename to java/src/main/java/org/forstdb/SstFileReaderIterator.java index a4a08167b..c1eac960d 100644 --- a/java/src/main/java/org/rocksdb/SstFileReaderIterator.java +++ b/java/src/main/java/org/forstdb/SstFileReaderIterator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/SstFileWriter.java b/java/src/main/java/org/forstdb/SstFileWriter.java similarity index 98% rename from java/src/main/java/org/rocksdb/SstFileWriter.java rename to java/src/main/java/org/forstdb/SstFileWriter.java index d5766bffb..8e87b55ed 100644 --- a/java/src/main/java/org/rocksdb/SstFileWriter.java +++ b/java/src/main/java/org/forstdb/SstFileWriter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -16,8 +16,8 @@ public class SstFileWriter extends RocksObject { /** * SstFileWriter Constructor. * - * @param envOptions {@link org.rocksdb.EnvOptions} instance. - * @param options {@link org.rocksdb.Options} instance. + * @param envOptions {@link org.forstdb.EnvOptions} instance. + * @param options {@link org.forstdb.Options} instance. */ public SstFileWriter(final EnvOptions envOptions, final Options options) { super(newSstFileWriter( diff --git a/java/src/main/java/org/rocksdb/SstPartitionerFactory.java b/java/src/main/java/org/forstdb/SstPartitionerFactory.java similarity index 96% rename from java/src/main/java/org/rocksdb/SstPartitionerFactory.java rename to java/src/main/java/org/forstdb/SstPartitionerFactory.java index ea6f13565..9fa9e32a5 100644 --- a/java/src/main/java/org/rocksdb/SstPartitionerFactory.java +++ b/java/src/main/java/org/forstdb/SstPartitionerFactory.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Handle to factory for SstPartitioner. It is used in {@link ColumnFamilyOptions} diff --git a/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java b/java/src/main/java/org/forstdb/SstPartitionerFixedPrefixFactory.java similarity index 97% rename from java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java rename to java/src/main/java/org/forstdb/SstPartitionerFixedPrefixFactory.java index b1ccf08c1..c86eda32b 100644 --- a/java/src/main/java/org/rocksdb/SstPartitionerFixedPrefixFactory.java +++ b/java/src/main/java/org/forstdb/SstPartitionerFixedPrefixFactory.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Fixed prefix factory. It partitions SST files using fixed prefix of the key. diff --git a/java/src/main/java/org/rocksdb/StateType.java b/java/src/main/java/org/forstdb/StateType.java similarity index 98% rename from java/src/main/java/org/rocksdb/StateType.java rename to java/src/main/java/org/forstdb/StateType.java index 803fa37d9..2e81a1b73 100644 --- a/java/src/main/java/org/rocksdb/StateType.java +++ b/java/src/main/java/org/forstdb/StateType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The type used to refer to a thread state. 
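Since the SstFileWriter constructor above takes the renamed EnvOptions and Options classes, a hedged sketch of writing an external SST file; the output path is a placeholder and keys must be added in ascending order:

    import org.forstdb.EnvOptions;
    import org.forstdb.Options;
    import org.forstdb.RocksDBException;
    import org.forstdb.SstFileWriter;

    // Illustrative helper; the resulting file can later be ingested into a database.
    public final class SstWriteSketch {
      static void writeSst() throws RocksDBException {
        try (final EnvOptions envOpts = new EnvOptions();
             final Options opts = new Options();
             final SstFileWriter writer = new SstFileWriter(envOpts, opts)) {
          writer.open("/tmp/example.sst");             // placeholder path
          writer.put("a".getBytes(), "1".getBytes());  // keys in sorted order
          writer.put("b".getBytes(), "2".getBytes());
          writer.finish();
        }
      }
    }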
diff --git a/java/src/main/java/org/rocksdb/Statistics.java b/java/src/main/java/org/forstdb/Statistics.java similarity index 99% rename from java/src/main/java/org/rocksdb/Statistics.java rename to java/src/main/java/org/forstdb/Statistics.java index 09e08ee56..33bfd2e21 100644 --- a/java/src/main/java/org/rocksdb/Statistics.java +++ b/java/src/main/java/org/forstdb/Statistics.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.EnumSet; diff --git a/java/src/main/java/org/rocksdb/StatisticsCollector.java b/java/src/main/java/org/forstdb/StatisticsCollector.java similarity index 99% rename from java/src/main/java/org/rocksdb/StatisticsCollector.java rename to java/src/main/java/org/forstdb/StatisticsCollector.java index dd0d98fe5..8b698188c 100644 --- a/java/src/main/java/org/rocksdb/StatisticsCollector.java +++ b/java/src/main/java/org/forstdb/StatisticsCollector.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; import java.util.concurrent.Executors; diff --git a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java b/java/src/main/java/org/forstdb/StatisticsCollectorCallback.java similarity index 98% rename from java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java rename to java/src/main/java/org/forstdb/StatisticsCollectorCallback.java index bed7828e0..8504b06ac 100644 --- a/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java +++ b/java/src/main/java/org/forstdb/StatisticsCollectorCallback.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Callback interface provided to StatisticsCollector. diff --git a/java/src/main/java/org/rocksdb/StatsCollectorInput.java b/java/src/main/java/org/forstdb/StatsCollectorInput.java similarity index 97% rename from java/src/main/java/org/rocksdb/StatsCollectorInput.java rename to java/src/main/java/org/forstdb/StatsCollectorInput.java index 5bf43ade5..331957064 100644 --- a/java/src/main/java/org/rocksdb/StatsCollectorInput.java +++ b/java/src/main/java/org/forstdb/StatsCollectorInput.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Contains all information necessary to collect statistics from one instance diff --git a/java/src/main/java/org/rocksdb/StatsLevel.java b/java/src/main/java/org/forstdb/StatsLevel.java similarity index 95% rename from java/src/main/java/org/rocksdb/StatsLevel.java rename to java/src/main/java/org/forstdb/StatsLevel.java index 8190e503a..8ce3910bf 100644 --- a/java/src/main/java/org/rocksdb/StatsLevel.java +++ b/java/src/main/java/org/forstdb/StatsLevel.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The level of Statistics to report. @@ -49,7 +49,7 @@ public byte getValue() { * * @param value byte representation of StatsLevel. * - * @return {@link org.rocksdb.StatsLevel} instance. + * @return {@link org.forstdb.StatsLevel} instance. 
* @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/Status.java b/java/src/main/java/org/forstdb/Status.java similarity index 99% rename from java/src/main/java/org/rocksdb/Status.java rename to java/src/main/java/org/forstdb/Status.java index 5f751f422..db7223ee3 100644 --- a/java/src/main/java/org/rocksdb/Status.java +++ b/java/src/main/java/org/forstdb/Status.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.io.Serializable; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/StringAppendOperator.java b/java/src/main/java/org/forstdb/StringAppendOperator.java similarity index 97% rename from java/src/main/java/org/rocksdb/StringAppendOperator.java rename to java/src/main/java/org/forstdb/StringAppendOperator.java index 547371e7c..befb215c1 100644 --- a/java/src/main/java/org/rocksdb/StringAppendOperator.java +++ b/java/src/main/java/org/forstdb/StringAppendOperator.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * StringAppendOperator is a merge operator that concatenates diff --git a/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java b/java/src/main/java/org/forstdb/TableFileCreationBriefInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java rename to java/src/main/java/org/forstdb/TableFileCreationBriefInfo.java index 8dc56796a..5246a868c 100644 --- a/java/src/main/java/org/rocksdb/TableFileCreationBriefInfo.java +++ b/java/src/main/java/org/forstdb/TableFileCreationBriefInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/TableFileCreationInfo.java b/java/src/main/java/org/forstdb/TableFileCreationInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/TableFileCreationInfo.java rename to java/src/main/java/org/forstdb/TableFileCreationInfo.java index 5654603c3..f9c3c368e 100644 --- a/java/src/main/java/org/rocksdb/TableFileCreationInfo.java +++ b/java/src/main/java/org/forstdb/TableFileCreationInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/TableFileCreationReason.java b/java/src/main/java/org/forstdb/TableFileCreationReason.java similarity index 98% rename from java/src/main/java/org/rocksdb/TableFileCreationReason.java rename to java/src/main/java/org/forstdb/TableFileCreationReason.java index d3984663d..13cfb832f 100644 --- a/java/src/main/java/org/rocksdb/TableFileCreationReason.java +++ b/java/src/main/java/org/forstdb/TableFileCreationReason.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; public enum TableFileCreationReason { FLUSH((byte) 0x00), diff --git a/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java b/java/src/main/java/org/forstdb/TableFileDeletionInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/TableFileDeletionInfo.java rename to java/src/main/java/org/forstdb/TableFileDeletionInfo.java index 9a777e333..61a3fdba6 100644 --- a/java/src/main/java/org/rocksdb/TableFileDeletionInfo.java +++ b/java/src/main/java/org/forstdb/TableFileDeletionInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/TableFilter.java b/java/src/main/java/org/forstdb/TableFilter.java similarity index 97% rename from java/src/main/java/org/rocksdb/TableFilter.java rename to java/src/main/java/org/forstdb/TableFilter.java index a39a329fb..0b4e8b400 100644 --- a/java/src/main/java/org/rocksdb/TableFilter.java +++ b/java/src/main/java/org/forstdb/TableFilter.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * Filter for iterating a table. diff --git a/java/src/main/java/org/rocksdb/TableFormatConfig.java b/java/src/main/java/org/forstdb/TableFormatConfig.java similarity index 97% rename from java/src/main/java/org/rocksdb/TableFormatConfig.java rename to java/src/main/java/org/forstdb/TableFormatConfig.java index 726c6f122..891b3cb72 100644 --- a/java/src/main/java/org/rocksdb/TableFormatConfig.java +++ b/java/src/main/java/org/forstdb/TableFormatConfig.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * TableFormatConfig is used to config the internal Table format of a RocksDB. diff --git a/java/src/main/java/org/rocksdb/TableProperties.java b/java/src/main/java/org/forstdb/TableProperties.java similarity index 99% rename from java/src/main/java/org/rocksdb/TableProperties.java rename to java/src/main/java/org/forstdb/TableProperties.java index 7fb1bcc77..4243ce9fe 100644 --- a/java/src/main/java/org/rocksdb/TableProperties.java +++ b/java/src/main/java/org/forstdb/TableProperties.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.Map; diff --git a/java/src/main/java/org/rocksdb/ThreadStatus.java b/java/src/main/java/org/forstdb/ThreadStatus.java similarity index 99% rename from java/src/main/java/org/rocksdb/ThreadStatus.java rename to java/src/main/java/org/forstdb/ThreadStatus.java index 4211453d1..f1a9e5c98 100644 --- a/java/src/main/java/org/rocksdb/ThreadStatus.java +++ b/java/src/main/java/org/forstdb/ThreadStatus.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.util.Map; diff --git a/java/src/main/java/org/rocksdb/ThreadType.java b/java/src/main/java/org/forstdb/ThreadType.java similarity index 98% rename from java/src/main/java/org/rocksdb/ThreadType.java rename to java/src/main/java/org/forstdb/ThreadType.java index cc329f442..4f324c338 100644 --- a/java/src/main/java/org/rocksdb/ThreadType.java +++ b/java/src/main/java/org/forstdb/ThreadType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The type of a thread. diff --git a/java/src/main/java/org/rocksdb/TickerType.java b/java/src/main/java/org/forstdb/TickerType.java similarity index 98% rename from java/src/main/java/org/rocksdb/TickerType.java rename to java/src/main/java/org/forstdb/TickerType.java index f2ca42776..aef29e31a 100644 --- a/java/src/main/java/org/rocksdb/TickerType.java +++ b/java/src/main/java/org/forstdb/TickerType.java @@ -3,16 +3,16 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The logical mapping of tickers defined in rocksdb::Tickers. *

    * Java byte value mappings don't align 1:1 to the c++ values. c++ rocksdb::Tickers enumeration type - * is uint32_t and java org.rocksdb.TickerType is byte, this causes mapping issues when + * is uint32_t and java org.forstdb.TickerType is byte, this causes mapping issues when * rocksdb::Tickers value is greater then 127 (0x7F) for jbyte jni interface as range greater is not * available. Without breaking interface in minor versions, value mappings for - * org.rocksdb.TickerType leverage full byte range [-128 (-0x80), (0x7F)]. Newer tickers added + * org.forstdb.TickerType leverage full byte range [-128 (-0x80), (0x7F)]. Newer tickers added * should descend into negative values until TICKER_ENUM_MAX reaches -128 (-0x80). */ public enum TickerType { @@ -798,7 +798,7 @@ public byte getValue() { * * @param value byte representation of TickerType. * - * @return {@link org.rocksdb.TickerType} instance. + * @return {@link org.forstdb.TickerType} instance. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/TimedEnv.java b/java/src/main/java/org/forstdb/TimedEnv.java similarity index 97% rename from java/src/main/java/org/rocksdb/TimedEnv.java rename to java/src/main/java/org/forstdb/TimedEnv.java index dc8b5d6ef..d8d703db7 100644 --- a/java/src/main/java/org/rocksdb/TimedEnv.java +++ b/java/src/main/java/org/forstdb/TimedEnv.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Timed environment. diff --git a/java/src/main/java/org/rocksdb/TraceOptions.java b/java/src/main/java/org/forstdb/TraceOptions.java similarity index 97% rename from java/src/main/java/org/rocksdb/TraceOptions.java rename to java/src/main/java/org/forstdb/TraceOptions.java index cf5f7bbe1..45eb80624 100644 --- a/java/src/main/java/org/rocksdb/TraceOptions.java +++ b/java/src/main/java/org/forstdb/TraceOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * TraceOptions is used for diff --git a/java/src/main/java/org/rocksdb/TraceWriter.java b/java/src/main/java/org/forstdb/TraceWriter.java similarity index 97% rename from java/src/main/java/org/rocksdb/TraceWriter.java rename to java/src/main/java/org/forstdb/TraceWriter.java index cb0234e9b..baaa640c9 100644 --- a/java/src/main/java/org/rocksdb/TraceWriter.java +++ b/java/src/main/java/org/forstdb/TraceWriter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * TraceWriter allows exporting RocksDB traces to any system, diff --git a/java/src/main/java/org/rocksdb/Transaction.java b/java/src/main/java/org/forstdb/Transaction.java similarity index 98% rename from java/src/main/java/org/rocksdb/Transaction.java rename to java/src/main/java/org/forstdb/Transaction.java index cab7ed287..f5bc2de3c 100644 --- a/java/src/main/java/org/rocksdb/Transaction.java +++ b/java/src/main/java/org/forstdb/Transaction.java @@ -3,9 +3,9 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; -import static org.rocksdb.RocksDB.PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD; +import static org.forstdb.RocksDB.PERFORMANCE_OPTIMIZATION_FOR_A_VERY_SPECIFIC_WORKLOAD; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -19,8 +19,8 @@ * {@link OptimisticTransactionDB} or a {@link TransactionDB} * * To create a transaction, use - * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)} or - * {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)} + * {@link OptimisticTransactionDB#beginTransaction(org.forstdb.WriteOptions)} or + * {@link TransactionDB#beginTransaction(org.forstdb.WriteOptions)} * * It is up to the caller to synchronize access to this object. *
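The Transaction Javadoc above points at beginTransaction() on the renamed TransactionDB; a hedged end-to-end sketch, where the path, account key, and class name are purely illustrative:

    import org.forstdb.Options;
    import org.forstdb.ReadOptions;
    import org.forstdb.RocksDB;
    import org.forstdb.RocksDBException;
    import org.forstdb.Transaction;
    import org.forstdb.TransactionDB;
    import org.forstdb.TransactionDBOptions;
    import org.forstdb.WriteOptions;

    // Illustrative helper; getForUpdate() locks the key until commit() or rollback().
    public final class TransactionSketch {
      static void updateAccount() throws RocksDBException {
        RocksDB.loadLibrary();
        try (final Options opts = new Options().setCreateIfMissing(true);
             final TransactionDBOptions txnDbOpts = new TransactionDBOptions();
             final TransactionDB txnDb = TransactionDB.open(opts, txnDbOpts, "/tmp/txn-db");
             final WriteOptions writeOpts = new WriteOptions();
             final ReadOptions readOpts = new ReadOptions();
             final Transaction txn = txnDb.beginTransaction(writeOpts)) {
          final byte[] current = txn.getForUpdate(readOpts, "acct:1".getBytes(), /*exclusive=*/ true);
          // ... derive a new balance from `current` (omitted) ...
          txn.put("acct:1".getBytes(), "100".getBytes());
          txn.commit();
        }
      }
    }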

    @@ -40,8 +40,8 @@ public class Transaction extends RocksObject { /** * Intentionally package private * as this is called from - * {@link OptimisticTransactionDB#beginTransaction(org.rocksdb.WriteOptions)} - * or {@link TransactionDB#beginTransaction(org.rocksdb.WriteOptions)} + * {@link OptimisticTransactionDB#beginTransaction(org.forstdb.WriteOptions)} + * or {@link TransactionDB#beginTransaction(org.forstdb.WriteOptions)} * * @param parent This must be either {@link TransactionDB} or * {@link OptimisticTransactionDB} @@ -273,7 +273,7 @@ public void rollbackToSavePoint() throws RocksDBException { * transaction (the keys in this transaction do not yet belong to any snapshot * and will be fetched regardless). * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance * @param readOptions Read options. * @param key the key to retrieve the value for. * @@ -308,7 +308,7 @@ public byte[] get(final ColumnFamilyHandle columnFamilyHandle, final ReadOptions * and will be fetched regardless). * * @param readOptions Read options. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} instance + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} instance * @param key the key to retrieve the value for. * * @return a byte array storing the value associated with the input key if @@ -359,7 +359,7 @@ public byte[] get(final ReadOptions readOptions, final byte[] key) /** * Get the value associated with the specified key in the default column family * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. * @return A {@link GetStatus} wrapping the result status and the return value size. @@ -388,7 +388,7 @@ public GetStatus get(final ReadOptions opt, final byte[] key, final byte[] value /** * Get the value associated with the specified key in a specified column family * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param columnFamilyHandle the column family to find the key in * @param key the key to retrieve the value. * @param value the out-value to receive the retrieved value. @@ -418,7 +418,7 @@ public GetStatus get(final ReadOptions opt, final ColumnFamilyHandle columnFamil /** * Get the value associated with the specified key within the specified column family. * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param columnFamilyHandle the column family in which to find the key. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. @@ -466,7 +466,7 @@ public GetStatus get(final ReadOptions opt, final ColumnFamilyHandle columnFamil /** * Get the value associated with the specified key within the default column family. * - * @param opt {@link org.rocksdb.ReadOptions} instance. + * @param opt {@link org.forstdb.ReadOptions} instance. * @param key the key to retrieve the value. It is using position and limit. * Supports direct buffer only. * @param value the out-value to receive the retrieved value. @@ -507,7 +507,7 @@ public GetStatus get(final ReadOptions opt, final ByteBuffer key, final ByteBuff * * @param readOptions Read options. 
* @param columnFamilyHandles {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key @@ -557,7 +557,7 @@ public byte[][] multiGet(final ReadOptions readOptions, * * @param readOptions Read options. * @param columnFamilyHandles {@link java.util.List} containing - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key @@ -606,7 +606,7 @@ public List multiGetAsList(final ReadOptions readOptions, * and will be fetched regardless). * * @param readOptions Read options.= - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key @@ -643,7 +643,7 @@ public byte[][] multiGet(final ReadOptions readOptions, final byte[][] keys) * and will be fetched regardless). * * @param readOptions Read options.= - * {@link org.rocksdb.ColumnFamilyHandle} instances. + * {@link org.forstdb.ColumnFamilyHandle} instances. * @param keys of keys for which values need to be retrieved. * * @return Array of values, one for each key @@ -695,7 +695,7 @@ public List multiGetAsList(final ReadOptions readOptions, final List * * @param readOptions Read options. - * @param columnFamilyHandles {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandles {@link org.forstdb.ColumnFamilyHandle} * instances * @param keys the keys to retrieve the values for. * @@ -1196,7 +1196,7 @@ public byte[][] multiGetForUpdate(final ReadOptions readOptions, *

    * * @param readOptions Read options. - * @param columnFamilyHandles {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandles {@link org.forstdb.ColumnFamilyHandle} * instances * @param keys the keys to retrieve the values for. * @@ -1339,7 +1339,7 @@ public RocksIterator getIterator(final ReadOptions readOptions) { * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. * * @param readOptions Read options. - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * * @return instance of iterator object. @@ -1367,7 +1367,7 @@ public RocksIterator getIterator(final ReadOptions readOptions, * The returned iterator is only valid until {@link #commit()}, * {@link #rollback()}, or {@link #rollbackToSavePoint()} is called. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * * @return instance of iterator object. @@ -1475,7 +1475,7 @@ public void put(final byte[] key, final byte[] value) put(nativeHandle_, key, 0, key.length, value, 0, value.length); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #put(ColumnFamilyHandle, byte[], byte[])} but allows * you to specify the key and value in several parts that will be @@ -1613,7 +1613,7 @@ public void put(final ColumnFamilyHandle columnFamilyHandle, final ByteBuffer ke put(columnFamilyHandle, key, value, false); } - // TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + // TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #put(byte[], byte[])} but allows * you to specify the key and value in several parts that will be @@ -1927,7 +1927,7 @@ public void delete(final byte[] key) throws RocksDBException { delete(nativeHandle_, key, key.length); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #delete(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be @@ -1972,7 +1972,7 @@ public void delete(final ColumnFamilyHandle columnFamilyHandle, columnFamilyHandle.nativeHandle_, false); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #delete(byte[])} but allows * you to specify key the in several parts that will be @@ -2082,7 +2082,7 @@ public void singleDelete(final byte[] key) throws RocksDBException { singleDelete(nativeHandle_, key, key.length); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #singleDelete(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be @@ -2128,7 +2128,7 @@ public void singleDelete(final ColumnFamilyHandle columnFamilyHandle, final byte columnFamilyHandle.nativeHandle_, false); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #singleDelete(byte[])} but allows * you to specify the key in several parts that will be @@ -2197,7 +2197,7 @@ public void putUntracked(final byte[] key, final byte[] value) 
putUntracked(nativeHandle_, key, key.length, value, value.length); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #putUntracked(ColumnFamilyHandle, byte[], byte[])} but * allows you to specify the key and value in several parts that will be @@ -2218,7 +2218,7 @@ public void putUntracked(final ColumnFamilyHandle columnFamilyHandle, valueParts.length, columnFamilyHandle.nativeHandle_); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #putUntracked(byte[], byte[])} but * allows you to specify the key and value in several parts that will be @@ -2399,7 +2399,7 @@ public void deleteUntracked(final byte[] key) throws RocksDBException { deleteUntracked(nativeHandle_, key, key.length); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #deleteUntracked(ColumnFamilyHandle, byte[])} but allows * you to specify the key in several parts that will be @@ -2418,7 +2418,7 @@ public void deleteUntracked(final ColumnFamilyHandle columnFamilyHandle, columnFamilyHandle.nativeHandle_); } - //TODO(AR) refactor if we implement org.rocksdb.SliceParts in future + //TODO(AR) refactor if we implement org.forstdb.SliceParts in future /** * Similar to {@link #deleteUntracked(byte[])} but allows * you to specify the key in several parts that will be @@ -2606,7 +2606,7 @@ public void setWriteOptions(final WriteOptions writeOptions) { * calling {@code #undoGetForUpdate(ColumnFamilyHandle, byte[])} may release * any held locks for this key. * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the key to retrieve the value for. */ @@ -2804,7 +2804,7 @@ public enum TransactionState { * * @param value byte representation of TransactionState. * - * @return {@link org.rocksdb.Transaction.TransactionState} instance or null. + * @return {@link org.forstdb.Transaction.TransactionState} instance or null. * @throws java.lang.IllegalArgumentException if an invalid * value is provided. */ diff --git a/java/src/main/java/org/rocksdb/TransactionDB.java b/java/src/main/java/org/forstdb/TransactionDB.java similarity index 98% rename from java/src/main/java/org/rocksdb/TransactionDB.java rename to java/src/main/java/org/forstdb/TransactionDB.java index a4ee951dc..ef4b36c7f 100644 --- a/java/src/main/java/org/rocksdb/TransactionDB.java +++ b/java/src/main/java/org/forstdb/TransactionDB.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.ArrayList; import java.util.List; @@ -29,8 +29,8 @@ private TransactionDB(final long nativeHandle) { /** * Open a TransactionDB, similar to {@link RocksDB#open(Options, String)}. * - * @param options {@link org.rocksdb.Options} instance. - * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions} + * @param options {@link org.forstdb.Options} instance. + * @param transactionDbOptions {@link org.forstdb.TransactionDBOptions} * instance. * @param path the path to the rocksdb. 
* @@ -59,8 +59,8 @@ public static TransactionDB open(final Options options, * Open a TransactionDB, similar to * {@link RocksDB#open(DBOptions, String, List, List)}. * - * @param dbOptions {@link org.rocksdb.DBOptions} instance. - * @param transactionDbOptions {@link org.rocksdb.TransactionDBOptions} + * @param dbOptions {@link org.forstdb.DBOptions} instance. + * @param transactionDbOptions {@link org.forstdb.TransactionDBOptions} * instance. * @param path the path to the rocksdb. * @param columnFamilyDescriptors list of column family descriptors diff --git a/java/src/main/java/org/rocksdb/TransactionDBOptions.java b/java/src/main/java/org/forstdb/TransactionDBOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/TransactionDBOptions.java rename to java/src/main/java/org/forstdb/TransactionDBOptions.java index 391025d6a..0ee96e10a 100644 --- a/java/src/main/java/org/rocksdb/TransactionDBOptions.java +++ b/java/src/main/java/org/forstdb/TransactionDBOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class TransactionDBOptions extends RocksObject { diff --git a/java/src/main/java/org/rocksdb/TransactionLogIterator.java b/java/src/main/java/org/forstdb/TransactionLogIterator.java similarity index 89% rename from java/src/main/java/org/rocksdb/TransactionLogIterator.java rename to java/src/main/java/org/forstdb/TransactionLogIterator.java index 5d9ec58d7..078191ea2 100644 --- a/java/src/main/java/org/rocksdb/TransactionLogIterator.java +++ b/java/src/main/java/org/forstdb/TransactionLogIterator.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** *

A TransactionLogIterator is used to iterate over the transactions in a db.
@@ -30,7 +30,7 @@ public void next() {
   /**
    * Throws RocksDBException if something went wrong.
    *
-   * @throws org.rocksdb.RocksDBException if something went
+   * @throws org.forstdb.RocksDBException if something went
    *     wrong in the underlying C++ code.
    */
   public void status() throws RocksDBException {
@@ -44,7 +44,7 @@ public void status() throws RocksDBException {
    *
    * ONLY use if Valid() is true and status() is OK.
    *
-   * @return {@link org.rocksdb.TransactionLogIterator.BatchResult}
+   * @return {@link org.forstdb.TransactionLogIterator.BatchResult}
    *     instance.
    */
   public BatchResult getBatch() {
@@ -71,7 +71,7 @@ public static final class BatchResult {
      * Constructor of BatchResult class.
      *
      * @param sequenceNumber related to this BatchResult instance.
-     * @param nativeHandle to {@link org.rocksdb.WriteBatch}
+     * @param nativeHandle to {@link org.forstdb.WriteBatch}
      *     native instance.
      */
     public BatchResult(final long sequenceNumber,
@@ -90,10 +90,10 @@ public long sequenceNumber() {
     }
 
     /**
-     * Return contained {@link org.rocksdb.WriteBatch}
+     * Return contained {@link org.forstdb.WriteBatch}
      * instance
    * - * @return {@link org.rocksdb.WriteBatch} instance. + * @return {@link org.forstdb.WriteBatch} instance. */ public WriteBatch writeBatch() { return writeBatch_; diff --git a/java/src/main/java/org/rocksdb/TransactionOptions.java b/java/src/main/java/org/forstdb/TransactionOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/TransactionOptions.java rename to java/src/main/java/org/forstdb/TransactionOptions.java index f93d3cb3c..311403053 100644 --- a/java/src/main/java/org/rocksdb/TransactionOptions.java +++ b/java/src/main/java/org/forstdb/TransactionOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class TransactionOptions extends RocksObject implements TransactionalOptions { diff --git a/java/src/main/java/org/rocksdb/TransactionalDB.java b/java/src/main/java/org/forstdb/TransactionalDB.java similarity index 99% rename from java/src/main/java/org/rocksdb/TransactionalDB.java rename to java/src/main/java/org/forstdb/TransactionalDB.java index 1ba955496..ccf34720b 100644 --- a/java/src/main/java/org/rocksdb/TransactionalDB.java +++ b/java/src/main/java/org/forstdb/TransactionalDB.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; interface TransactionalDB> extends AutoCloseable { /** diff --git a/java/src/main/java/org/rocksdb/TransactionalOptions.java b/java/src/main/java/org/forstdb/TransactionalOptions.java similarity index 97% rename from java/src/main/java/org/rocksdb/TransactionalOptions.java rename to java/src/main/java/org/forstdb/TransactionalOptions.java index 2175693fd..cd7173789 100644 --- a/java/src/main/java/org/rocksdb/TransactionalOptions.java +++ b/java/src/main/java/org/forstdb/TransactionalOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; interface TransactionalOptions> diff --git a/java/src/main/java/org/rocksdb/TtlDB.java b/java/src/main/java/org/forstdb/TtlDB.java similarity index 97% rename from java/src/main/java/org/rocksdb/TtlDB.java rename to java/src/main/java/org/forstdb/TtlDB.java index 9a90ba358..83ed89ae0 100644 --- a/java/src/main/java/org/rocksdb/TtlDB.java +++ b/java/src/main/java/org/forstdb/TtlDB.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.List; @@ -55,7 +55,7 @@ public class TtlDB extends RocksDB { * *

Database is opened in read-write mode without default TTL.
    *
-   * @param options {@link org.rocksdb.Options} instance.
+   * @param options {@link org.forstdb.Options} instance.
    * @param db_path path to database.
    *
    * @return TtlDB instance.
@@ -71,7 +71,7 @@ public static TtlDB open(final Options options, final String db_path)
   /**
    * Opens a TtlDB.
    *
-   * @param options {@link org.rocksdb.Options} instance.
+   * @param options {@link org.forstdb.Options} instance.
    * @param db_path path to database.
    * @param ttl time to live for new entries.
    * @param readOnly boolean value indicating if database if db is
@@ -90,7 +90,7 @@ public static TtlDB open(final Options options, final String db_path,
   /**
    * Opens a TtlDB.
    * - * @param options {@link org.rocksdb.Options} instance. + * @param options {@link org.forstdb.Options} instance. * @param db_path path to database. * @param columnFamilyDescriptors list of column family descriptors * @param columnFamilyHandles will be filled with ColumnFamilyHandle instances @@ -201,7 +201,7 @@ public void close() { * @param columnFamilyDescriptor column family to be created. * @param ttl TTL to set for this column family. * - * @return {@link org.rocksdb.ColumnFamilyHandle} instance. + * @return {@link org.forstdb.ColumnFamilyHandle} instance. * * @throws RocksDBException thrown if error happens in underlying * native library. diff --git a/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java b/java/src/main/java/org/forstdb/TxnDBWritePolicy.java similarity index 98% rename from java/src/main/java/org/rocksdb/TxnDBWritePolicy.java rename to java/src/main/java/org/forstdb/TxnDBWritePolicy.java index 28cb8556b..b51dd4103 100644 --- a/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java +++ b/java/src/main/java/org/forstdb/TxnDBWritePolicy.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * The transaction db write policy. diff --git a/java/src/main/java/org/rocksdb/UInt64AddOperator.java b/java/src/main/java/org/forstdb/UInt64AddOperator.java similarity index 96% rename from java/src/main/java/org/rocksdb/UInt64AddOperator.java rename to java/src/main/java/org/forstdb/UInt64AddOperator.java index 0cffdce8c..2435eafa7 100644 --- a/java/src/main/java/org/rocksdb/UInt64AddOperator.java +++ b/java/src/main/java/org/forstdb/UInt64AddOperator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Uint64AddOperator is a merge operator that accumlates a long diff --git a/java/src/main/java/org/rocksdb/VectorMemTableConfig.java b/java/src/main/java/org/forstdb/VectorMemTableConfig.java similarity index 98% rename from java/src/main/java/org/rocksdb/VectorMemTableConfig.java rename to java/src/main/java/org/forstdb/VectorMemTableConfig.java index fb1e7a948..039c68a1b 100644 --- a/java/src/main/java/org/rocksdb/VectorMemTableConfig.java +++ b/java/src/main/java/org/forstdb/VectorMemTableConfig.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; /** * The config for vector memtable representation. diff --git a/java/src/main/java/org/rocksdb/WALRecoveryMode.java b/java/src/main/java/org/forstdb/WALRecoveryMode.java similarity index 99% rename from java/src/main/java/org/rocksdb/WALRecoveryMode.java rename to java/src/main/java/org/forstdb/WALRecoveryMode.java index b8c098f94..c82b741db 100644 --- a/java/src/main/java/org/rocksdb/WALRecoveryMode.java +++ b/java/src/main/java/org/forstdb/WALRecoveryMode.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; /** * The WAL Recover Mode diff --git a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java b/java/src/main/java/org/forstdb/WBWIRocksIterator.java similarity index 95% rename from java/src/main/java/org/rocksdb/WBWIRocksIterator.java rename to java/src/main/java/org/forstdb/WBWIRocksIterator.java index 25d6e6f9d..1e44fadb8 100644 --- a/java/src/main/java/org/rocksdb/WBWIRocksIterator.java +++ b/java/src/main/java/org/forstdb/WBWIRocksIterator.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -101,11 +101,11 @@ public void close() { /** * Represents an entry returned by - * {@link org.rocksdb.WBWIRocksIterator#entry()} + * {@link org.forstdb.WBWIRocksIterator#entry()} * * It is worth noting that a WriteEntry with - * the type {@link org.rocksdb.WBWIRocksIterator.WriteType#DELETE} - * or {@link org.rocksdb.WBWIRocksIterator.WriteType#LOG} + * the type {@link org.forstdb.WBWIRocksIterator.WriteType#DELETE} + * or {@link org.forstdb.WBWIRocksIterator.WriteType#LOG} * will not have a value. */ public static class WriteEntry implements AutoCloseable { @@ -118,7 +118,7 @@ public static class WriteEntry implements AutoCloseable { * should only be instantiated in * this manner by the outer WBWIRocksIterator * class; The class members are then modified - * by calling {@link org.rocksdb.WBWIRocksIterator#entry()} + * by calling {@link org.forstdb.WBWIRocksIterator#entry()} */ private WriteEntry() { key = new DirectSlice(); diff --git a/java/src/main/java/org/rocksdb/WalFileType.java b/java/src/main/java/org/forstdb/WalFileType.java similarity index 98% rename from java/src/main/java/org/rocksdb/WalFileType.java rename to java/src/main/java/org/forstdb/WalFileType.java index fed27ed11..117c59aa4 100644 --- a/java/src/main/java/org/rocksdb/WalFileType.java +++ b/java/src/main/java/org/forstdb/WalFileType.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum WalFileType { /** diff --git a/java/src/main/java/org/rocksdb/WalFilter.java b/java/src/main/java/org/forstdb/WalFilter.java similarity index 99% rename from java/src/main/java/org/rocksdb/WalFilter.java rename to java/src/main/java/org/forstdb/WalFilter.java index a2836634a..330d20c88 100644 --- a/java/src/main/java/org/rocksdb/WalFilter.java +++ b/java/src/main/java/org/forstdb/WalFilter.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Map; diff --git a/java/src/main/java/org/rocksdb/WalProcessingOption.java b/java/src/main/java/org/forstdb/WalProcessingOption.java similarity index 98% rename from java/src/main/java/org/rocksdb/WalProcessingOption.java rename to java/src/main/java/org/forstdb/WalProcessingOption.java index 3a9c2be0e..36ee14c99 100644 --- a/java/src/main/java/org/rocksdb/WalProcessingOption.java +++ b/java/src/main/java/org/forstdb/WalProcessingOption.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; public enum WalProcessingOption { /* diff --git a/java/src/main/java/org/rocksdb/WriteBatch.java b/java/src/main/java/org/forstdb/WriteBatch.java similarity index 99% rename from java/src/main/java/org/rocksdb/WriteBatch.java rename to java/src/main/java/org/forstdb/WriteBatch.java index 49e1f7f20..1cc7736ca 100644 --- a/java/src/main/java/org/rocksdb/WriteBatch.java +++ b/java/src/main/java/org/forstdb/WriteBatch.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/WriteBatchInterface.java b/java/src/main/java/org/forstdb/WriteBatchInterface.java similarity index 98% rename from java/src/main/java/org/rocksdb/WriteBatchInterface.java rename to java/src/main/java/org/forstdb/WriteBatchInterface.java index 32cd8d1e7..6c7166875 100644 --- a/java/src/main/java/org/rocksdb/WriteBatchInterface.java +++ b/java/src/main/java/org/forstdb/WriteBatchInterface.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; @@ -33,7 +33,7 @@ public interface WriteBatchInterface { *

Store the mapping "key->value" within given column
    * family.
    *
-   * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle}
+   * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle}
    *     instance
    * @param key the specified key to be inserted.
    * @param value the value associated with the specified key.
@@ -58,7 +58,7 @@ void put(ColumnFamilyHandle columnFamilyHandle, byte[] key, byte[] value)
    * Store the mapping "key->value" within given column
    * family.
    * - * @param columnFamilyHandle {@link org.rocksdb.ColumnFamilyHandle} + * @param columnFamilyHandle {@link org.forstdb.ColumnFamilyHandle} * instance * @param key the specified key to be inserted. It is using position and limit. * Supports direct buffer only. diff --git a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java b/java/src/main/java/org/forstdb/WriteBatchWithIndex.java similarity index 94% rename from java/src/main/java/org/rocksdb/WriteBatchWithIndex.java rename to java/src/main/java/org/forstdb/WriteBatchWithIndex.java index d41be5856..02a3be8fb 100644 --- a/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java +++ b/java/src/main/java/org/forstdb/WriteBatchWithIndex.java @@ -3,20 +3,20 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.nio.ByteBuffer; /** - * Similar to {@link org.rocksdb.WriteBatch} but with a binary searchable + * Similar to {@link org.forstdb.WriteBatch} but with a binary searchable * index built for all the keys inserted. *

  * Calling put, merge, remove or putLogData calls the same function
- * as with {@link org.rocksdb.WriteBatch} whilst also building an index.
+ * as with {@link org.forstdb.WriteBatch} whilst also building an index.
  *
    - * A user can call {@link org.rocksdb.WriteBatchWithIndex#newIterator()} to + * A user can call {@link org.forstdb.WriteBatchWithIndex#newIterator()} to * create an iterator over the write batch or - * {@link org.rocksdb.WriteBatchWithIndex#newIteratorWithBase(org.rocksdb.RocksIterator)} + * {@link org.forstdb.WriteBatchWithIndex#newIteratorWithBase(org.forstdb.RocksIterator)} * to get an iterator for the database with Read-Your-Own-Writes like capability */ public class WriteBatchWithIndex extends AbstractWriteBatch { @@ -80,7 +80,7 @@ public WriteBatchWithIndex( /** * Create an iterator of a column family. User can call - * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to + * {@link org.forstdb.RocksIteratorInterface#seek(byte[])} to * search to the next entry of or after a key. Keys will be iterated in the * order given by index_comparator. For multiple updates on the same key, * each update will be returned as a separate entry, in the order of update @@ -98,7 +98,7 @@ public WBWIRocksIterator newIterator( /** * Create an iterator of the default column family. User can call - * {@link org.rocksdb.RocksIteratorInterface#seek(byte[])} to + * {@link org.forstdb.RocksIteratorInterface#seek(byte[])} to * search to the next entry of or after a key. Keys will be iterated in the * order given by index_comparator. For multiple updates on the same key, * each update will be returned as a separate entry, in the order of update @@ -112,7 +112,7 @@ public WBWIRocksIterator newIterator() { /** * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} + * creating a new Iterator that will use {@link org.forstdb.WBWIRocksIterator} * as a delta and baseIterator as a base *

    * Updating write batch with the current key of the iterator is not safe. @@ -123,7 +123,7 @@ public WBWIRocksIterator newIterator() { * * @param columnFamilyHandle The column family to iterate over * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} + * e.g. {@link org.forstdb.RocksDB#newIterator()} * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. */ @@ -135,7 +135,7 @@ public RocksIterator newIteratorWithBase( /** * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} + * creating a new Iterator that will use {@link org.forstdb.WBWIRocksIterator} * as a delta and baseIterator as a base *

    * Updating write batch with the current key of the iterator is not safe. @@ -146,7 +146,7 @@ public RocksIterator newIteratorWithBase( * * @param columnFamilyHandle The column family to iterate over * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} + * e.g. {@link org.forstdb.RocksDB#newIterator()} * @param readOptions the read options, or null * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. @@ -165,12 +165,12 @@ public RocksIterator newIteratorWithBase(final ColumnFamilyHandle columnFamilyHa /** * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} + * creating a new Iterator that will use {@link org.forstdb.WBWIRocksIterator} * as a delta and baseIterator as a base. Operates on the default column * family. * * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} + * e.g. {@link org.forstdb.RocksDB#newIterator()} * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. */ @@ -180,12 +180,12 @@ public RocksIterator newIteratorWithBase(final RocksIterator baseIterator) { /** * Provides Read-Your-Own-Writes like functionality by - * creating a new Iterator that will use {@link org.rocksdb.WBWIRocksIterator} + * creating a new Iterator that will use {@link org.forstdb.WBWIRocksIterator} * as a delta and baseIterator as a base. Operates on the default column * family. * * @param baseIterator The base iterator, - * e.g. {@link org.rocksdb.RocksDB#newIterator()} + * e.g. {@link org.forstdb.RocksDB#newIterator()} * @param readOptions the read options, or null * @return An iterator which shows a view comprised of both the database * point-in-time from baseIterator and modifications made in this write batch. diff --git a/java/src/main/java/org/rocksdb/WriteBufferManager.java b/java/src/main/java/org/forstdb/WriteBufferManager.java similarity index 98% rename from java/src/main/java/org/rocksdb/WriteBufferManager.java rename to java/src/main/java/org/forstdb/WriteBufferManager.java index 40176aba4..18ecfea2b 100644 --- a/java/src/main/java/org/rocksdb/WriteBufferManager.java +++ b/java/src/main/java/org/forstdb/WriteBufferManager.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Java wrapper over native write_buffer_manager class diff --git a/java/src/main/java/org/rocksdb/WriteOptions.java b/java/src/main/java/org/forstdb/WriteOptions.java similarity index 99% rename from java/src/main/java/org/rocksdb/WriteOptions.java rename to java/src/main/java/org/forstdb/WriteOptions.java index 7c184b094..26598d72e 100644 --- a/java/src/main/java/org/rocksdb/WriteOptions.java +++ b/java/src/main/java/org/forstdb/WriteOptions.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Options that control write operations. 
diff --git a/java/src/main/java/org/rocksdb/WriteStallCondition.java b/java/src/main/java/org/forstdb/WriteStallCondition.java similarity index 98% rename from java/src/main/java/org/rocksdb/WriteStallCondition.java rename to java/src/main/java/org/forstdb/WriteStallCondition.java index 98d9e2ce4..acc0dcdba 100644 --- a/java/src/main/java/org/rocksdb/WriteStallCondition.java +++ b/java/src/main/java/org/forstdb/WriteStallCondition.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public enum WriteStallCondition { DELAYED((byte) 0x0), diff --git a/java/src/main/java/org/rocksdb/WriteStallInfo.java b/java/src/main/java/org/forstdb/WriteStallInfo.java similarity index 99% rename from java/src/main/java/org/rocksdb/WriteStallInfo.java rename to java/src/main/java/org/forstdb/WriteStallInfo.java index 1cade0acb..dd48eb434 100644 --- a/java/src/main/java/org/rocksdb/WriteStallInfo.java +++ b/java/src/main/java/org/forstdb/WriteStallInfo.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Objects; diff --git a/java/src/main/java/org/rocksdb/util/BufferUtil.java b/java/src/main/java/org/forstdb/util/BufferUtil.java similarity index 95% rename from java/src/main/java/org/rocksdb/util/BufferUtil.java rename to java/src/main/java/org/forstdb/util/BufferUtil.java index 54be3e693..8bec35922 100644 --- a/java/src/main/java/org/rocksdb/util/BufferUtil.java +++ b/java/src/main/java/org/forstdb/util/BufferUtil.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; public class BufferUtil { public static void CheckBounds(final int offset, final int len, final int size) { diff --git a/java/src/main/java/org/rocksdb/util/ByteUtil.java b/java/src/main/java/org/forstdb/util/ByteUtil.java similarity index 98% rename from java/src/main/java/org/rocksdb/util/ByteUtil.java rename to java/src/main/java/org/forstdb/util/ByteUtil.java index 5d64d5dcf..c42c4b690 100644 --- a/java/src/main/java/org/rocksdb/util/ByteUtil.java +++ b/java/src/main/java/org/forstdb/util/ByteUtil.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java b/java/src/main/java/org/forstdb/util/BytewiseComparator.java similarity index 95% rename from java/src/main/java/org/rocksdb/util/BytewiseComparator.java rename to java/src/main/java/org/forstdb/util/BytewiseComparator.java index 202241d3b..50c5fde9f 100644 --- a/java/src/main/java/org/rocksdb/util/BytewiseComparator.java +++ b/java/src/main/java/org/forstdb/util/BytewiseComparator.java @@ -3,13 +3,13 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb.util; +package org.forstdb.util; -import org.rocksdb.*; +import org.forstdb.*; import java.nio.ByteBuffer; -import static org.rocksdb.util.ByteUtil.memcmp; +import static org.forstdb.util.ByteUtil.memcmp; /** * This is a Java Native implementation of the C++ @@ -19,7 +19,7 @@ * less than their C++ counterparts due to the bridging overhead, * as such you likely don't want to use this apart from benchmarking * and you most likely instead wanted - * {@link org.rocksdb.BuiltinComparator#BYTEWISE_COMPARATOR} + * {@link org.forstdb.BuiltinComparator#BYTEWISE_COMPARATOR} */ public final class BytewiseComparator extends AbstractComparator { diff --git a/java/src/main/java/org/rocksdb/util/Environment.java b/java/src/main/java/org/forstdb/util/Environment.java similarity index 99% rename from java/src/main/java/org/rocksdb/util/Environment.java rename to java/src/main/java/org/forstdb/util/Environment.java index 78b73dc5d..57217a653 100644 --- a/java/src/main/java/org/rocksdb/util/Environment.java +++ b/java/src/main/java/org/forstdb/util/Environment.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb.util; +package org.forstdb.util; import java.io.File; import java.io.IOException; diff --git a/java/src/main/java/org/rocksdb/util/IntComparator.java b/java/src/main/java/org/forstdb/util/IntComparator.java similarity index 94% rename from java/src/main/java/org/rocksdb/util/IntComparator.java rename to java/src/main/java/org/forstdb/util/IntComparator.java index 2caf0c601..44dfa9f73 100644 --- a/java/src/main/java/org/rocksdb/util/IntComparator.java +++ b/java/src/main/java/org/forstdb/util/IntComparator.java @@ -3,10 +3,10 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; -import org.rocksdb.AbstractComparator; -import org.rocksdb.ComparatorOptions; +import org.forstdb.AbstractComparator; +import org.forstdb.ComparatorOptions; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java b/java/src/main/java/org/forstdb/util/ReverseBytewiseComparator.java similarity index 93% rename from java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java rename to java/src/main/java/org/forstdb/util/ReverseBytewiseComparator.java index 3d3c42941..87a8fb4e8 100644 --- a/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java +++ b/java/src/main/java/org/forstdb/util/ReverseBytewiseComparator.java @@ -3,12 +3,12 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb.util; +package org.forstdb.util; -import org.rocksdb.AbstractComparator; -import org.rocksdb.BuiltinComparator; -import org.rocksdb.ComparatorOptions; -import org.rocksdb.Slice; +import org.forstdb.AbstractComparator; +import org.forstdb.BuiltinComparator; +import org.forstdb.ComparatorOptions; +import org.forstdb.Slice; import java.nio.ByteBuffer; diff --git a/java/src/main/java/org/rocksdb/util/SizeUnit.java b/java/src/main/java/org/forstdb/util/SizeUnit.java similarity index 95% rename from java/src/main/java/org/rocksdb/util/SizeUnit.java rename to java/src/main/java/org/forstdb/util/SizeUnit.java index 0f717e8d4..313db8d44 100644 --- a/java/src/main/java/org/rocksdb/util/SizeUnit.java +++ b/java/src/main/java/org/forstdb/util/SizeUnit.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; public class SizeUnit { public static final long KB = 1024L; diff --git a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java b/java/src/test/java/org/forstdb/AbstractTransactionTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/AbstractTransactionTest.java rename to java/src/test/java/org/forstdb/AbstractTransactionTest.java index 2977d78fd..09f69ba2c 100644 --- a/java/src/test/java/org/rocksdb/AbstractTransactionTest.java +++ b/java/src/test/java/org/forstdb/AbstractTransactionTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java b/java/src/test/java/org/forstdb/BackupEngineOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java rename to java/src/test/java/org/forstdb/BackupEngineOptionsTest.java index b07f8d33c..e9f6087f3 100644 --- a/java/src/test/java/org/rocksdb/BackupEngineOptionsTest.java +++ b/java/src/test/java/org/forstdb/BackupEngineOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/BackupEngineTest.java b/java/src/test/java/org/forstdb/BackupEngineTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/BackupEngineTest.java rename to java/src/test/java/org/forstdb/BackupEngineTest.java index 67145f846..9b136a527 100644 --- a/java/src/test/java/org/rocksdb/BackupEngineTest.java +++ b/java/src/test/java/org/forstdb/BackupEngineTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/BlobOptionsTest.java b/java/src/test/java/org/forstdb/BlobOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/BlobOptionsTest.java rename to java/src/test/java/org/forstdb/BlobOptionsTest.java index a0a2af84a..a7895fe34 100644 --- a/java/src/test/java/org/rocksdb/BlobOptionsTest.java +++ b/java/src/test/java/org/forstdb/BlobOptionsTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java b/java/src/test/java/org/forstdb/BlockBasedTableConfigTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java rename to java/src/test/java/org/forstdb/BlockBasedTableConfigTest.java index 13247d1e6..51b23ede8 100644 --- a/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java +++ b/java/src/test/java/org/forstdb/BlockBasedTableConfigTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; diff --git a/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java b/java/src/test/java/org/forstdb/BuiltinComparatorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/BuiltinComparatorTest.java rename to java/src/test/java/org/forstdb/BuiltinComparatorTest.java index e238ae07b..70e7ccf17 100644 --- a/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java +++ b/java/src/test/java/org/forstdb/BuiltinComparatorTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java b/java/src/test/java/org/forstdb/ByteBufferUnsupportedOperationTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java rename to java/src/test/java/org/forstdb/ByteBufferUnsupportedOperationTest.java index f596f573f..b8d38cbd2 100644 --- a/java/src/test/java/org/rocksdb/ByteBufferUnsupportedOperationTest.java +++ b/java/src/test/java/org/forstdb/ByteBufferUnsupportedOperationTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import java.nio.charset.StandardCharsets; import java.util.*; @@ -12,7 +12,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.ReverseBytewiseComparator; +import org.forstdb.util.ReverseBytewiseComparator; public class ByteBufferUnsupportedOperationTest { @ClassRule diff --git a/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java b/java/src/test/java/org/forstdb/BytewiseComparatorRegressionTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java rename to java/src/test/java/org/forstdb/BytewiseComparatorRegressionTest.java index 13aa6c2bd..84cfeace5 100644 --- a/java/src/test/java/org/rocksdb/BytewiseComparatorRegressionTest.java +++ b/java/src/test/java/org/forstdb/BytewiseComparatorRegressionTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.junit.Assert.assertArrayEquals; @@ -14,7 +14,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.BytewiseComparator; +import org.forstdb.util.BytewiseComparator; /** * This test confirms that the following issues were in fact resolved diff --git a/java/src/test/java/org/rocksdb/CheckPointTest.java b/java/src/test/java/org/forstdb/CheckPointTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/CheckPointTest.java rename to java/src/test/java/org/forstdb/CheckPointTest.java index 3b0b5d86a..eb846e492 100644 --- a/java/src/test/java/org/rocksdb/CheckPointTest.java +++ b/java/src/test/java/org/forstdb/CheckPointTest.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; diff --git a/java/src/test/java/org/rocksdb/ClockCacheTest.java b/java/src/test/java/org/forstdb/ClockCacheTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/ClockCacheTest.java rename to java/src/test/java/org/forstdb/ClockCacheTest.java index 718c24f70..8e466e1f4 100644 --- a/java/src/test/java/org/rocksdb/ClockCacheTest.java +++ b/java/src/test/java/org/forstdb/ClockCacheTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java b/java/src/test/java/org/forstdb/ColumnFamilyOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java rename to java/src/test/java/org/forstdb/ColumnFamilyOptionsTest.java index 35a04a697..aac9e0f24 100644 --- a/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java +++ b/java/src/test/java/org/forstdb/ColumnFamilyOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; @@ -13,7 +13,7 @@ import java.util.*; import org.junit.ClassRule; import org.junit.Test; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; +import org.forstdb.test.RemoveEmptyValueCompactionFilterFactory; public class ColumnFamilyOptionsTest { diff --git a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java b/java/src/test/java/org/forstdb/ColumnFamilyTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/ColumnFamilyTest.java rename to java/src/test/java/org/forstdb/ColumnFamilyTest.java index fb8a45085..a629ccc95 100644 --- a/java/src/test/java/org/rocksdb/ColumnFamilyTest.java +++ b/java/src/test/java/org/forstdb/ColumnFamilyTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java b/java/src/test/java/org/forstdb/CompactRangeOptionsTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java rename to java/src/test/java/org/forstdb/CompactRangeOptionsTest.java index 549b74beb..1b7941365 100644 --- a/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java +++ b/java/src/test/java/org/forstdb/CompactRangeOptionsTest.java @@ -3,10 +3,10 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; -import org.rocksdb.CompactRangeOptions.BottommostLevelCompaction; +import org.forstdb.CompactRangeOptions.BottommostLevelCompaction; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java b/java/src/test/java/org/forstdb/CompactionFilterFactoryTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java rename to java/src/test/java/org/forstdb/CompactionFilterFactoryTest.java index 35a14eb54..7209161f5 100644 --- a/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java +++ b/java/src/test/java/org/forstdb/CompactionFilterFactoryTest.java @@ -3,12 +3,12 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; +import org.forstdb.test.RemoveEmptyValueCompactionFilterFactory; import java.util.ArrayList; import java.util.Arrays; diff --git a/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java b/java/src/test/java/org/forstdb/CompactionJobInfoTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/CompactionJobInfoTest.java rename to java/src/test/java/org/forstdb/CompactionJobInfoTest.java index c71b0da16..6c38c0c01 100644 --- a/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java +++ b/java/src/test/java/org/forstdb/CompactionJobInfoTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java b/java/src/test/java/org/forstdb/CompactionJobStatsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/CompactionJobStatsTest.java rename to java/src/test/java/org/forstdb/CompactionJobStatsTest.java index 5c1eb2aab..30df1f4df 100644 --- a/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java +++ b/java/src/test/java/org/forstdb/CompactionJobStatsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java b/java/src/test/java/org/forstdb/CompactionOptionsFIFOTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java rename to java/src/test/java/org/forstdb/CompactionOptionsFIFOTest.java index 841615e67..6da11aa52 100644 --- a/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java +++ b/java/src/test/java/org/forstdb/CompactionOptionsFIFOTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsTest.java b/java/src/test/java/org/forstdb/CompactionOptionsTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/CompactionOptionsTest.java rename to java/src/test/java/org/forstdb/CompactionOptionsTest.java index 9b7d79694..6f070c4d3 100644 --- a/java/src/test/java/org/rocksdb/CompactionOptionsTest.java +++ b/java/src/test/java/org/forstdb/CompactionOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java b/java/src/test/java/org/forstdb/CompactionOptionsUniversalTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java rename to java/src/test/java/org/forstdb/CompactionOptionsUniversalTest.java index 5e2d195b6..6aa0ef2cc 100644 --- a/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java +++ b/java/src/test/java/org/forstdb/CompactionOptionsUniversalTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionPriorityTest.java b/java/src/test/java/org/forstdb/CompactionPriorityTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/CompactionPriorityTest.java rename to java/src/test/java/org/forstdb/CompactionPriorityTest.java index b078e132f..9902b9c97 100644 --- a/java/src/test/java/org/rocksdb/CompactionPriorityTest.java +++ b/java/src/test/java/org/forstdb/CompactionPriorityTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java b/java/src/test/java/org/forstdb/CompactionStopStyleTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/CompactionStopStyleTest.java rename to java/src/test/java/org/forstdb/CompactionStopStyleTest.java index 4c8a20950..978aee632 100644 --- a/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java +++ b/java/src/test/java/org/forstdb/CompactionStopStyleTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java b/java/src/test/java/org/forstdb/ComparatorOptionsTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/ComparatorOptionsTest.java rename to java/src/test/java/org/forstdb/ComparatorOptionsTest.java index 3e90b9f10..ea6486378 100644 --- a/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java +++ b/java/src/test/java/org/forstdb/ComparatorOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java b/java/src/test/java/org/forstdb/CompressionOptionsTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/CompressionOptionsTest.java rename to java/src/test/java/org/forstdb/CompressionOptionsTest.java index 116552c32..c11c57af9 100644 --- a/java/src/test/java/org/rocksdb/CompressionOptionsTest.java +++ b/java/src/test/java/org/forstdb/CompressionOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/CompressionTypesTest.java b/java/src/test/java/org/forstdb/CompressionTypesTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/CompressionTypesTest.java rename to java/src/test/java/org/forstdb/CompressionTypesTest.java index a983f471a..761d4eec0 100644 --- a/java/src/test/java/org/rocksdb/CompressionTypesTest.java +++ b/java/src/test/java/org/forstdb/CompressionTypesTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java b/java/src/test/java/org/forstdb/ConcurrentTaskLimiterTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java rename to java/src/test/java/org/forstdb/ConcurrentTaskLimiterTest.java index 165f4f24c..8e6b5b02c 100644 --- a/java/src/test/java/org/rocksdb/ConcurrentTaskLimiterTest.java +++ b/java/src/test/java/org/forstdb/ConcurrentTaskLimiterTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.junit.Assert.assertEquals; diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/forstdb/DBOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/DBOptionsTest.java rename to java/src/test/java/org/forstdb/DBOptionsTest.java index cb7eabcfb..ffa22a231 100644 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/java/src/test/java/org/forstdb/DBOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; diff --git a/java/src/test/java/org/rocksdb/DefaultEnvTest.java b/java/src/test/java/org/forstdb/DefaultEnvTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/DefaultEnvTest.java rename to java/src/test/java/org/forstdb/DefaultEnvTest.java index 3fb563ecb..a53d1de0f 100644 --- a/java/src/test/java/org/rocksdb/DefaultEnvTest.java +++ b/java/src/test/java/org/forstdb/DefaultEnvTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/DirectSliceTest.java b/java/src/test/java/org/forstdb/DirectSliceTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/DirectSliceTest.java rename to java/src/test/java/org/forstdb/DirectSliceTest.java index 67385345c..c14f4925b 100644 --- a/java/src/test/java/org/rocksdb/DirectSliceTest.java +++ b/java/src/test/java/org/forstdb/DirectSliceTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/EnvOptionsTest.java b/java/src/test/java/org/forstdb/EnvOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/EnvOptionsTest.java rename to java/src/test/java/org/forstdb/EnvOptionsTest.java index 0f3d8e234..fcf1bcf87 100644 --- a/java/src/test/java/org/rocksdb/EnvOptionsTest.java +++ b/java/src/test/java/org/forstdb/EnvOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/EventListenerTest.java b/java/src/test/java/org/forstdb/EventListenerTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/EventListenerTest.java rename to java/src/test/java/org/forstdb/EventListenerTest.java index 84be232f9..4d5729586 100644 --- a/java/src/test/java/org/rocksdb/EventListenerTest.java +++ b/java/src/test/java/org/forstdb/EventListenerTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; @@ -17,8 +17,8 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.AbstractEventListener.EnabledEventCallback; -import org.rocksdb.test.TestableEventListener; +import org.forstdb.AbstractEventListener.EnabledEventCallback; +import org.forstdb.test.TestableEventListener; public class EventListenerTest { @ClassRule diff --git a/java/src/test/java/org/rocksdb/FilterTest.java b/java/src/test/java/org/forstdb/FilterTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/FilterTest.java rename to java/src/test/java/org/forstdb/FilterTest.java index e308ffefb..d2e93e4e2 100644 --- a/java/src/test/java/org/rocksdb/FilterTest.java +++ b/java/src/test/java/org/forstdb/FilterTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java b/java/src/test/java/org/forstdb/FlinkCompactionFilterTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java rename to java/src/test/java/org/forstdb/FlinkCompactionFilterTest.java index 40320e9d5..87bbb6bbe 100644 --- a/java/src/test/java/org/rocksdb/FlinkCompactionFilterTest.java +++ b/java/src/test/java/org/forstdb/FlinkCompactionFilterTest.java @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; @@ -31,8 +31,8 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.FlinkCompactionFilter.StateType; -import org.rocksdb.FlinkCompactionFilter.TimeProvider; +import org.forstdb.FlinkCompactionFilter.StateType; +import org.forstdb.FlinkCompactionFilter.TimeProvider; public class FlinkCompactionFilterTest { private static final int LONG_LENGTH = 8; diff --git a/java/src/test/java/org/rocksdb/FlushOptionsTest.java b/java/src/test/java/org/forstdb/FlushOptionsTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/FlushOptionsTest.java rename to java/src/test/java/org/forstdb/FlushOptionsTest.java index f90ae911d..2c0e268b0 100644 --- a/java/src/test/java/org/rocksdb/FlushOptionsTest.java +++ b/java/src/test/java/org/forstdb/FlushOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/FlushTest.java b/java/src/test/java/org/forstdb/FlushTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/FlushTest.java rename to java/src/test/java/org/forstdb/FlushTest.java index 1a354f4ce..358091561 100644 --- a/java/src/test/java/org/rocksdb/FlushTest.java +++ b/java/src/test/java/org/forstdb/FlushTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/HyperClockCacheTest.java b/java/src/test/java/org/forstdb/HyperClockCacheTest.java similarity index 95% rename from java/src/test/java/org/rocksdb/HyperClockCacheTest.java rename to java/src/test/java/org/forstdb/HyperClockCacheTest.java index 132d69351..5bbc93db8 100644 --- a/java/src/test/java/org/rocksdb/HyperClockCacheTest.java +++ b/java/src/test/java/org/forstdb/HyperClockCacheTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java b/java/src/test/java/org/forstdb/ImportColumnFamilyTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java rename to java/src/test/java/org/forstdb/ImportColumnFamilyTest.java index ee569d497..040f2a14d 100644 --- a/java/src/test/java/org/rocksdb/ImportColumnFamilyTest.java +++ b/java/src/test/java/org/forstdb/ImportColumnFamilyTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; @@ -19,7 +19,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.BytewiseComparator; +import org.forstdb.util.BytewiseComparator; public class ImportColumnFamilyTest { private static final String SST_FILE_NAME = "test.sst"; diff --git a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java b/java/src/test/java/org/forstdb/InfoLogLevelTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/InfoLogLevelTest.java rename to java/src/test/java/org/forstdb/InfoLogLevelTest.java index 90b0b4e2d..6afd1f783 100644 --- a/java/src/test/java/org/rocksdb/InfoLogLevelTest.java +++ b/java/src/test/java/org/forstdb/InfoLogLevelTest.java @@ -1,11 +1,11 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.Environment; +import org.forstdb.util.Environment; import java.io.IOException; diff --git a/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java b/java/src/test/java/org/forstdb/IngestExternalFileOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java rename to java/src/test/java/org/forstdb/IngestExternalFileOptionsTest.java index 230694615..535156580 100644 --- a/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java +++ b/java/src/test/java/org/forstdb/IngestExternalFileOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/KeyExistsTest.java b/java/src/test/java/org/forstdb/KeyExistsTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/KeyExistsTest.java rename to java/src/test/java/org/forstdb/KeyExistsTest.java index 1ee9bdce2..150411cb7 100644 --- a/java/src/test/java/org/rocksdb/KeyExistsTest.java +++ b/java/src/test/java/org/forstdb/KeyExistsTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/KeyMayExistTest.java b/java/src/test/java/org/forstdb/KeyMayExistTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/KeyMayExistTest.java rename to java/src/test/java/org/forstdb/KeyMayExistTest.java index 5a9ffd6eb..387a238a1 100644 --- a/java/src/test/java/org/rocksdb/KeyMayExistTest.java +++ b/java/src/test/java/org/forstdb/KeyMayExistTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/LRUCacheTest.java b/java/src/test/java/org/forstdb/LRUCacheTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/LRUCacheTest.java rename to java/src/test/java/org/forstdb/LRUCacheTest.java index 4d194e712..e2782720e 100644 --- a/java/src/test/java/org/rocksdb/LRUCacheTest.java +++ b/java/src/test/java/org/forstdb/LRUCacheTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/LoggerTest.java b/java/src/test/java/org/forstdb/LoggerTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/LoggerTest.java rename to java/src/test/java/org/forstdb/LoggerTest.java index b6a7be55e..f91a6c260 100644 --- a/java/src/test/java/org/rocksdb/LoggerTest.java +++ b/java/src/test/java/org/forstdb/LoggerTest.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/MemTableTest.java b/java/src/test/java/org/forstdb/MemTableTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MemTableTest.java rename to java/src/test/java/org/forstdb/MemTableTest.java index 6ebf9ef51..2cf0ff0ec 100644 --- a/java/src/test/java/org/rocksdb/MemTableTest.java +++ b/java/src/test/java/org/forstdb/MemTableTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/MemoryUtilTest.java b/java/src/test/java/org/forstdb/MemoryUtilTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MemoryUtilTest.java rename to java/src/test/java/org/forstdb/MemoryUtilTest.java index bfdcb9fe1..555463706 100644 --- a/java/src/test/java/org/rocksdb/MemoryUtilTest.java +++ b/java/src/test/java/org/forstdb/MemoryUtilTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/MergeCFVariantsTest.java b/java/src/test/java/org/forstdb/MergeCFVariantsTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/MergeCFVariantsTest.java rename to java/src/test/java/org/forstdb/MergeCFVariantsTest.java index 6c4f07ddc..5f3206fc6 100644 --- a/java/src/test/java/org/rocksdb/MergeCFVariantsTest.java +++ b/java/src/test/java/org/forstdb/MergeCFVariantsTest.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.MergeTest.longFromByteArray; -import static org.rocksdb.MergeTest.longToByteArray; +import static org.forstdb.MergeTest.longFromByteArray; +import static org.forstdb.MergeTest.longToByteArray; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; diff --git a/java/src/test/java/org/rocksdb/MergeTest.java b/java/src/test/java/org/forstdb/MergeTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MergeTest.java rename to java/src/test/java/org/forstdb/MergeTest.java index 10ffeb778..2612027a6 100644 --- a/java/src/test/java/org/rocksdb/MergeTest.java +++ b/java/src/test/java/org/forstdb/MergeTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/MergeVariantsTest.java b/java/src/test/java/org/forstdb/MergeVariantsTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/MergeVariantsTest.java rename to java/src/test/java/org/forstdb/MergeVariantsTest.java index 1acedc1e6..bcf9edc1a 100644 --- a/java/src/test/java/org/rocksdb/MergeVariantsTest.java +++ b/java/src/test/java/org/forstdb/MergeVariantsTest.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.MergeTest.longFromByteArray; -import static org.rocksdb.MergeTest.longToByteArray; +import static org.forstdb.MergeTest.longFromByteArray; +import static org.forstdb.MergeTest.longToByteArray; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; diff --git a/java/src/test/java/org/rocksdb/MixedOptionsTest.java b/java/src/test/java/org/forstdb/MixedOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MixedOptionsTest.java rename to java/src/test/java/org/forstdb/MixedOptionsTest.java index 4e17d04ef..4a1b40d47 100644 --- a/java/src/test/java/org/rocksdb/MixedOptionsTest.java +++ b/java/src/test/java/org/forstdb/MixedOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java b/java/src/test/java/org/forstdb/MultiColumnRegressionTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java rename to java/src/test/java/org/forstdb/MultiColumnRegressionTest.java index 6087b0260..7902a4af7 100644 --- a/java/src/test/java/org/rocksdb/MultiColumnRegressionTest.java +++ b/java/src/test/java/org/forstdb/MultiColumnRegressionTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java b/java/src/test/java/org/forstdb/MultiGetManyKeysTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java rename to java/src/test/java/org/forstdb/MultiGetManyKeysTest.java index e66eef622..c65e4fd1d 100644 --- a/java/src/test/java/org/rocksdb/MultiGetManyKeysTest.java +++ b/java/src/test/java/org/forstdb/MultiGetManyKeysTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/MultiGetTest.java b/java/src/test/java/org/forstdb/MultiGetTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MultiGetTest.java rename to java/src/test/java/org/forstdb/MultiGetTest.java index c391d81f6..809ac8ca2 100644 --- a/java/src/test/java/org/rocksdb/MultiGetTest.java +++ b/java/src/test/java/org/forstdb/MultiGetTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; @@ -15,7 +15,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.TestUtil; +import org.forstdb.util.TestUtil; public class MultiGetTest { @ClassRule diff --git a/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java b/java/src/test/java/org/forstdb/MutableColumnFamilyOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java rename to java/src/test/java/org/forstdb/MutableColumnFamilyOptionsTest.java index d858a150d..746c219e7 100644 --- a/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java +++ b/java/src/test/java/org/forstdb/MutableColumnFamilyOptionsTest.java @@ -2,10 +2,10 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; -import org.rocksdb.MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder; +import org.forstdb.MutableColumnFamilyOptions.MutableColumnFamilyOptionsBuilder; import java.util.NoSuchElementException; diff --git a/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java b/java/src/test/java/org/forstdb/MutableDBOptionsTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/MutableDBOptionsTest.java rename to java/src/test/java/org/forstdb/MutableDBOptionsTest.java index 063a8de38..9298181d3 100644 --- a/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java +++ b/java/src/test/java/org/forstdb/MutableDBOptionsTest.java @@ -2,10 +2,10 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; -import org.rocksdb.MutableDBOptions.MutableDBOptionsBuilder; +import org.forstdb.MutableDBOptions.MutableDBOptionsBuilder; import java.util.NoSuchElementException; diff --git a/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java b/java/src/test/java/org/forstdb/MutableOptionsGetSetTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java rename to java/src/test/java/org/forstdb/MutableOptionsGetSetTest.java index 6db940619..4628dd417 100644 --- a/java/src/test/java/org/rocksdb/MutableOptionsGetSetTest.java +++ b/java/src/test/java/org/forstdb/MutableOptionsGetSetTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java b/java/src/test/java/org/forstdb/NativeComparatorWrapperTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java rename to java/src/test/java/org/forstdb/NativeComparatorWrapperTest.java index 1e0ded816..a5aaac158 100644 --- a/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java +++ b/java/src/test/java/org/forstdb/NativeComparatorWrapperTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.junit.Assert.assertEquals; diff --git a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java similarity index 95% rename from java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java rename to java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java index 6b954f67e..4a983ae64 100644 --- a/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java +++ b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java @@ -2,12 +2,12 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.Environment; +import org.forstdb.util.Environment; import java.io.File; import java.io.IOException; diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java b/java/src/test/java/org/forstdb/OptimisticTransactionDBTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java rename to java/src/test/java/org/forstdb/OptimisticTransactionDBTest.java index 519b70b1d..20b5d442c 100644 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java +++ b/java/src/test/java/org/forstdb/OptimisticTransactionDBTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Rule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java b/java/src/test/java/org/forstdb/OptimisticTransactionOptionsTest.java similarity index 94% rename from java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java rename to java/src/test/java/org/forstdb/OptimisticTransactionOptionsTest.java index ef656b958..2190dc963 100644 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java +++ b/java/src/test/java/org/forstdb/OptimisticTransactionOptionsTest.java @@ -3,10 +3,10 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.Test; -import org.rocksdb.util.BytewiseComparator; +import org.forstdb.util.BytewiseComparator; import java.util.Random; diff --git a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java b/java/src/test/java/org/forstdb/OptimisticTransactionTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/OptimisticTransactionTest.java rename to java/src/test/java/org/forstdb/OptimisticTransactionTest.java index 4959d207b..c8a5f7997 100644 --- a/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java +++ b/java/src/test/java/org/forstdb/OptimisticTransactionTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.*; diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/forstdb/OptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/OptionsTest.java rename to java/src/test/java/org/forstdb/OptionsTest.java index 4b59464b1..898aefc26 100644 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ b/java/src/test/java/org/forstdb/OptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.*; @@ -13,7 +13,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.junit.ClassRule; import org.junit.Test; -import org.rocksdb.test.RemoveEmptyValueCompactionFilterFactory; +import org.forstdb.test.RemoveEmptyValueCompactionFilterFactory; public class OptionsTest { diff --git a/java/src/test/java/org/rocksdb/OptionsUtilTest.java b/java/src/test/java/org/forstdb/OptionsUtilTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/OptionsUtilTest.java rename to java/src/test/java/org/forstdb/OptionsUtilTest.java index 23949ac06..f9725efd7 100644 --- a/java/src/test/java/org/rocksdb/OptionsUtilTest.java +++ b/java/src/test/java/org/forstdb/OptionsUtilTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/PerfContextTest.java b/java/src/test/java/org/forstdb/PerfContextTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/PerfContextTest.java rename to java/src/test/java/org/forstdb/PerfContextTest.java index 3145b59e4..84bf3fa92 100644 --- a/java/src/test/java/org/rocksdb/PerfContextTest.java +++ b/java/src/test/java/org/forstdb/PerfContextTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/PerfLevelTest.java b/java/src/test/java/org/forstdb/PerfLevelTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/PerfLevelTest.java rename to java/src/test/java/org/forstdb/PerfLevelTest.java index bb766cbd4..d3c8d6cc0 100644 --- a/java/src/test/java/org/rocksdb/PerfLevelTest.java +++ b/java/src/test/java/org/forstdb/PerfLevelTest.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.rocksdb.PerfLevel.*; +import static org.forstdb.PerfLevel.*; import java.util.ArrayList; import java.util.Arrays; diff --git a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java b/java/src/test/java/org/forstdb/PlainTableConfigTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/PlainTableConfigTest.java rename to java/src/test/java/org/forstdb/PlainTableConfigTest.java index 827eb79f9..dca2c4777 100644 --- a/java/src/test/java/org/rocksdb/PlainTableConfigTest.java +++ b/java/src/test/java/org/forstdb/PlainTableConfigTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java b/java/src/test/java/org/forstdb/PlatformRandomHelper.java similarity index 98% rename from java/src/test/java/org/rocksdb/PlatformRandomHelper.java rename to java/src/test/java/org/forstdb/PlatformRandomHelper.java index 80ea4d197..ca1dbe7b7 100644 --- a/java/src/test/java/org/rocksdb/PlatformRandomHelper.java +++ b/java/src/test/java/org/forstdb/PlatformRandomHelper.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Random; diff --git a/java/src/test/java/org/rocksdb/PutCFVariantsTest.java b/java/src/test/java/org/forstdb/PutCFVariantsTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/PutCFVariantsTest.java rename to java/src/test/java/org/forstdb/PutCFVariantsTest.java index 977c74dc8..a76dd975f 100644 --- a/java/src/test/java/org/rocksdb/PutCFVariantsTest.java +++ b/java/src/test/java/org/forstdb/PutCFVariantsTest.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.MergeTest.longFromByteArray; -import static org.rocksdb.MergeTest.longToByteArray; +import static org.forstdb.MergeTest.longFromByteArray; +import static org.forstdb.MergeTest.longToByteArray; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; diff --git a/java/src/test/java/org/rocksdb/PutMultiplePartsTest.java b/java/src/test/java/org/forstdb/PutMultiplePartsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/PutMultiplePartsTest.java rename to java/src/test/java/org/forstdb/PutMultiplePartsTest.java index 7835737ae..4846c2537 100644 --- a/java/src/test/java/org/rocksdb/PutMultiplePartsTest.java +++ b/java/src/test/java/org/forstdb/PutMultiplePartsTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/PutVariantsTest.java b/java/src/test/java/org/forstdb/PutVariantsTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/PutVariantsTest.java rename to java/src/test/java/org/forstdb/PutVariantsTest.java index 2e0e9b9e3..ce6bbeeda 100644 --- a/java/src/test/java/org/rocksdb/PutVariantsTest.java +++ b/java/src/test/java/org/forstdb/PutVariantsTest.java @@ -3,11 +3,11 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.MergeTest.longFromByteArray; -import static org.rocksdb.MergeTest.longToByteArray; +import static org.forstdb.MergeTest.longFromByteArray; +import static org.forstdb.MergeTest.longToByteArray; import java.nio.ByteBuffer; import java.nio.charset.Charset; diff --git a/java/src/test/java/org/rocksdb/RateLimiterTest.java b/java/src/test/java/org/forstdb/RateLimiterTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/RateLimiterTest.java rename to java/src/test/java/org/forstdb/RateLimiterTest.java index e7d6e6c49..5e834bcc0 100644 --- a/java/src/test/java/org/rocksdb/RateLimiterTest.java +++ b/java/src/test/java/org/forstdb/RateLimiterTest.java @@ -2,13 +2,13 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.RateLimiter.*; +import static org.forstdb.RateLimiter.*; public class RateLimiterTest { diff --git a/java/src/test/java/org/rocksdb/ReadOnlyTest.java b/java/src/test/java/org/forstdb/ReadOnlyTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/ReadOnlyTest.java rename to java/src/test/java/org/forstdb/ReadOnlyTest.java index 99549b61b..573e8307b 100644 --- a/java/src/test/java/org/rocksdb/ReadOnlyTest.java +++ b/java/src/test/java/org/forstdb/ReadOnlyTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/ReadOptionsTest.java b/java/src/test/java/org/forstdb/ReadOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/ReadOptionsTest.java rename to java/src/test/java/org/forstdb/ReadOptionsTest.java index 1bc24b984..f7f6125d9 100644 --- a/java/src/test/java/org/rocksdb/ReadOptionsTest.java +++ b/java/src/test/java/org/forstdb/ReadOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.Random; diff --git a/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java b/java/src/test/java/org/forstdb/RocksDBExceptionTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/RocksDBExceptionTest.java rename to java/src/test/java/org/forstdb/RocksDBExceptionTest.java index d3bd4ece7..8fb9285d2 100644 --- a/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java +++ b/java/src/test/java/org/forstdb/RocksDBExceptionTest.java @@ -3,12 +3,12 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; -import org.rocksdb.Status.Code; -import org.rocksdb.Status.SubCode; +import org.forstdb.Status.Code; +import org.forstdb.Status.SubCode; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/forstdb/RocksDBTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/RocksDBTest.java rename to java/src/test/java/org/forstdb/RocksDBTest.java index 74e523c49..b5e6a1eb4 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/forstdb/RocksDBTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.*; import org.junit.rules.ExpectedException; diff --git a/java/src/test/java/org/rocksdb/RocksIteratorTest.java b/java/src/test/java/org/forstdb/RocksIteratorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/RocksIteratorTest.java rename to java/src/test/java/org/forstdb/RocksIteratorTest.java index 90c635f58..34eb5e779 100644 --- a/java/src/test/java/org/rocksdb/RocksIteratorTest.java +++ b/java/src/test/java/org/forstdb/RocksIteratorTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; diff --git a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java b/java/src/test/java/org/forstdb/RocksMemEnvTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/RocksMemEnvTest.java rename to java/src/test/java/org/forstdb/RocksMemEnvTest.java index 40b24ffa3..5f77ce3e3 100644 --- a/java/src/test/java/org/rocksdb/RocksMemEnvTest.java +++ b/java/src/test/java/org/forstdb/RocksMemEnvTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java b/java/src/test/java/org/forstdb/RocksNativeLibraryResource.java similarity index 95% rename from java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java rename to java/src/test/java/org/forstdb/RocksNativeLibraryResource.java index 6116f2f92..7f85047ac 100644 --- a/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java +++ b/java/src/test/java/org/forstdb/RocksNativeLibraryResource.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.rules.ExternalResource; diff --git a/java/src/test/java/org/rocksdb/SecondaryDBTest.java b/java/src/test/java/org/forstdb/SecondaryDBTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SecondaryDBTest.java rename to java/src/test/java/org/forstdb/SecondaryDBTest.java index 557d4a47d..99a76afbd 100644 --- a/java/src/test/java/org/rocksdb/SecondaryDBTest.java +++ b/java/src/test/java/org/forstdb/SecondaryDBTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/SliceTest.java b/java/src/test/java/org/forstdb/SliceTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SliceTest.java rename to java/src/test/java/org/forstdb/SliceTest.java index c65b01903..ec83cd419 100644 --- a/java/src/test/java/org/rocksdb/SliceTest.java +++ b/java/src/test/java/org/forstdb/SliceTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/SnapshotTest.java b/java/src/test/java/org/forstdb/SnapshotTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SnapshotTest.java rename to java/src/test/java/org/forstdb/SnapshotTest.java index 11f0d560a..b6f37ac55 100644 --- a/java/src/test/java/org/rocksdb/SnapshotTest.java +++ b/java/src/test/java/org/forstdb/SnapshotTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/SstFileManagerTest.java b/java/src/test/java/org/forstdb/SstFileManagerTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SstFileManagerTest.java rename to java/src/test/java/org/forstdb/SstFileManagerTest.java index 2e136e820..e1976134e 100644 --- a/java/src/test/java/org/rocksdb/SstFileManagerTest.java +++ b/java/src/test/java/org/forstdb/SstFileManagerTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/SstFileReaderTest.java b/java/src/test/java/org/forstdb/SstFileReaderTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SstFileReaderTest.java rename to java/src/test/java/org/forstdb/SstFileReaderTest.java index ef74b08a7..959558ffe 100644 --- a/java/src/test/java/org/rocksdb/SstFileReaderTest.java +++ b/java/src/test/java/org/forstdb/SstFileReaderTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; @@ -20,7 +20,7 @@ import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.rocksdb.util.ByteBufferAllocator; +import org.forstdb.util.ByteBufferAllocator; @RunWith(Parameterized.class) public class SstFileReaderTest { diff --git a/java/src/test/java/org/rocksdb/SstFileWriterTest.java b/java/src/test/java/org/forstdb/SstFileWriterTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SstFileWriterTest.java rename to java/src/test/java/org/forstdb/SstFileWriterTest.java index c0f4ed9f1..7e686eb35 100644 --- a/java/src/test/java/org/rocksdb/SstFileWriterTest.java +++ b/java/src/test/java/org/forstdb/SstFileWriterTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.fail; @@ -18,7 +18,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.BytewiseComparator; +import org.forstdb.util.BytewiseComparator; public class SstFileWriterTest { private static final String SST_FILE_NAME = "test.sst"; diff --git a/java/src/test/java/org/rocksdb/SstPartitionerTest.java b/java/src/test/java/org/forstdb/SstPartitionerTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/SstPartitionerTest.java rename to java/src/test/java/org/forstdb/SstPartitionerTest.java index 3ee739053..48b225d32 100644 --- a/java/src/test/java/org/rocksdb/SstPartitionerTest.java +++ b/java/src/test/java/org/forstdb/SstPartitionerTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java b/java/src/test/java/org/forstdb/StatisticsCollectorTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/StatisticsCollectorTest.java rename to java/src/test/java/org/forstdb/StatisticsCollectorTest.java index 36721c80d..20436b687 100644 --- a/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java +++ b/java/src/test/java/org/forstdb/StatisticsCollectorTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Collections; diff --git a/java/src/test/java/org/rocksdb/StatisticsTest.java b/java/src/test/java/org/forstdb/StatisticsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/StatisticsTest.java rename to java/src/test/java/org/forstdb/StatisticsTest.java index 269cc56a0..3e83fae2c 100644 --- a/java/src/test/java/org/rocksdb/StatisticsTest.java +++ b/java/src/test/java/org/forstdb/StatisticsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/StatsCallbackMock.java b/java/src/test/java/org/forstdb/StatsCallbackMock.java similarity index 96% rename from java/src/test/java/org/rocksdb/StatsCallbackMock.java rename to java/src/test/java/org/forstdb/StatsCallbackMock.java index c6a7294c9..24cafd018 100644 --- a/java/src/test/java/org/rocksdb/StatsCallbackMock.java +++ b/java/src/test/java/org/forstdb/StatsCallbackMock.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; public class StatsCallbackMock implements StatisticsCollectorCallback { public int tickerCallbackCount = 0; diff --git a/java/src/test/java/org/rocksdb/TableFilterTest.java b/java/src/test/java/org/forstdb/TableFilterTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TableFilterTest.java rename to java/src/test/java/org/forstdb/TableFilterTest.java index 2bd3b1798..c9604f823 100644 --- a/java/src/test/java/org/rocksdb/TableFilterTest.java +++ b/java/src/test/java/org/forstdb/TableFilterTest.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import org.junit.Rule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/TimedEnvTest.java b/java/src/test/java/org/forstdb/TimedEnvTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/TimedEnvTest.java rename to java/src/test/java/org/forstdb/TimedEnvTest.java index 31bad2e2e..3134a131a 100644 --- a/java/src/test/java/org/rocksdb/TimedEnvTest.java +++ b/java/src/test/java/org/forstdb/TimedEnvTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java b/java/src/test/java/org/forstdb/TransactionDBOptionsTest.java similarity index 98% rename from java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java rename to java/src/test/java/org/forstdb/TransactionDBOptionsTest.java index 7eaa6b16c..303da19ec 100644 --- a/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java +++ b/java/src/test/java/org/forstdb/TransactionDBOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/TransactionDBTest.java b/java/src/test/java/org/forstdb/TransactionDBTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TransactionDBTest.java rename to java/src/test/java/org/forstdb/TransactionDBTest.java index 56acb21c7..4e0e3848b 100644 --- a/java/src/test/java/org/rocksdb/TransactionDBTest.java +++ b/java/src/test/java/org/forstdb/TransactionDBTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Rule; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java b/java/src/test/java/org/forstdb/TransactionLogIteratorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java rename to java/src/test/java/org/forstdb/TransactionLogIteratorTest.java index 3c4dff7bb..fd96c34ed 100644 --- a/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java +++ b/java/src/test/java/org/forstdb/TransactionLogIteratorTest.java @@ -1,5 +1,5 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/TransactionOptionsTest.java b/java/src/test/java/org/forstdb/TransactionOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TransactionOptionsTest.java rename to java/src/test/java/org/forstdb/TransactionOptionsTest.java index add0439e0..1b8ccae54 100644 --- a/java/src/test/java/org/rocksdb/TransactionOptionsTest.java +++ b/java/src/test/java/org/forstdb/TransactionOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/TransactionTest.java b/java/src/test/java/org/forstdb/TransactionTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TransactionTest.java rename to java/src/test/java/org/forstdb/TransactionTest.java index 03a6b4ff6..9f239788a 100644 --- a/java/src/test/java/org/rocksdb/TransactionTest.java +++ b/java/src/test/java/org/forstdb/TransactionTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/TtlDBTest.java b/java/src/test/java/org/forstdb/TtlDBTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/TtlDBTest.java rename to java/src/test/java/org/forstdb/TtlDBTest.java index ebf9e9eaa..6457a5e62 100644 --- a/java/src/test/java/org/rocksdb/TtlDBTest.java +++ b/java/src/test/java/org/forstdb/TtlDBTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; diff --git a/java/src/test/java/org/rocksdb/Types.java b/java/src/test/java/org/forstdb/Types.java similarity index 97% rename from java/src/test/java/org/rocksdb/Types.java rename to java/src/test/java/org/forstdb/Types.java index a6abdecbc..d9a0171c5 100644 --- a/java/src/test/java/org/rocksdb/Types.java +++ b/java/src/test/java/org/forstdb/Types.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; /** * Simple type conversion methods diff --git a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java b/java/src/test/java/org/forstdb/VerifyChecksumsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/VerifyChecksumsTest.java rename to java/src/test/java/org/forstdb/VerifyChecksumsTest.java index ddc2a456f..fbdffca29 100644 --- a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java +++ b/java/src/test/java/org/forstdb/VerifyChecksumsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java b/java/src/test/java/org/forstdb/WALRecoveryModeTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/WALRecoveryModeTest.java rename to java/src/test/java/org/forstdb/WALRecoveryModeTest.java index 2a0133f6b..08ee946a9 100644 --- a/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java +++ b/java/src/test/java/org/forstdb/WALRecoveryModeTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/WalFilterTest.java b/java/src/test/java/org/forstdb/WalFilterTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/WalFilterTest.java rename to java/src/test/java/org/forstdb/WalFilterTest.java index 08bc6eef5..5792b60f8 100644 --- a/java/src/test/java/org/rocksdb/WalFilterTest.java +++ b/java/src/test/java/org/forstdb/WalFilterTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb; +package org.forstdb; import org.junit.ClassRule; import org.junit.Rule; @@ -16,8 +16,8 @@ import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.ByteUtil.bytes; -import static org.rocksdb.util.TestUtil.*; +import static org.forstdb.util.ByteUtil.bytes; +import static org.forstdb.util.TestUtil.*; public class WalFilterTest { diff --git a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java b/java/src/test/java/org/forstdb/WriteBatchHandlerTest.java similarity index 91% rename from java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java rename to java/src/test/java/org/forstdb/WriteBatchHandlerTest.java index 2826b128f..6b101f980 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java +++ b/java/src/test/java/org/forstdb/WriteBatchHandlerTest.java @@ -3,18 +3,18 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import java.util.Arrays; import java.util.List; import org.junit.ClassRule; import org.junit.Test; -import org.rocksdb.util.CapturingWriteBatchHandler; -import org.rocksdb.util.CapturingWriteBatchHandler.Event; +import org.forstdb.util.CapturingWriteBatchHandler; +import org.forstdb.util.CapturingWriteBatchHandler.Event; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.*; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.*; public class WriteBatchHandlerTest { diff --git a/java/src/test/java/org/rocksdb/WriteBatchTest.java b/java/src/test/java/org/forstdb/WriteBatchTest.java similarity index 96% rename from java/src/test/java/org/rocksdb/WriteBatchTest.java rename to java/src/test/java/org/forstdb/WriteBatchTest.java index cc3ad26eb..d36944d05 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchTest.java +++ b/java/src/test/java/org/forstdb/WriteBatchTest.java @@ -6,16 +6,16 @@ // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.DELETE_RANGE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.LOG; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.MERGE; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.PUT; -import static org.rocksdb.util.CapturingWriteBatchHandler.Action.SINGLE_DELETE; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.DELETE; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.DELETE_RANGE; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.LOG; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.MERGE; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.PUT; +import static org.forstdb.util.CapturingWriteBatchHandler.Action.SINGLE_DELETE; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; @@ -23,9 +23,9 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.CapturingWriteBatchHandler; -import org.rocksdb.util.CapturingWriteBatchHandler.Event; -import org.rocksdb.util.WriteBatchGetter; +import org.forstdb.util.CapturingWriteBatchHandler; +import org.forstdb.util.CapturingWriteBatchHandler.Event; +import org.forstdb.util.WriteBatchGetter; /** * This class mimics the db/write_batch_test.cc diff --git a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java b/java/src/test/java/org/forstdb/WriteBatchThreadedTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java rename to java/src/test/java/org/forstdb/WriteBatchThreadedTest.java index 0321da3fa..ce76c69ae 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java +++ b/java/src/test/java/org/forstdb/WriteBatchThreadedTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import org.junit.After; import org.junit.Before; diff --git a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java b/java/src/test/java/org/forstdb/WriteBatchWithIndexTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java rename to java/src/test/java/org/forstdb/WriteBatchWithIndexTest.java index b0a0cdc0e..3a6ccf0e9 100644 --- a/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java +++ b/java/src/test/java/org/forstdb/WriteBatchWithIndexTest.java @@ -7,7 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. 
-package org.rocksdb; +package org.forstdb; import static java.nio.charset.StandardCharsets.UTF_8; import static org.assertj.core.api.Assertions.assertThat; @@ -20,7 +20,7 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.util.ByteBufferAllocator; +import org.forstdb.util.ByteBufferAllocator; public class WriteBatchWithIndexTest { diff --git a/java/src/test/java/org/rocksdb/WriteOptionsTest.java b/java/src/test/java/org/forstdb/WriteOptionsTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/WriteOptionsTest.java rename to java/src/test/java/org/forstdb/WriteOptionsTest.java index 1e1c93fb5..b4092e17a 100644 --- a/java/src/test/java/org/rocksdb/WriteOptionsTest.java +++ b/java/src/test/java/org/forstdb/WriteOptionsTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb; +package org.forstdb; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java b/java/src/test/java/org/forstdb/flink/FlinkEnvTest.java similarity index 92% rename from java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java rename to java/src/test/java/org/forstdb/flink/FlinkEnvTest.java index 5c7166557..805755637 100644 --- a/java/src/test/java/org/rocksdb/flink/FlinkEnvTest.java +++ b/java/src/test/java/org/forstdb/flink/FlinkEnvTest.java @@ -16,14 +16,14 @@ * limitations under the License. */ -package org.rocksdb.flink; +package org.forstdb.flink; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.EnvFlinkTestSuite; -import org.rocksdb.RocksNativeLibraryResource; +import org.forstdb.EnvFlinkTestSuite; +import org.forstdb.RocksNativeLibraryResource; /** * Unit test for env/flink/env_flink.cc. diff --git a/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java b/java/src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java similarity index 77% rename from java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java rename to java/src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java index c4e4f25a0..b93346f8a 100644 --- a/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java +++ b/java/src/test/java/org/forstdb/test/RemoveEmptyValueCompactionFilterFactory.java @@ -1,9 +1,9 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb.test; +package org.forstdb.test; -import org.rocksdb.AbstractCompactionFilter; -import org.rocksdb.AbstractCompactionFilterFactory; -import org.rocksdb.RemoveEmptyValueCompactionFilter; +import org.forstdb.AbstractCompactionFilter; +import org.forstdb.AbstractCompactionFilterFactory; +import org.forstdb.RemoveEmptyValueCompactionFilter; /** * Simple CompactionFilterFactory class used in tests. Generates RemoveEmptyValueCompactionFilters. 
diff --git a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java b/java/src/test/java/org/forstdb/test/RocksJunitRunner.java similarity index 97% rename from java/src/test/java/org/rocksdb/test/RocksJunitRunner.java rename to java/src/test/java/org/forstdb/test/RocksJunitRunner.java index 42d3148ef..81f9cb3a6 100644 --- a/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java +++ b/java/src/test/java/org/forstdb/test/RocksJunitRunner.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.test; +package org.forstdb.test; import org.junit.internal.JUnitSystem; import org.junit.internal.RealSystem; @@ -11,7 +11,7 @@ import org.junit.runner.JUnitCore; import org.junit.runner.Result; import org.junit.runner.notification.Failure; -import org.rocksdb.RocksDB; +import org.forstdb.RocksDB; import java.io.PrintStream; import java.text.DecimalFormat; @@ -19,7 +19,7 @@ import java.util.ArrayList; import java.util.List; -import static org.rocksdb.test.RocksJunitRunner.RocksJunitListener.Status.*; +import static org.forstdb.test.RocksJunitRunner.RocksJunitListener.Status.*; /** * Custom Junit Runner to print also Test classes diff --git a/java/src/test/java/org/rocksdb/test/TestableEventListener.java b/java/src/test/java/org/forstdb/test/TestableEventListener.java similarity index 90% rename from java/src/test/java/org/rocksdb/test/TestableEventListener.java rename to java/src/test/java/org/forstdb/test/TestableEventListener.java index 865ad5cf7..8aecc4688 100644 --- a/java/src/test/java/org/rocksdb/test/TestableEventListener.java +++ b/java/src/test/java/org/forstdb/test/TestableEventListener.java @@ -2,9 +2,9 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.test; +package org.forstdb.test; -import org.rocksdb.AbstractEventListener; +import org.forstdb.AbstractEventListener; public class TestableEventListener extends AbstractEventListener { public TestableEventListener() { diff --git a/java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java b/java/src/test/java/org/forstdb/util/ByteBufferAllocator.java similarity index 94% rename from java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java rename to java/src/test/java/org/forstdb/util/ByteBufferAllocator.java index 8d7956cf2..d8967a4fe 100644 --- a/java/src/test/java/org/rocksdb/util/ByteBufferAllocator.java +++ b/java/src/test/java/org/forstdb/util/ByteBufferAllocator.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb.util; +package org.forstdb.util; import java.nio.ByteBuffer; diff --git a/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java b/java/src/test/java/org/forstdb/util/BytewiseComparatorIntTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java rename to java/src/test/java/org/forstdb/util/BytewiseComparatorIntTest.java index fb7239c92..a5bcd1ff4 100644 --- a/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java +++ b/java/src/test/java/org/forstdb/util/BytewiseComparatorIntTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -14,7 +14,7 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; +import org.forstdb.*; import java.nio.ByteBuffer; import java.nio.file.FileSystems; diff --git a/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java b/java/src/test/java/org/forstdb/util/BytewiseComparatorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java rename to java/src/test/java/org/forstdb/util/BytewiseComparatorTest.java index 69f2c282b..6f84e1bcc 100644 --- a/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java +++ b/java/src/test/java/org/forstdb/util/BytewiseComparatorTest.java @@ -3,13 +3,13 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.rocksdb.*; +import org.forstdb.*; import java.io.IOException; import java.nio.ByteBuffer; @@ -18,7 +18,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.*; -import static org.rocksdb.util.ByteUtil.bytes; +import static org.forstdb.util.ByteUtil.bytes; /** * This is a direct port of various C++ diff --git a/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java b/java/src/test/java/org/forstdb/util/CapturingWriteBatchHandler.java similarity index 98% rename from java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java rename to java/src/test/java/org/forstdb/util/CapturingWriteBatchHandler.java index 8ea104332..29f39ab88 100644 --- a/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java +++ b/java/src/test/java/org/forstdb/util/CapturingWriteBatchHandler.java @@ -1,8 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
-package org.rocksdb.util; +package org.forstdb.util; -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteBatch; +import org.forstdb.RocksDBException; +import org.forstdb.WriteBatch; import java.util.ArrayList; import java.util.Arrays; diff --git a/java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java b/java/src/test/java/org/forstdb/util/DirectByteBufferAllocator.java similarity index 95% rename from java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java rename to java/src/test/java/org/forstdb/util/DirectByteBufferAllocator.java index d26fb578b..b5ac81b26 100644 --- a/java/src/test/java/org/rocksdb/util/DirectByteBufferAllocator.java +++ b/java/src/test/java/org/forstdb/util/DirectByteBufferAllocator.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import java.nio.ByteBuffer; diff --git a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java b/java/src/test/java/org/forstdb/util/EnvironmentTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/EnvironmentTest.java rename to java/src/test/java/org/forstdb/util/EnvironmentTest.java index 5e5369217..dfb150d69 100644 --- a/java/src/test/java/org/rocksdb/util/EnvironmentTest.java +++ b/java/src/test/java/org/forstdb/util/EnvironmentTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import static org.assertj.core.api.Assertions.assertThat; diff --git a/java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java b/java/src/test/java/org/forstdb/util/HeapByteBufferAllocator.java similarity index 95% rename from java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java rename to java/src/test/java/org/forstdb/util/HeapByteBufferAllocator.java index ad6b8f6f4..46da1826b 100644 --- a/java/src/test/java/org/rocksdb/util/HeapByteBufferAllocator.java +++ b/java/src/test/java/org/forstdb/util/HeapByteBufferAllocator.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import java.nio.ByteBuffer; diff --git a/java/src/test/java/org/rocksdb/util/IntComparatorTest.java b/java/src/test/java/org/forstdb/util/IntComparatorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/IntComparatorTest.java rename to java/src/test/java/org/forstdb/util/IntComparatorTest.java index dd3288513..f9c4e746d 100644 --- a/java/src/test/java/org/rocksdb/util/IntComparatorTest.java +++ b/java/src/test/java/org/forstdb/util/IntComparatorTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb.util; +package org.forstdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -14,7 +14,7 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; +import org.forstdb.*; import java.nio.ByteBuffer; import java.nio.file.*; diff --git a/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java b/java/src/test/java/org/forstdb/util/JNIComparatorTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/JNIComparatorTest.java rename to java/src/test/java/org/forstdb/util/JNIComparatorTest.java index a962b8d78..78e13f5fd 100644 --- a/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java +++ b/java/src/test/java/org/forstdb/util/JNIComparatorTest.java @@ -4,7 +4,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import org.junit.ClassRule; import org.junit.Rule; @@ -14,7 +14,7 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; +import org.forstdb.*; import java.io.IOException; import java.nio.ByteBuffer; diff --git a/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java b/java/src/test/java/org/forstdb/util/ReverseBytewiseComparatorIntTest.java similarity index 99% rename from java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java rename to java/src/test/java/org/forstdb/util/ReverseBytewiseComparatorIntTest.java index ca08d9de1..ebf98d11b 100644 --- a/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java +++ b/java/src/test/java/org/forstdb/util/ReverseBytewiseComparatorIntTest.java @@ -3,7 +3,7 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import org.junit.BeforeClass; import org.junit.ClassRule; @@ -14,7 +14,7 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -import org.rocksdb.*; +import org.forstdb.*; import java.nio.ByteBuffer; import java.nio.file.FileSystems; diff --git a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java b/java/src/test/java/org/forstdb/util/SizeUnitTest.java similarity index 97% rename from java/src/test/java/org/rocksdb/util/SizeUnitTest.java rename to java/src/test/java/org/forstdb/util/SizeUnitTest.java index 990aa5f47..190e445d6 100644 --- a/java/src/test/java/org/rocksdb/util/SizeUnitTest.java +++ b/java/src/test/java/org/forstdb/util/SizeUnitTest.java @@ -2,7 +2,7 @@ // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). 
-package org.rocksdb.util; +package org.forstdb.util; import org.junit.Test; diff --git a/java/src/test/java/org/rocksdb/util/TestUtil.java b/java/src/test/java/org/forstdb/util/TestUtil.java similarity index 93% rename from java/src/test/java/org/rocksdb/util/TestUtil.java rename to java/src/test/java/org/forstdb/util/TestUtil.java index e4f490c8e..a84d97b92 100644 --- a/java/src/test/java/org/rocksdb/util/TestUtil.java +++ b/java/src/test/java/org/forstdb/util/TestUtil.java @@ -3,15 +3,15 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -package org.rocksdb.util; +package org.forstdb.util; import static java.nio.charset.StandardCharsets.UTF_8; import java.nio.ByteBuffer; import java.util.Random; -import org.rocksdb.CompactionPriority; -import org.rocksdb.Options; -import org.rocksdb.WALRecoveryMode; +import org.forstdb.CompactionPriority; +import org.forstdb.Options; +import org.forstdb.WALRecoveryMode; /** * General test utilities. diff --git a/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java b/java/src/test/java/org/forstdb/util/WriteBatchGetter.java similarity index 97% rename from java/src/test/java/org/rocksdb/util/WriteBatchGetter.java rename to java/src/test/java/org/forstdb/util/WriteBatchGetter.java index 2efa16473..3230eed62 100644 --- a/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java +++ b/java/src/test/java/org/forstdb/util/WriteBatchGetter.java @@ -1,8 +1,8 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -package org.rocksdb.util; +package org.forstdb.util; -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteBatch; +import org.forstdb.RocksDBException; +import org.forstdb.WriteBatch; import java.util.Arrays; diff --git a/logging/auto_roll_logger.cc b/logging/auto_roll_logger.cc index 9e9ad45ae..595e0d246 100644 --- a/logging/auto_roll_logger.cc +++ b/logging/auto_roll_logger.cc @@ -280,7 +280,7 @@ Status CreateLoggerFromOptions(const std::string& dbname, Env* env = options.env; std::string db_absolute_path; Status s = env->GetAbsolutePath(dbname, &db_absolute_path); - TEST_SYNC_POINT_CALLBACK("rocksdb::CreateLoggerFromOptions:AfterGetPath", &s); + TEST_SYNC_POINT_CALLBACK("forstdb::CreateLoggerFromOptions:AfterGetPath", &s); if (!s.ok()) { return s; } diff --git a/src.mk b/src.mk index e168fcd3e..1f596f12d 100644 --- a/src.mk +++ b/src.mk @@ -649,88 +649,88 @@ MICROBENCH_SOURCES = \ microbench/db_basic_bench.cc \ JNI_NATIVE_SOURCES = \ - java/rocksjni/backupenginejni.cc \ - java/rocksjni/backup_engine_options.cc \ - java/rocksjni/checkpoint.cc \ - java/rocksjni/clock_cache.cc \ - java/rocksjni/cache.cc \ - java/rocksjni/columnfamilyhandle.cc \ - java/rocksjni/compact_range_options.cc \ - java/rocksjni/compaction_filter.cc \ - java/rocksjni/compaction_filter_factory.cc \ - java/rocksjni/compaction_filter_factory_jnicallback.cc \ - java/rocksjni/compaction_job_info.cc \ - java/rocksjni/compaction_job_stats.cc \ - java/rocksjni/compaction_options.cc \ - java/rocksjni/compaction_options_fifo.cc \ - java/rocksjni/compaction_options_universal.cc \ - java/rocksjni/comparator.cc \ - java/rocksjni/comparatorjnicallback.cc \ - java/rocksjni/compression_options.cc \ - java/rocksjni/concurrent_task_limiter.cc \ - java/rocksjni/config_options.cc \ - java/rocksjni/export_import_files_metadatajni.cc \ - java/rocksjni/env.cc \ - java/rocksjni/env_flink.cc \ - java/rocksjni/env_flink_test_suite.cc \ - java/rocksjni/env_options.cc \ - 
java/rocksjni/event_listener.cc \ - java/rocksjni/event_listener_jnicallback.cc \ - java/rocksjni/import_column_family_options.cc \ - java/rocksjni/flink_compactionfilterjni.cc \ - java/rocksjni/ingest_external_file_options.cc \ - java/rocksjni/filter.cc \ - java/rocksjni/hyper_clock_cache.cc \ - java/rocksjni/iterator.cc \ - java/rocksjni/jni_perf_context.cc \ - java/rocksjni/jnicallback.cc \ - java/rocksjni/loggerjnicallback.cc \ - java/rocksjni/lru_cache.cc \ - java/rocksjni/memtablejni.cc \ - java/rocksjni/memory_util.cc \ - java/rocksjni/merge_operator.cc \ - java/rocksjni/native_comparator_wrapper_test.cc \ - java/rocksjni/optimistic_transaction_db.cc \ - java/rocksjni/optimistic_transaction_options.cc \ - java/rocksjni/options.cc \ - java/rocksjni/options_util.cc \ - java/rocksjni/persistent_cache.cc \ - java/rocksjni/ratelimiterjni.cc \ - java/rocksjni/remove_emptyvalue_compactionfilterjni.cc \ - java/rocksjni/cassandra_compactionfilterjni.cc \ - java/rocksjni/cassandra_value_operator.cc \ - java/rocksjni/restorejni.cc \ - java/rocksjni/rocks_callback_object.cc \ - java/rocksjni/rocksjni.cc \ - java/rocksjni/rocksdb_exception_test.cc \ - java/rocksjni/slice.cc \ - java/rocksjni/snapshot.cc \ - java/rocksjni/sst_file_manager.cc \ - java/rocksjni/sst_file_writerjni.cc \ - java/rocksjni/sst_file_readerjni.cc \ - java/rocksjni/sst_file_reader_iterator.cc \ - java/rocksjni/sst_partitioner.cc \ - java/rocksjni/statistics.cc \ - java/rocksjni/statisticsjni.cc \ - java/rocksjni/table.cc \ - java/rocksjni/table_filter.cc \ - java/rocksjni/table_filter_jnicallback.cc \ - java/rocksjni/thread_status.cc \ - java/rocksjni/trace_writer.cc \ - java/rocksjni/trace_writer_jnicallback.cc \ - java/rocksjni/transaction.cc \ - java/rocksjni/transaction_db.cc \ - java/rocksjni/transaction_options.cc \ - java/rocksjni/transaction_db_options.cc \ - java/rocksjni/transaction_log.cc \ - java/rocksjni/transaction_notifier.cc \ - java/rocksjni/transaction_notifier_jnicallback.cc \ - java/rocksjni/ttl.cc \ - java/rocksjni/testable_event_listener.cc \ - java/rocksjni/wal_filter.cc \ - java/rocksjni/wal_filter_jnicallback.cc \ - java/rocksjni/write_batch.cc \ - java/rocksjni/writebatchhandlerjnicallback.cc \ - java/rocksjni/write_batch_test.cc \ - java/rocksjni/write_batch_with_index.cc \ - java/rocksjni/write_buffer_manager.cc + java/forstjni/backupenginejni.cc \ + java/forstjni/backup_engine_options.cc \ + java/forstjni/checkpoint.cc \ + java/forstjni/clock_cache.cc \ + java/forstjni/cache.cc \ + java/forstjni/columnfamilyhandle.cc \ + java/forstjni/compact_range_options.cc \ + java/forstjni/compaction_filter.cc \ + java/forstjni/compaction_filter_factory.cc \ + java/forstjni/compaction_filter_factory_jnicallback.cc \ + java/forstjni/compaction_job_info.cc \ + java/forstjni/compaction_job_stats.cc \ + java/forstjni/compaction_options.cc \ + java/forstjni/compaction_options_fifo.cc \ + java/forstjni/compaction_options_universal.cc \ + java/forstjni/comparator.cc \ + java/forstjni/comparatorjnicallback.cc \ + java/forstjni/compression_options.cc \ + java/forstjni/concurrent_task_limiter.cc \ + java/forstjni/config_options.cc \ + java/forstjni/export_import_files_metadatajni.cc \ + java/forstjni/env.cc \ + java/forstjni/env_flink.cc \ + java/forstjni/env_flink_test_suite.cc \ + java/forstjni/env_options.cc \ + java/forstjni/event_listener.cc \ + java/forstjni/event_listener_jnicallback.cc \ + java/forstjni/import_column_family_options.cc \ + java/forstjni/flink_compactionfilterjni.cc \ + 
java/forstjni/ingest_external_file_options.cc \ + java/forstjni/filter.cc \ + java/forstjni/hyper_clock_cache.cc \ + java/forstjni/iterator.cc \ + java/forstjni/jni_perf_context.cc \ + java/forstjni/jnicallback.cc \ + java/forstjni/loggerjnicallback.cc \ + java/forstjni/lru_cache.cc \ + java/forstjni/memtablejni.cc \ + java/forstjni/memory_util.cc \ + java/forstjni/merge_operator.cc \ + java/forstjni/native_comparator_wrapper_test.cc \ + java/forstjni/optimistic_transaction_db.cc \ + java/forstjni/optimistic_transaction_options.cc \ + java/forstjni/options.cc \ + java/forstjni/options_util.cc \ + java/forstjni/persistent_cache.cc \ + java/forstjni/ratelimiterjni.cc \ + java/forstjni/remove_emptyvalue_compactionfilterjni.cc \ + java/forstjni/cassandra_compactionfilterjni.cc \ + java/forstjni/cassandra_value_operator.cc \ + java/forstjni/restorejni.cc \ + java/forstjni/rocks_callback_object.cc \ + java/forstjni/rocksjni.cc \ + java/forstjni/rocksdb_exception_test.cc \ + java/forstjni/slice.cc \ + java/forstjni/snapshot.cc \ + java/forstjni/sst_file_manager.cc \ + java/forstjni/sst_file_writerjni.cc \ + java/forstjni/sst_file_readerjni.cc \ + java/forstjni/sst_file_reader_iterator.cc \ + java/forstjni/sst_partitioner.cc \ + java/forstjni/statistics.cc \ + java/forstjni/statisticsjni.cc \ + java/forstjni/table.cc \ + java/forstjni/table_filter.cc \ + java/forstjni/table_filter_jnicallback.cc \ + java/forstjni/thread_status.cc \ + java/forstjni/trace_writer.cc \ + java/forstjni/trace_writer_jnicallback.cc \ + java/forstjni/transaction.cc \ + java/forstjni/transaction_db.cc \ + java/forstjni/transaction_options.cc \ + java/forstjni/transaction_db_options.cc \ + java/forstjni/transaction_log.cc \ + java/forstjni/transaction_notifier.cc \ + java/forstjni/transaction_notifier_jnicallback.cc \ + java/forstjni/ttl.cc \ + java/forstjni/testable_event_listener.cc \ + java/forstjni/wal_filter.cc \ + java/forstjni/wal_filter_jnicallback.cc \ + java/forstjni/write_batch.cc \ + java/forstjni/writebatchhandlerjnicallback.cc \ + java/forstjni/write_batch_test.cc \ + java/forstjni/write_batch_with_index.cc \ + java/forstjni/write_buffer_manager.cc From 44ac6d8d6888a2ebc0f46c522b25389a9621fc9e Mon Sep 17 00:00:00 2001 From: fredia Date: Thu, 26 Sep 2024 15:41:25 +0800 Subject: [PATCH 380/386] [build] Fix platform-related codes --- CMakeLists.txt | 10 +++++----- env/flink/jvm_util.cc | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 15f2d133d..f30d92d85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -727,6 +727,10 @@ set(SOURCES env/env_encryption.cc env/file_system.cc env/file_system_tracer.cc + env/flink/env_flink.cc + env/flink/jvm_util.cc + env/flink/jni_helper.cc + env/flink/env_flink_test_suite.cc env/fs_remap.cc env/mock_env.cc env/unique_id_gen.cc @@ -1019,11 +1023,7 @@ else() port/port_posix.cc env/env_posix.cc env/fs_posix.cc - env/io_posix.cc - env/flink/env_flink.cc - env/flink/jvm_util.cc - env/flink/jni_helper.cc - env/flink/env_flink_test_suite.cc) + env/io_posix.cc) endif() if(USE_FOLLY_LITE) diff --git a/env/flink/jvm_util.cc b/env/flink/jvm_util.cc index ecd6f9677..ab5cc9663 100644 --- a/env/flink/jvm_util.cc +++ b/env/flink/jvm_util.cc @@ -18,14 +18,14 @@ #include "env/flink/jvm_util.h" -#define UNUSED(x) (void)(x) +#define UNUSED_JNI_PARAMETER(x) (void)(x) namespace ROCKSDB_NAMESPACE { std::atomic jvm_ = std::atomic(nullptr); JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { - UNUSED(reserved); + 
UNUSED_JNI_PARAMETER(reserved); JNIEnv* env = nullptr; if (vm->GetEnv((void**)&env, JNI_VERSION_1_8) != JNI_OK) { return -1; @@ -36,8 +36,8 @@ JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void* reserved) { } JNIEXPORT void JNICALL JNI_OnUnload(JavaVM* vm, void* reserved) { - UNUSED(vm); - UNUSED(reserved); + UNUSED_JNI_PARAMETER(vm); + UNUSED_JNI_PARAMETER(reserved); jvm_.store(nullptr); } From fcb30886c2bd1939c9121916d6e7edf21c5c8c97 Mon Sep 17 00:00:00 2001 From: fredia Date: Thu, 26 Sep 2024 18:47:46 +0800 Subject: [PATCH 381/386] [FLINK-35928][build] Rename jclass to forst in portal.h --- java/forstjni/portal.h | 218 ++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 109 deletions(-) diff --git a/java/forstjni/portal.h b/java/forstjni/portal.h index 1edb9a0f3..2be3f949f 100644 --- a/java/forstjni/portal.h +++ b/java/forstjni/portal.h @@ -235,7 +235,7 @@ class CodeJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Status$Code"); + return JavaClass::getJClass(env, "org/forstdb/Status$Code"); } /** @@ -272,7 +272,7 @@ class SubCodeJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Status$SubCode"); + return JavaClass::getJClass(env, "org/forstdb/Status$SubCode"); } /** @@ -336,7 +336,7 @@ class StatusJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Status"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/Status"); } /** @@ -355,7 +355,7 @@ class StatusJni } static jmethodID mid = - env->GetMethodID(jclazz, "getCode", "()Lorg/rocksdb/Status$Code;"); + env->GetMethodID(jclazz, "getCode", "()Lorg/forstdb/Status$Code;"); assert(mid != nullptr); return mid; } @@ -376,7 +376,7 @@ class StatusJni } static jmethodID mid = env->GetMethodID(jclazz, "getSubCode", - "()Lorg/rocksdb/Status$SubCode;"); + "()Lorg/forstdb/Status$SubCode;"); assert(mid != nullptr); return mid; } @@ -745,7 +745,7 @@ class RocksDBExceptionJni : public JavaException { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaException::getJClass(env, "org/rocksdb/RocksDBException"); + return JavaException::getJClass(env, "org/forstdb/RocksDBException"); } /** @@ -801,7 +801,7 @@ class RocksDBExceptionJni : public JavaException { // get the constructor of org.rocksdb.RocksDBException jmethodID mid = - env->GetMethodID(jclazz, "", "(Lorg/rocksdb/Status;)V"); + env->GetMethodID(jclazz, "", "(Lorg/forstdb/Status;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError std::cerr @@ -892,7 +892,7 @@ class RocksDBExceptionJni : public JavaException { // get the constructor of org.rocksdb.RocksDBException jmethodID mid = env->GetMethodID( - jclazz, "", "(Ljava/lang/String;Lorg/rocksdb/Status;)V"); + jclazz, "", "(Ljava/lang/String;Lorg/forstdb/Status;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError std::cerr @@ -990,7 +990,7 @@ class RocksDBExceptionJni : public JavaException { } static jmethodID mid = - env->GetMethodID(jclazz, "getStatus", "()Lorg/rocksdb/Status;"); + env->GetMethodID(jclazz, "getStatus", "()Lorg/forstdb/Status;"); assert(mid != nullptr); return 
mid; } @@ -2810,7 +2810,7 @@ class RocksDBJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksDB"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/RocksDB"); } }; @@ -2828,7 +2828,7 @@ class OptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Options"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/Options"); } }; @@ -2846,7 +2846,7 @@ class DBOptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/DBOptions"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/DBOptions"); } }; @@ -2866,7 +2866,7 @@ class ColumnFamilyOptionsJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/ColumnFamilyOptions"); + "org/forstdb/ColumnFamilyOptions"); } /** @@ -2918,7 +2918,7 @@ class WriteOptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteOptions"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/WriteOptions"); } }; @@ -2937,7 +2937,7 @@ class ReadOptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ReadOptions"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/ReadOptions"); } }; @@ -2955,7 +2955,7 @@ class WriteBatchJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/WriteBatch"); } /** @@ -3005,7 +3005,7 @@ class WriteBatchHandlerJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/WriteBatch$Handler"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/WriteBatch$Handler"); } /** @@ -3402,7 +3402,7 @@ class WriteBatchSavePointJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WriteBatch$SavePoint"); + return JavaClass::getJClass(env, "org/forstdb/WriteBatch$SavePoint"); } /** @@ -3476,7 +3476,7 @@ class WriteBatchWithIndexJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/WriteBatchWithIndex"); + "org/forstdb/WriteBatchWithIndex"); } }; @@ -3493,7 +3493,7 @@ class HistogramDataJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/HistogramData"); + return JavaClass::getJClass(env, "org/forstdb/HistogramData"); } /** @@ -3533,7 +3533,7 @@ class BackupEngineOptionsJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/BackupEngineOptions"); + "org/forstdb/BackupEngineOptions"); } }; @@ -3552,7 +3552,7 @@ class BackupEngineJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ 
static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/BackupEngine"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/BackupEngine"); } }; @@ -3570,7 +3570,7 @@ class IteratorJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/RocksIterator"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/RocksIterator"); } }; @@ -3596,7 +3596,7 @@ class FilterPolicyJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/FilterPolicy"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/FilterPolicy"); } static jbyte toJavaIndexType(const FilterPolicyTypeJni& filter_policy_type) { @@ -3640,7 +3640,7 @@ class ColumnFamilyHandleJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ColumnFamilyHandle"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/ColumnFamilyHandle"); } }; @@ -3659,7 +3659,7 @@ class FlushOptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/FlushOptions"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/FlushOptions"); } }; @@ -3679,7 +3679,7 @@ class ComparatorOptionsJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/ComparatorOptions"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/ComparatorOptions"); } }; @@ -3700,7 +3700,7 @@ class AbstractCompactionFilterFactoryJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass( - env, "org/rocksdb/AbstractCompactionFilterFactory"); + env, "org/forstdb/AbstractCompactionFilterFactory"); } /** @@ -3754,7 +3754,7 @@ class AbstractTransactionNotifierJni public: static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass( - env, "org/rocksdb/AbstractTransactionNotifier"); + env, "org/forstdb/AbstractTransactionNotifier"); } // Get the java method `snapshotCreated` @@ -3785,7 +3785,7 @@ class AbstractComparatorJniBridge : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/AbstractComparatorJniBridge"); + return JavaClass::getJClass(env, "org/forstdb/AbstractComparatorJniBridge"); } /** @@ -3800,7 +3800,7 @@ class AbstractComparatorJniBridge : public JavaClass { static jmethodID getCompareInternalMethodId(JNIEnv* env, jclass jclazz) { static jmethodID mid = env->GetStaticMethodID(jclazz, "compareInternal", - "(Lorg/rocksdb/AbstractComparator;Ljava/nio/" + "(Lorg/forstdb/AbstractComparator;Ljava/nio/" "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; @@ -3819,7 +3819,7 @@ class AbstractComparatorJniBridge : public JavaClass { jclass jclazz) { static jmethodID mid = env->GetStaticMethodID(jclazz, "findShortestSeparatorInternal", - "(Lorg/rocksdb/AbstractComparator;Ljava/nio/" + "(Lorg/forstdb/AbstractComparator;Ljava/nio/" "ByteBuffer;ILjava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; @@ -3838,7 +3838,7 @@ class AbstractComparatorJniBridge : public 
JavaClass { jclass jclazz) { static jmethodID mid = env->GetStaticMethodID( jclazz, "findShortSuccessorInternal", - "(Lorg/rocksdb/AbstractComparator;Ljava/nio/ByteBuffer;I)I"); + "(Lorg/forstdb/AbstractComparator;Ljava/nio/ByteBuffer;I)I"); assert(mid != nullptr); return mid; } @@ -3859,7 +3859,7 @@ class AbstractComparatorJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractComparator"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/AbstractComparator"); } /** @@ -3899,7 +3899,7 @@ class AbstractSliceJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractSlice"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/AbstractSlice"); } }; @@ -3918,7 +3918,7 @@ class SliceJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Slice"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/Slice"); } /** @@ -3966,7 +3966,7 @@ class DirectSliceJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/DirectSlice"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/DirectSlice"); } /** @@ -4012,7 +4012,7 @@ class BackupInfoJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/BackupInfo"); + return JavaClass::getJClass(env, "org/forstdb/BackupInfo"); } /** @@ -4156,7 +4156,7 @@ class WBWIRocksIteratorJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator"); + return JavaClass::getJClass(env, "org/forstdb/WBWIRocksIterator"); } /** @@ -4175,7 +4175,7 @@ class WBWIRocksIteratorJni : public JavaClass { } static jfieldID fid = env->GetFieldID( - jclazz, "entry", "Lorg/rocksdb/WBWIRocksIterator$WriteEntry;"); + jclazz, "entry", "Lorg/forstdb/WBWIRocksIterator$WriteEntry;"); assert(fid != nullptr); return fid; } @@ -4281,7 +4281,7 @@ class WriteTypeJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WBWIRocksIterator$WriteType"); + return JavaClass::getJClass(env, "org/forstdb/WBWIRocksIterator$WriteType"); } /** @@ -4301,7 +4301,7 @@ class WriteTypeJni : public JavaClass { } jfieldID jfid = env->GetStaticFieldID( - jclazz, name, "Lorg/rocksdb/WBWIRocksIterator$WriteType;"); + jclazz, name, "Lorg/forstdb/WBWIRocksIterator$WriteType;"); if (env->ExceptionCheck()) { // exception occurred while getting field return nullptr; @@ -4329,7 +4329,7 @@ class WriteEntryJni : public JavaClass { */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, - "org/rocksdb/WBWIRocksIterator$WriteEntry"); + "org/forstdb/WBWIRocksIterator$WriteEntry"); } }; @@ -4415,7 +4415,7 @@ class InfoLogLevelJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, 
"org/rocksdb/InfoLogLevel"); + return JavaClass::getJClass(env, "org/forstdb/InfoLogLevel"); } /** @@ -4435,7 +4435,7 @@ class InfoLogLevelJni : public JavaClass { } jfieldID jfid = - env->GetStaticFieldID(jclazz, name, "Lorg/rocksdb/InfoLogLevel;"); + env->GetStaticFieldID(jclazz, name, "Lorg/forstdb/InfoLogLevel;"); if (env->ExceptionCheck()) { // exception occurred while getting field return nullptr; @@ -4455,7 +4455,7 @@ class LoggerJni std::shared_ptr*, LoggerJni> { public: /** - * Get the Java Class org/rocksdb/Logger + * Get the Java Class org/forstdb/Logger * * @param env A pointer to the Java environment * @@ -4464,7 +4464,7 @@ class LoggerJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/Logger"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/Logger"); } /** @@ -4483,7 +4483,7 @@ class LoggerJni } static jmethodID mid = env->GetMethodID( - jclazz, "log", "(Lorg/rocksdb/InfoLogLevel;Ljava/lang/String;)V"); + jclazz, "log", "(Lorg/forstdb/InfoLogLevel;Ljava/lang/String;)V"); assert(mid != nullptr); return mid; } @@ -4503,7 +4503,7 @@ class BatchResultJni : public JavaClass { */ static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass( - env, "org/rocksdb/TransactionLogIterator$BatchResult"); + env, "org/forstdb/TransactionLogIterator$BatchResult"); } /** @@ -6044,7 +6044,7 @@ class TransactionJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/Transaction"); + return JavaClass::getJClass(env, "org/forstdb/Transaction"); } /** @@ -6072,7 +6072,7 @@ class TransactionJni : public JavaClass { jmethodID mid = env->GetMethodID( jclazz, "newWaitingTransactions", - "(JLjava/lang/String;[J)Lorg/rocksdb/Transaction$WaitingTransactions;"); + "(JLjava/lang/String;[J)Lorg/forstdb/Transaction$WaitingTransactions;"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -6133,7 +6133,7 @@ class TransactionDBJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB"); + return JavaClass::getJClass(env, "org/forstdb/TransactionDB"); } /** @@ -6162,7 +6162,7 @@ class TransactionDBJni : public JavaClass { jmethodID mid = env->GetMethodID( jclazz, "newDeadlockInfo", - "(JJLjava/lang/String;Z)Lorg/rocksdb/TransactionDB$DeadlockInfo;"); + "(JJLjava/lang/String;Z)Lorg/forstdb/TransactionDB$DeadlockInfo;"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -6238,7 +6238,7 @@ class KeyLockInfoJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$KeyLockInfo"); + return JavaClass::getJClass(env, "org/forstdb/TransactionDB$KeyLockInfo"); } /** @@ -6309,7 +6309,7 @@ class DeadlockInfoJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockInfo"); + return JavaClass::getJClass(env, "org/forstdb/TransactionDB$DeadlockInfo"); } }; @@ -6326,7 +6326,7 @@ class DeadlockPathJni : public JavaClass { * 
OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TransactionDB$DeadlockPath"); + return JavaClass::getJClass(env, "org/forstdb/TransactionDB$DeadlockPath"); } /** @@ -6384,14 +6384,14 @@ class AbstractTableFilterJni } static jmethodID mid = - env->GetMethodID(jclazz, "filter", "(Lorg/rocksdb/TableProperties;)Z"); + env->GetMethodID(jclazz, "filter", "(Lorg/forstdb/TableProperties;)Z"); assert(mid != nullptr); return mid; } private: static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFilter"); + return JavaClass::getJClass(env, "org/forstdb/TableFilter"); } }; @@ -6567,7 +6567,7 @@ class TablePropertiesJni : public JavaClass { private: static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableProperties"); + return JavaClass::getJClass(env, "org/forstdb/TableProperties"); } }; @@ -6583,7 +6583,7 @@ class ColumnFamilyDescriptorJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyDescriptor"); + return JavaClass::getJClass(env, "org/forstdb/ColumnFamilyDescriptor"); } /** @@ -6608,7 +6608,7 @@ class ColumnFamilyDescriptorJni : public JavaClass { } jmethodID mid = env->GetMethodID(jclazz, "", - "([BLorg/rocksdb/ColumnFamilyOptions;)V"); + "([BLorg/forstdb/ColumnFamilyOptions;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError env->DeleteLocalRef(jcf_name); @@ -6660,7 +6660,7 @@ class ColumnFamilyDescriptorJni : public JavaClass { } static jmethodID mid = env->GetMethodID( - jclazz, "columnFamilyOptions", "()Lorg/rocksdb/ColumnFamilyOptions;"); + jclazz, "columnFamilyOptions", "()Lorg/forstdb/ColumnFamilyOptions;"); assert(mid != nullptr); return mid; } @@ -7098,7 +7098,7 @@ class ThreadStatusJni : public JavaClass { * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ThreadStatus"); + return JavaClass::getJClass(env, "org/forstdb/ThreadStatus"); } /** @@ -7422,7 +7422,7 @@ class LogFileJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LogFile"); + return JavaClass::getJClass(env, "org/forstdb/LogFile"); } }; @@ -7544,7 +7544,7 @@ class LiveFileMetaDataJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LiveFileMetaData"); + return JavaClass::getJClass(env, "org/forstdb/LiveFileMetaData"); } }; @@ -7651,7 +7651,7 @@ class SstFileMetaDataJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/SstFileMetaData"); + return JavaClass::getJClass(env, "org/forstdb/SstFileMetaData"); } }; @@ -7675,7 +7675,7 @@ class LevelMetaDataJni : public JavaClass { } jmethodID mid = env->GetMethodID(jclazz, "", - "(IJ[Lorg/rocksdb/SstFileMetaData;)V"); + "(IJ[Lorg/forstdb/SstFileMetaData;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -7717,7 +7717,7 @@ class LevelMetaDataJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/LevelMetaData"); + return JavaClass::getJClass(env, "org/forstdb/LevelMetaData"); } }; @@ -7742,7 +7742,7 @@ class 
ColumnFamilyMetaDataJni : public JavaClass { } jmethodID mid = env->GetMethodID(jclazz, "", - "(JJ[B[Lorg/rocksdb/LevelMetaData;)V"); + "(JJ[B[Lorg/forstdb/LevelMetaData;)V"); if (mid == nullptr) { // exception thrown: NoSuchMethodException or OutOfMemoryError return nullptr; @@ -7796,7 +7796,7 @@ class ColumnFamilyMetaDataJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ColumnFamilyMetaData"); + return JavaClass::getJClass(env, "org/forstdb/ColumnFamilyMetaData"); } }; @@ -7817,7 +7817,7 @@ class AbstractTraceWriterJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/AbstractTraceWriter"); + "org/forstdb/AbstractTraceWriter"); } /** @@ -7896,7 +7896,7 @@ class AbstractWalFilterJni * OutOfMemoryError or ExceptionInInitializerError exceptions is thrown */ static jclass getJClass(JNIEnv* env) { - return RocksDBNativeClass::getJClass(env, "org/rocksdb/AbstractWalFilter"); + return RocksDBNativeClass::getJClass(env, "org/forstdb/AbstractWalFilter"); } /** @@ -8157,7 +8157,7 @@ class AbstractEventListenerJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/AbstractEventListener"); + "org/forstdb/AbstractEventListener"); } /** @@ -8171,7 +8171,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onFlushCompletedProxy", - "(JLorg/rocksdb/FlushJobInfo;)V"); + "(JLorg/forstdb/FlushJobInfo;)V"); assert(mid != nullptr); return mid; } @@ -8187,7 +8187,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onFlushBeginProxy", - "(JLorg/rocksdb/FlushJobInfo;)V"); + "(JLorg/forstdb/FlushJobInfo;)V"); assert(mid != nullptr); return mid; } @@ -8203,7 +8203,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onTableFileDeleted", "(Lorg/rocksdb/TableFileDeletionInfo;)V"); + jclazz, "onTableFileDeleted", "(Lorg/forstdb/TableFileDeletionInfo;)V"); assert(mid != nullptr); return mid; } @@ -8220,7 +8220,7 @@ class AbstractEventListenerJni assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onCompactionBeginProxy", - "(JLorg/rocksdb/CompactionJobInfo;)V"); + "(JLorg/forstdb/CompactionJobInfo;)V"); assert(mid != nullptr); return mid; } @@ -8237,7 +8237,7 @@ class AbstractEventListenerJni assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onCompactionCompletedProxy", - "(JLorg/rocksdb/CompactionJobInfo;)V"); + "(JLorg/forstdb/CompactionJobInfo;)V"); assert(mid != nullptr); return mid; } @@ -8253,7 +8253,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onTableFileCreated", "(Lorg/rocksdb/TableFileCreationInfo;)V"); + jclazz, "onTableFileCreated", "(Lorg/forstdb/TableFileCreationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8270,7 +8270,7 @@ class AbstractEventListenerJni assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onTableFileCreationStarted", - "(Lorg/rocksdb/TableFileCreationBriefInfo;)V"); + "(Lorg/forstdb/TableFileCreationBriefInfo;)V"); assert(mid != nullptr); return mid; } @@ -8286,7 +8286,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static 
jmethodID mid = env->GetMethodID(jclazz, "onMemTableSealed", - "(Lorg/rocksdb/MemTableInfo;)V"); + "(Lorg/forstdb/MemTableInfo;)V"); assert(mid != nullptr); return mid; } @@ -8304,7 +8304,7 @@ class AbstractEventListenerJni assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onColumnFamilyHandleDeletionStarted", - "(Lorg/rocksdb/ColumnFamilyHandle;)V"); + "(Lorg/forstdb/ColumnFamilyHandle;)V"); assert(mid != nullptr); return mid; } @@ -8321,7 +8321,7 @@ class AbstractEventListenerJni assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onExternalFileIngestedProxy", - "(JLorg/rocksdb/ExternalFileIngestionInfo;)V"); + "(JLorg/forstdb/ExternalFileIngestionInfo;)V"); assert(mid != nullptr); return mid; } @@ -8337,7 +8337,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onBackgroundErrorProxy", - "(BLorg/rocksdb/Status;)V"); + "(BLorg/forstdb/Status;)V"); assert(mid != nullptr); return mid; } @@ -8353,7 +8353,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onStallConditionsChanged", - "(Lorg/rocksdb/WriteStallInfo;)V"); + "(Lorg/forstdb/WriteStallInfo;)V"); assert(mid != nullptr); return mid; } @@ -8369,7 +8369,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileReadFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileReadFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8385,7 +8385,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileWriteFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileWriteFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8401,7 +8401,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileFlushFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileFlushFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8417,7 +8417,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileSyncFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8433,7 +8433,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileRangeSyncFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileRangeSyncFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8449,7 +8449,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileTruncateFinish", "(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileTruncateFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8465,7 +8465,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID( - jclazz, "onFileCloseFinish", 
"(Lorg/rocksdb/FileOperationInfo;)V"); + jclazz, "onFileCloseFinish", "(Lorg/forstdb/FileOperationInfo;)V"); assert(mid != nullptr); return mid; } @@ -8497,7 +8497,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryBeginProxy", - "(BLorg/rocksdb/Status;)Z"); + "(BLorg/forstdb/Status;)Z"); assert(mid != nullptr); return mid; } @@ -8513,7 +8513,7 @@ class AbstractEventListenerJni jclass jclazz = getJClass(env); assert(jclazz != nullptr); static jmethodID mid = env->GetMethodID(jclazz, "onErrorRecoveryCompleted", - "(Lorg/rocksdb/Status;)V"); + "(Lorg/forstdb/Status;)V"); assert(mid != nullptr); return mid; } @@ -8567,7 +8567,7 @@ class FlushJobInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/FlushJobInfo"); + return JavaClass::getJClass(env, "org/forstdb/FlushJobInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8613,13 +8613,13 @@ class TableFileDeletionInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileDeletionInfo"); + return JavaClass::getJClass(env, "org/forstdb/TableFileDeletionInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { return env->GetMethodID( clazz, "", - "(Ljava/lang/String;Ljava/lang/String;ILorg/rocksdb/Status;)V"); + "(Ljava/lang/String;Ljava/lang/String;ILorg/forstdb/Status;)V"); } }; @@ -8637,7 +8637,7 @@ class CompactionJobInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/CompactionJobInfo"); + return JavaClass::getJClass(env, "org/forstdb/CompactionJobInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8689,13 +8689,13 @@ class TableFileCreationInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationInfo"); + return JavaClass::getJClass(env, "org/forstdb/TableFileCreationInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { return env->GetMethodID( clazz, "", - "(JLorg/rocksdb/TableProperties;Lorg/rocksdb/Status;Ljava/lang/" + "(JLorg/forstdb/TableProperties;Lorg/forstdb/Status;Ljava/lang/" "String;Ljava/lang/String;Ljava/lang/String;IB)V"); } }; @@ -8729,7 +8729,7 @@ class TableFileCreationBriefInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/TableFileCreationBriefInfo"); + return JavaClass::getJClass(env, "org/forstdb/TableFileCreationBriefInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8759,7 +8759,7 @@ class MemTableInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/MemTableInfo"); + return JavaClass::getJClass(env, "org/forstdb/MemTableInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8806,13 +8806,13 @@ class ExternalFileIngestionInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/ExternalFileIngestionInfo"); + return JavaClass::getJClass(env, "org/forstdb/ExternalFileIngestionInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { return env->GetMethodID(clazz, "", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/" - "String;JLorg/rocksdb/TableProperties;)V"); + 
"String;JLorg/forstdb/TableProperties;)V"); } }; @@ -8834,7 +8834,7 @@ class WriteStallInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/WriteStallInfo"); + return JavaClass::getJClass(env, "org/forstdb/WriteStallInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8867,12 +8867,12 @@ class FileOperationInfoJni : public JavaClass { } static jclass getJClass(JNIEnv* env) { - return JavaClass::getJClass(env, "org/rocksdb/FileOperationInfo"); + return JavaClass::getJClass(env, "org/forstdb/FileOperationInfo"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { return env->GetMethodID(clazz, "", - "(Ljava/lang/String;JJJJLorg/rocksdb/Status;)V"); + "(Ljava/lang/String;JJJJLorg/forstdb/Status;)V"); } }; @@ -8890,7 +8890,7 @@ class CompactRangeOptionsTimestampJni : public JavaClass { static jclass getJClass(JNIEnv* env) { return JavaClass::getJClass(env, - "org/rocksdb/CompactRangeOptions$Timestamp"); + "org/forstdb/CompactRangeOptions$Timestamp"); } static jmethodID getConstructorMethodId(JNIEnv* env, jclass clazz) { @@ -8914,7 +8914,7 @@ class BlockBasedTableOptionsJni */ static jclass getJClass(JNIEnv* env) { return RocksDBNativeClass::getJClass(env, - "org/rocksdb/BlockBasedTableConfig"); + "org/forstdb/BlockBasedTableConfig"); } /** From 3c86325400bcd2ff7154805beebd55b05d9d427a Mon Sep 17 00:00:00 2001 From: fredia Date: Fri, 27 Sep 2024 12:19:28 +0800 Subject: [PATCH 382/386] [FLINK-35928][build] Rename .so to forst --- Makefile | 80 +++++++++---------- java/Makefile | 28 +++---- .../java/org/forstdb/NativeLibraryLoader.java | 4 +- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Makefile b/Makefile index bd636c840..c5d8f7155 100644 --- a/Makefile +++ b/Makefile @@ -2109,18 +2109,18 @@ ifneq ($(origin JNI_LIBC), undefined) JNI_LIBC_POSTFIX = -$(JNI_LIBC) endif -ifeq (,$(ROCKSDBJNILIB)) +ifeq (,$(FORSTDBJNILIB)) ifneq (,$(filter ppc% s390x arm64 aarch64 riscv64 sparc64 loongarch64, $(MACHINE))) - ROCKSDBJNILIB = librocksdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so + FORSTDBJNILIB = libforstdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so else - ROCKSDBJNILIB = librocksdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so + FORSTDBJNILIB = libforstdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so endif endif ROCKSDB_JAVA_VERSION ?= $(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH) -ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar -ROCKSDB_JAR_ALL = rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -ROCKSDB_JAVADOCS_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar -ROCKSDB_SOURCES_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-sources.jar +ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar +ROCKSDB_JAR_ALL = forstdbjni-$(ROCKSDB_JAVA_VERSION).jar +ROCKSDB_JAVADOCS_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar +ROCKSDB_SOURCES_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-sources.jar SHA256_CMD = sha256sum ZLIB_VER ?= 1.3 @@ -2141,16 +2141,16 @@ ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifeq ($(PLATFORM), OS_MACOSX) -ifeq (,$(findstring librocksdbjni-osx,$(ROCKSDBJNILIB))) +ifeq (,$(findstring libforstdbjni-osx,$(FORSTDBJNILIB))) ifeq ($(MACHINE),arm64) - ROCKSDBJNILIB = librocksdbjni-osx-arm64.jnilib + FORSTDBJNILIB = libforstdbjni-osx-arm64.jnilib else ifeq ($(MACHINE),x86_64) - ROCKSDBJNILIB = librocksdbjni-osx-x86_64.jnilib + FORSTDBJNILIB = libforstdbjni-osx-x86_64.jnilib else - 
ROCKSDBJNILIB = librocksdbjni-osx.jnilib + FORSTDBJNILIB = libforstdbjni-osx.jnilib endif endif - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-osx.jar + ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-osx.jar SHA256_CMD = openssl sha256 -r ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin @@ -2161,25 +2161,25 @@ endif ifeq ($(PLATFORM), OS_FREEBSD) JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/freebsd - ROCKSDBJNILIB = librocksdbjni-freebsd$(ARCH).so - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-freebsd$(ARCH).jar + FORSTDBJNILIB = libforstdbjni-freebsd$(ARCH).so + ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-freebsd$(ARCH).jar endif ifeq ($(PLATFORM), OS_SOLARIS) - ROCKSDBJNILIB = librocksdbjni-solaris$(ARCH).so - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar + FORSTDBJNILIB = libforstdbjni-solaris$(ARCH).so + ROCKSDB_JAR = forstdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/solaris SHA256_CMD = digest -a sha256 endif ifeq ($(PLATFORM), OS_AIX) JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/aix - ROCKSDBJNILIB = librocksdbjni-aix.so + FORSTDBJNILIB = libforstdbjni-aix.so EXTRACT_SOURCES = gunzip < TAR_GZ | tar xvf - SNAPPY_MAKE_TARGET = libsnappy.la endif ifeq ($(PLATFORM), OS_OPENBSD) JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/openbsd - ROCKSDBJNILIB = librocksdbjni-openbsd$(ARCH).so - ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar + FORSTDBJNILIB = libforstdbjni-openbsd$(ARCH).so + ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar endif export SHA256_CMD @@ -2281,14 +2281,14 @@ endif rocksdbjavastaticosx: rocksdbjavastaticosx_archs cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstdbjni-osx-x86_64.jnilib libforstdbjni-osx-arm64.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 rocksdbjavastaticosx_ub: rocksdbjavastaticosx_archs - cd java/target; lipo -create -output librocksdbjni-osx.jnilib librocksdbjni-osx-x86_64.jnilib librocksdbjni-osx-arm64.jnilib + cd java/target; lipo -create -output libforstdbjni-osx.jnilib libforstdbjni-osx-x86_64.jnilib libforstdbjni-osx-arm64.jnilib cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) librocksdbjni-osx.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstdbjni-osx.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 @@ -2304,7 +2304,7 @@ endif $(MAKE) clean-rocks ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_deps ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_libobjects - ARCHFLAG="-arch $*" ROCKSDBJNILIB="librocksdbjni-osx-$*.jnilib" $(MAKE) rocksdbjavastatic_javalib + ARCHFLAG="-arch $*" FORSTDBJNILIB="libforstdbjni-osx-$*.jnilib" $(MAKE) rocksdbjavastatic_javalib ifeq ($(JAR_CMD),) ifneq ($(JAVA_HOME),) @@ -2315,18 +2315,18 @@ endif endif rocksdbjavastatic_javalib: cd java; 
$(MAKE) javalib - rm -f java/target/$(ROCKSDBJNILIB) + rm -f java/target/$(FORSTDBJNILIB) $(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC \ - -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) \ + -o ./java/target/$(FORSTDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) \ $(LIB_OBJECTS) $(COVERAGEFLAGS) \ $(JAVA_COMPRESSIONS) $(JAVA_STATIC_LDFLAGS) cd java/target;if [ "$(DEBUG_LEVEL)" == "0" ]; then \ - strip $(STRIPFLAGS) $(ROCKSDBJNILIB); \ + strip $(STRIPFLAGS) $(FORSTDBJNILIB); \ fi rocksdbjava_jar: cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTDBJNILIB) cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 @@ -2345,14 +2345,14 @@ rocksdbjavastatic_libobjects: $(LIB_OBJECTS) rocksdbjavastaticrelease: rocksdbjavastaticosx rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java/crossbuild && (vagrant destroy -f || true) && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 && vagrant up linux64-musl && vagrant halt linux64-musl cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstdbjni-*.so libforstdbjni-*.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md jar -uf java/target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) librocksdbjni-*.so librocksdbjni-*.jnilib librocksdbjni-win64.dll + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstdbjni-*.so libforstdbjni-*.jnilib libforstdbjni-win64.dll cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 @@ -2439,21 +2439,21 @@ rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastatic ROCKSDB_JAVA_RELEASE_CLASSIFIERS = javadoc sources linux64 linux32 linux64-musl linux32-musl osx win64 rocksdbjavastaticpublishcentral: rocksdbjavageneratepom - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -Dclassifier=$(classifier);) + mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml 
-Dfile=java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -Dclassifier=$(classifier);) rocksdbjavageneratepom: cd java;cat pom.xml.template | sed 's/\$${ROCKSDB_JAVA_VERSION}/$(ROCKSDB_JAVA_VERSION)/' > pom.xml rocksdbjavastaticnexusbundlejar: rocksdbjavageneratepom openssl sha1 -r java/pom.xml | awk '{ print $$1 }' > java/target/pom.xml.sha1 - openssl sha1 -r java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar | awk '{ print $$1 }' > java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), openssl sha1 -r java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar | awk '{ print $$1 }' > java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1;) + openssl sha1 -r java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar | awk '{ print $$1 }' > java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), openssl sha1 -r java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar | awk '{ print $$1 }' > java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1;) gpg --yes --output java/target/pom.xml.asc -ab java/pom.xml - gpg --yes -ab java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), gpg --yes -ab java/target/rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar;) - $(JAR_CMD) cvf java/target/nexus-bundle-rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java pom.xml -C java/target pom.xml.sha1 -C java/target pom.xml.asc -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar.asc - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), $(JAR_CMD) uf java/target/nexus-bundle-rocksdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1 -C java/target rocksdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.asc;) + gpg --yes -ab java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), gpg --yes -ab java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar;) + $(JAR_CMD) cvf java/target/nexus-bundle-forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java pom.xml -C java/target pom.xml.sha1 -C java/target pom.xml.asc -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.asc + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), $(JAR_CMD) uf java/target/nexus-bundle-forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1 -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.asc;) # A version of each $(LIBOBJECTS) compiled with -fPIC @@ -2466,10 +2466,10 @@ ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif $(AM_V_GEN)cd java; $(MAKE) javalib; - $(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB) - $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. 
-I./java/forstjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) + $(AM_V_at)rm -f ./java/target/$(FORSTDBJNILIB) + $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. -I./java/forstjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(FORSTDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) $(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(ROCKSDBJNILIB) + $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTDBJNILIB) $(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class $(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 diff --git a/java/Makefile b/java/Makefile index 7a6915cf0..66a039845 100644 --- a/java/Makefile +++ b/java/Makefile @@ -357,32 +357,32 @@ java: java-version sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni_not_found - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni_not_found + $(AM_V_at)@rm -rf /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni_not_found + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBSample /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni_not_found column_family_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBColumnFamilySample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBColumnFamilySample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBColumnFamilySample /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni transaction_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/TransactionSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) TransactionSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni + $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) TransactionSample /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni optimistic_transaction_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/OptimisticTransactionSample.java - $(AM_V_at)@rm -rf /tmp/rocksdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) OptimisticTransactionSample /tmp/rocksdbjni - $(AM_V_at)@rm -rf /tmp/rocksdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni + $(JAVA_CMD) -ea 
-Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) OptimisticTransactionSample /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstdbjni $(JAVA_TEST_LIBDIR): mkdir -p "$(JAVA_TEST_LIBDIR)" diff --git a/java/src/main/java/org/forstdb/NativeLibraryLoader.java b/java/src/main/java/org/forstdb/NativeLibraryLoader.java index 955ddc6bb..478119dcf 100644 --- a/java/src/main/java/org/forstdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/forstdb/NativeLibraryLoader.java @@ -16,7 +16,7 @@ public class NativeLibraryLoader { private static final NativeLibraryLoader instance = new NativeLibraryLoader(); private static boolean initialized = false; - private static final String ROCKSDB_LIBRARY_NAME = "rocksdb"; + private static final String ROCKSDB_LIBRARY_NAME = "forstdb"; private static final String sharedLibraryName = Environment.getSharedLibraryName(ROCKSDB_LIBRARY_NAME); @@ -27,7 +27,7 @@ public class NativeLibraryLoader { Environment.getJniLibraryFileName(ROCKSDB_LIBRARY_NAME); private static final /* @Nullable */ String fallbackJniLibraryFileName = Environment.getFallbackJniLibraryFileName(ROCKSDB_LIBRARY_NAME); - private static final String tempFilePrefix = "librocksdbjni"; + private static final String tempFilePrefix = "libforstdbjni"; private static final String tempFileSuffix = Environment.getJniLibraryExtension(); /** From 98f5a1a8fe0eb5abd641f54ad207703049905cdd Mon Sep 17 00:00:00 2001 From: fredia Date: Fri, 27 Sep 2024 16:23:00 +0800 Subject: [PATCH 383/386] [FLINK-35928][build] break when loading library is interrupted --- java/src/main/java/org/forstdb/RocksDB.java | 6 ++++-- java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/org/forstdb/RocksDB.java b/java/src/main/java/org/forstdb/RocksDB.java index 76f74609a..ede573454 100644 --- a/java/src/main/java/org/forstdb/RocksDB.java +++ b/java/src/main/java/org/forstdb/RocksDB.java @@ -88,7 +88,8 @@ public static void loadLibrary() { try { Thread.sleep(10); } catch(final InterruptedException e) { - //ignore + throw new RuntimeException("Loading the RocksDB shared library is interrupted", + e); } } } @@ -150,7 +151,8 @@ public static void loadLibrary(final List paths) { try { Thread.sleep(10); } catch(final InterruptedException e) { - //ignore + throw new RuntimeException("Loading the RocksDB shared library is interrupted", + e); } } } diff --git a/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java index 4a983ae64..bf383c91f 100644 --- a/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java +++ b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java @@ -25,7 +25,7 @@ public void tempFolder() throws IOException { NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); final Path path = Paths.get(temporaryFolder.getRoot().getAbsolutePath(), - Environment.getJniLibraryFileName("rocksdb")); + Environment.getJniLibraryFileName("forstdb")); assertThat(Files.exists(path)).isTrue(); assertThat(Files.isReadable(path)).isTrue(); } From eef75e6ac77b1b45d399e43aebe2448ae3690883 Mon Sep 17 00:00:00 2001 From: fredia Date: Fri, 27 Sep 2024 18:03:33 +0800 Subject: [PATCH 384/386] [FLINK-35928][build] rename forstdbjni to forstjni --- Makefile | 80 +++++++++---------- java/Makefile | 28 +++---- java/crossbuild/build-linux-alpine.sh | 4 +- java/crossbuild/build-linux-centos.sh | 4 +- 
java/crossbuild/build-linux.sh | 4 +- java/crossbuild/build-win.bat | 4 +- java/crossbuild/docker-build-linux-alpine.sh | 2 +- java/crossbuild/docker-build-linux-centos.sh | 2 +- .../java/org/forstdb/NativeLibraryLoader.java | 4 +- .../org/forstdb/NativeLibraryLoaderTest.java | 2 +- 10 files changed, 67 insertions(+), 67 deletions(-) diff --git a/Makefile b/Makefile index c5d8f7155..b643f96a1 100644 --- a/Makefile +++ b/Makefile @@ -2109,18 +2109,18 @@ ifneq ($(origin JNI_LIBC), undefined) JNI_LIBC_POSTFIX = -$(JNI_LIBC) endif -ifeq (,$(FORSTDBJNILIB)) +ifeq (,$(FORSTJNILIB)) ifneq (,$(filter ppc% s390x arm64 aarch64 riscv64 sparc64 loongarch64, $(MACHINE))) - FORSTDBJNILIB = libforstdbjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so + FORSTJNILIB = libforstjni-linux-$(MACHINE)$(JNI_LIBC_POSTFIX).so else - FORSTDBJNILIB = libforstdbjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so + FORSTJNILIB = libforstjni-linux$(ARCH)$(JNI_LIBC_POSTFIX).so endif endif ROCKSDB_JAVA_VERSION ?= $(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH) -ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar -ROCKSDB_JAR_ALL = forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -ROCKSDB_JAVADOCS_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar -ROCKSDB_SOURCES_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-sources.jar +ROCKSDB_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar +ROCKSDB_JAR_ALL = forstjni-$(ROCKSDB_JAVA_VERSION).jar +ROCKSDB_JAVADOCS_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar +ROCKSDB_SOURCES_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-sources.jar SHA256_CMD = sha256sum ZLIB_VER ?= 1.3 @@ -2141,16 +2141,16 @@ ZSTD_DOWNLOAD_BASE ?= https://github.com/facebook/zstd/archive CURL_SSL_OPTS ?= --tlsv1 ifeq ($(PLATFORM), OS_MACOSX) -ifeq (,$(findstring libforstdbjni-osx,$(FORSTDBJNILIB))) +ifeq (,$(findstring libforstjni-osx,$(FORSTJNILIB))) ifeq ($(MACHINE),arm64) - FORSTDBJNILIB = libforstdbjni-osx-arm64.jnilib + FORSTJNILIB = libforstjni-osx-arm64.jnilib else ifeq ($(MACHINE),x86_64) - FORSTDBJNILIB = libforstdbjni-osx-x86_64.jnilib + FORSTJNILIB = libforstjni-osx-x86_64.jnilib else - FORSTDBJNILIB = libforstdbjni-osx.jnilib + FORSTJNILIB = libforstjni-osx.jnilib endif endif - ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-osx.jar + ROCKSDB_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-osx.jar SHA256_CMD = openssl sha256 -r ifneq ("$(wildcard $(JAVA_HOME)/include/darwin)","") JAVA_INCLUDE = -I$(JAVA_HOME)/include -I $(JAVA_HOME)/include/darwin @@ -2161,25 +2161,25 @@ endif ifeq ($(PLATFORM), OS_FREEBSD) JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/freebsd - FORSTDBJNILIB = libforstdbjni-freebsd$(ARCH).so - ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-freebsd$(ARCH).jar + FORSTJNILIB = libforstjni-freebsd$(ARCH).so + ROCKSDB_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-freebsd$(ARCH).jar endif ifeq ($(PLATFORM), OS_SOLARIS) - FORSTDBJNILIB = libforstdbjni-solaris$(ARCH).so - ROCKSDB_JAR = forstdbjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar + FORSTJNILIB = libforstjni-solaris$(ARCH).so + ROCKSDB_JAR = forstjni-$(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)-solaris$(ARCH).jar JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/solaris SHA256_CMD = digest -a sha256 endif ifeq ($(PLATFORM), OS_AIX) JAVA_INCLUDE = -I$(JAVA_HOME)/include/ -I$(JAVA_HOME)/include/aix - FORSTDBJNILIB = libforstdbjni-aix.so + FORSTJNILIB = libforstjni-aix.so EXTRACT_SOURCES = gunzip < TAR_GZ | tar xvf - SNAPPY_MAKE_TARGET = libsnappy.la endif ifeq 
($(PLATFORM), OS_OPENBSD) JAVA_INCLUDE = -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/openbsd - FORSTDBJNILIB = libforstdbjni-openbsd$(ARCH).so - ROCKSDB_JAR = forstdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar + FORSTJNILIB = libforstjni-openbsd$(ARCH).so + ROCKSDB_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar endif export SHA256_CMD @@ -2281,14 +2281,14 @@ endif rocksdbjavastaticosx: rocksdbjavastaticosx_archs cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstdbjni-osx-x86_64.jnilib libforstdbjni-osx-arm64.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstjni-osx-x86_64.jnilib libforstjni-osx-arm64.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 rocksdbjavastaticosx_ub: rocksdbjavastaticosx_archs - cd java/target; lipo -create -output libforstdbjni-osx.jnilib libforstdbjni-osx-x86_64.jnilib libforstdbjni-osx-arm64.jnilib + cd java/target; lipo -create -output libforstjni-osx.jnilib libforstjni-osx-x86_64.jnilib libforstjni-osx-arm64.jnilib cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstdbjni-osx.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) libforstjni-osx.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 @@ -2304,7 +2304,7 @@ endif $(MAKE) clean-rocks ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_deps ARCHFLAG="-arch $*" $(MAKE) rocksdbjavastatic_libobjects - ARCHFLAG="-arch $*" FORSTDBJNILIB="libforstdbjni-osx-$*.jnilib" $(MAKE) rocksdbjavastatic_javalib + ARCHFLAG="-arch $*" FORSTJNILIB="libforstjni-osx-$*.jnilib" $(MAKE) rocksdbjavastatic_javalib ifeq ($(JAR_CMD),) ifneq ($(JAVA_HOME),) @@ -2315,18 +2315,18 @@ endif endif rocksdbjavastatic_javalib: cd java; $(MAKE) javalib - rm -f java/target/$(FORSTDBJNILIB) + rm -f java/target/$(FORSTJNILIB) $(CXX) $(CXXFLAGS) -I./java/. 
$(JAVA_INCLUDE) -shared -fPIC \ - -o ./java/target/$(FORSTDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) \ + -o ./java/target/$(FORSTJNILIB) $(ALL_JNI_NATIVE_SOURCES) \ $(LIB_OBJECTS) $(COVERAGEFLAGS) \ $(JAVA_COMPRESSIONS) $(JAVA_STATIC_LDFLAGS) cd java/target;if [ "$(DEBUG_LEVEL)" == "0" ]; then \ - strip $(STRIPFLAGS) $(FORSTDBJNILIB); \ + strip $(STRIPFLAGS) $(FORSTJNILIB); \ fi rocksdbjava_jar: cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTDBJNILIB) + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTJNILIB) cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 @@ -2345,14 +2345,14 @@ rocksdbjavastatic_libobjects: $(LIB_OBJECTS) rocksdbjavastaticrelease: rocksdbjavastaticosx rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java/crossbuild && (vagrant destroy -f || true) && vagrant up linux32 && vagrant halt linux32 && vagrant up linux64 && vagrant halt linux64 && vagrant up linux64-musl && vagrant halt linux64-musl cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstdbjni-*.so libforstdbjni-*.jnilib + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstjni-*.so libforstjni-*.jnilib cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 rocksdbjavastaticreleasedocker: rocksdbjavastaticosx rocksdbjavastaticdockerx86 rocksdbjavastaticdockerx86_64 rocksdbjavastaticdockerx86musl rocksdbjavastaticdockerx86_64musl rocksdbjava_javadocs_jar rocksdbjava_sources_jar cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR_ALL) HISTORY*.md jar -uf java/target/$(ROCKSDB_JAR_ALL) HISTORY*.md - cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstdbjni-*.so libforstdbjni-*.jnilib libforstdbjni-win64.dll + cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR_ALL) libforstjni-*.so libforstjni-*.jnilib libforstjni-win64.dll cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR_ALL) org/forstdb/*.class org/forstdb/util/*.class openssl sha1 java/target/$(ROCKSDB_JAR_ALL) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR_ALL).sha1 @@ -2439,21 +2439,21 @@ rocksdbjavastaticpublishdocker: rocksdbjavastaticreleasedocker rocksdbjavastatic ROCKSDB_JAVA_RELEASE_CLASSIFIERS = javadoc sources linux64 linux32 linux64-musl linux32-musl osx win64 rocksdbjavastaticpublishcentral: rocksdbjavageneratepom - mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -Dclassifier=$(classifier);) + mvn gpg:sign-and-deploy-file -Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/forstjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), mvn gpg:sign-and-deploy-file 
-Durl=https://oss.sonatype.org/service/local/staging/deploy/maven2/ -DrepositoryId=sonatype-nexus-staging -DpomFile=java/pom.xml -Dfile=java/target/forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -Dclassifier=$(classifier);) rocksdbjavageneratepom: cd java;cat pom.xml.template | sed 's/\$${ROCKSDB_JAVA_VERSION}/$(ROCKSDB_JAVA_VERSION)/' > pom.xml rocksdbjavastaticnexusbundlejar: rocksdbjavageneratepom openssl sha1 -r java/pom.xml | awk '{ print $$1 }' > java/target/pom.xml.sha1 - openssl sha1 -r java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar | awk '{ print $$1 }' > java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), openssl sha1 -r java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar | awk '{ print $$1 }' > java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1;) + openssl sha1 -r java/target/forstjni-$(ROCKSDB_JAVA_VERSION).jar | awk '{ print $$1 }' > java/target/forstjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), openssl sha1 -r java/target/forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar | awk '{ print $$1 }' > java/target/forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1;) gpg --yes --output java/target/pom.xml.asc -ab java/pom.xml - gpg --yes -ab java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION).jar - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), gpg --yes -ab java/target/forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar;) - $(JAR_CMD) cvf java/target/nexus-bundle-forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java pom.xml -C java/target pom.xml.sha1 -C java/target pom.xml.asc -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION).jar.asc - $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), $(JAR_CMD) uf java/target/nexus-bundle-forstdbjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1 -C java/target forstdbjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.asc;) + gpg --yes -ab java/target/forstjni-$(ROCKSDB_JAVA_VERSION).jar + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), gpg --yes -ab java/target/forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar;) + $(JAR_CMD) cvf java/target/nexus-bundle-forstjni-$(ROCKSDB_JAVA_VERSION).jar -C java pom.xml -C java/target pom.xml.sha1 -C java/target pom.xml.asc -C java/target forstjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstjni-$(ROCKSDB_JAVA_VERSION).jar.sha1 -C java/target forstjni-$(ROCKSDB_JAVA_VERSION).jar.asc + $(foreach classifier, $(ROCKSDB_JAVA_RELEASE_CLASSIFIERS), $(JAR_CMD) uf java/target/nexus-bundle-forstjni-$(ROCKSDB_JAVA_VERSION).jar -C java/target forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar -C java/target forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.sha1 -C java/target forstjni-$(ROCKSDB_JAVA_VERSION)-$(classifier).jar.asc;) # A version of each $(LIBOBJECTS) compiled with -fPIC @@ -2466,10 +2466,10 @@ ifeq ($(JAVA_HOME),) $(error JAVA_HOME is not set) endif $(AM_V_GEN)cd java; $(MAKE) javalib; - $(AM_V_at)rm -f ./java/target/$(FORSTDBJNILIB) - $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. 
-I./java/forstjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(FORSTDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) + $(AM_V_at)rm -f ./java/target/$(FORSTJNILIB) + $(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. -I./java/forstjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(FORSTJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS) $(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md - $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTDBJNILIB) + $(AM_V_at)cd java/target; $(JAR_CMD) -uf $(ROCKSDB_JAR) $(FORSTJNILIB) $(AM_V_at)cd java/target/classes; $(JAR_CMD) -uf ../$(ROCKSDB_JAR) org/forstdb/*.class org/forstdb/util/*.class $(AM_V_at)openssl sha1 java/target/$(ROCKSDB_JAR) | sed 's/.*= \([0-9a-f]*\)/\1/' > java/target/$(ROCKSDB_JAR).sha1 diff --git a/java/Makefile b/java/Makefile index 66a039845..a73288a85 100644 --- a/java/Makefile +++ b/java/Makefile @@ -357,32 +357,32 @@ java: java-version sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBSample.java - $(AM_V_at)@rm -rf /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni_not_found - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBSample /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni_not_found + $(AM_V_at)@rm -rf /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni_not_found + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBSample /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni_not_found column_family_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/RocksDBColumnFamilySample.java - $(AM_V_at)@rm -rf /tmp/forstdbjni - $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBColumnFamilySample /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstjni + $(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) RocksDBColumnFamilySample /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni transaction_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/TransactionSample.java - $(AM_V_at)@rm -rf /tmp/forstdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) TransactionSample /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstjni + $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) TransactionSample /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni optimistic_transaction_sample: java $(AM_V_GEN)mkdir -p $(SAMPLES_MAIN_CLASSES) $(AM_V_at)$(JAVAC_CMD) -cp $(MAIN_CLASSES) -d $(SAMPLES_MAIN_CLASSES) $(SAMPLES_MAIN_SRC)/OptimisticTransactionSample.java - $(AM_V_at)@rm -rf /tmp/forstdbjni - $(JAVA_CMD) -ea -Xcheck:jni -Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) OptimisticTransactionSample /tmp/forstdbjni - $(AM_V_at)@rm -rf /tmp/forstdbjni + $(AM_V_at)@rm -rf /tmp/forstjni + $(JAVA_CMD) -ea -Xcheck:jni 
-Djava.library.path=target -cp $(MAIN_CLASSES):$(SAMPLES_MAIN_CLASSES) OptimisticTransactionSample /tmp/forstjni + $(AM_V_at)@rm -rf /tmp/forstjni $(JAVA_TEST_LIBDIR): mkdir -p "$(JAVA_TEST_LIBDIR)" diff --git a/java/crossbuild/build-linux-alpine.sh b/java/crossbuild/build-linux-alpine.sh index 561d34141..646f9bff9 100755 --- a/java/crossbuild/build-linux-alpine.sh +++ b/java/crossbuild/build-linux-alpine.sh @@ -66,5 +66,5 @@ cd /tmp &&\ cd /rocksdb make jclean clean PORTABLE=1 make -j8 rocksdbjavastatic -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build +cp /rocksdb/java/target/libforstjni-* /rocksdb-build +cp /rocksdb/java/target/forstjni-* /rocksdb-build diff --git a/java/crossbuild/build-linux-centos.sh b/java/crossbuild/build-linux-centos.sh index 176e3456c..e00729246 100755 --- a/java/crossbuild/build-linux-centos.sh +++ b/java/crossbuild/build-linux-centos.sh @@ -34,5 +34,5 @@ export PATH=$JAVA_HOME:/usr/local/bin:$PATH cd /rocksdb scl enable devtoolset-2 'make clean-not-downloaded' scl enable devtoolset-2 'PORTABLE=1 make -j8 rocksdbjavastatic' -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build +cp /rocksdb/java/target/libforstjni-* /rocksdb-build +cp /rocksdb/java/target/forstjni-* /rocksdb-build diff --git a/java/crossbuild/build-linux.sh b/java/crossbuild/build-linux.sh index 74178adb5..34caa57ea 100755 --- a/java/crossbuild/build-linux.sh +++ b/java/crossbuild/build-linux.sh @@ -9,7 +9,7 @@ export JAVA_HOME=$(echo /usr/lib/jvm/java-7-openjdk*) cd /rocksdb make jclean clean make -j 4 rocksdbjavastatic -cp /rocksdb/java/target/librocksdbjni-* /rocksdb-build -cp /rocksdb/java/target/rocksdbjni-* /rocksdb-build +cp /rocksdb/java/target/libforstjni-* /rocksdb-build +cp /rocksdb/java/target/forstjni-* /rocksdb-build sudo shutdown -h now diff --git a/java/crossbuild/build-win.bat b/java/crossbuild/build-win.bat index 2925ec19a..d0bea9f80 100644 --- a/java/crossbuild/build-win.bat +++ b/java/crossbuild/build-win.bat @@ -12,5 +12,5 @@ cmake -G "Visual Studio 15 Win64" -DWITH_JNI=1 .. cd .. 
-copy build\java\Release\rocksdbjni-shared.dll librocksdbjni-win64.dll -echo Result is in librocksdbjni-win64.dll \ No newline at end of file +copy build\java\Release\forstjni-shared.dll libforstjni-win64.dll +echo Result is in libforstjni-win64.dll \ No newline at end of file diff --git a/java/crossbuild/docker-build-linux-alpine.sh b/java/crossbuild/docker-build-linux-alpine.sh index e3e852efe..fddef0ff1 100755 --- a/java/crossbuild/docker-build-linux-alpine.sh +++ b/java/crossbuild/docker-build-linux-alpine.sh @@ -14,4 +14,4 @@ cd /rocksdb-local-build make clean-not-downloaded PORTABLE=1 make -j2 rocksdbjavastatic -cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar java/target/rocksdbjni-*-linux*.jar.sha1 /rocksdb-java-target +cp java/target/libforstjni-linux*.so java/target/forstjni-*-linux*.jar java/target/forstjni-*-linux*.jar.sha1 /rocksdb-java-target diff --git a/java/crossbuild/docker-build-linux-centos.sh b/java/crossbuild/docker-build-linux-centos.sh index 16581dec7..30ab8c39e 100755 --- a/java/crossbuild/docker-build-linux-centos.sh +++ b/java/crossbuild/docker-build-linux-centos.sh @@ -34,5 +34,5 @@ else PORTABLE=1 make -j2 rocksdbjavastatic fi -cp java/target/librocksdbjni-linux*.so java/target/rocksdbjni-*-linux*.jar java/target/rocksdbjni-*-linux*.jar.sha1 /rocksdb-java-target +cp java/target/libforstjni-linux*.so java/target/forstjni-*-linux*.jar java/target/forstjni-*-linux*.jar.sha1 /rocksdb-java-target diff --git a/java/src/main/java/org/forstdb/NativeLibraryLoader.java b/java/src/main/java/org/forstdb/NativeLibraryLoader.java index 478119dcf..092588fba 100644 --- a/java/src/main/java/org/forstdb/NativeLibraryLoader.java +++ b/java/src/main/java/org/forstdb/NativeLibraryLoader.java @@ -16,7 +16,7 @@ public class NativeLibraryLoader { private static final NativeLibraryLoader instance = new NativeLibraryLoader(); private static boolean initialized = false; - private static final String ROCKSDB_LIBRARY_NAME = "forstdb"; + private static final String ROCKSDB_LIBRARY_NAME = "forst"; private static final String sharedLibraryName = Environment.getSharedLibraryName(ROCKSDB_LIBRARY_NAME); @@ -27,7 +27,7 @@ public class NativeLibraryLoader { Environment.getJniLibraryFileName(ROCKSDB_LIBRARY_NAME); private static final /* @Nullable */ String fallbackJniLibraryFileName = Environment.getFallbackJniLibraryFileName(ROCKSDB_LIBRARY_NAME); - private static final String tempFilePrefix = "libforstdbjni"; + private static final String tempFilePrefix = "libforstjni"; private static final String tempFileSuffix = Environment.getJniLibraryExtension(); /** diff --git a/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java index bf383c91f..4463f51f5 100644 --- a/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java +++ b/java/src/test/java/org/forstdb/NativeLibraryLoaderTest.java @@ -25,7 +25,7 @@ public void tempFolder() throws IOException { NativeLibraryLoader.getInstance().loadLibraryFromJarToTemp( temporaryFolder.getRoot().getAbsolutePath()); final Path path = Paths.get(temporaryFolder.getRoot().getAbsolutePath(), - Environment.getJniLibraryFileName("forstdb")); + Environment.getJniLibraryFileName("forst")); assertThat(Files.exists(path)).isTrue(); assertThat(Files.isReadable(path)).isTrue(); } From 2faec9e8b543cbf8aace84820ce42f7d47dd34af Mon Sep 17 00:00:00 2001 From: fredia Date: Fri, 18 Oct 2024 14:52:10 +0800 Subject: [PATCH 385/386] [FLINK-35928][build] Rename jclass to forst in *.cc --- 
java/forstjni/checkpoint.cc | 2 +- java/forstjni/columnfamilyhandle.cc | 2 +- java/forstjni/compact_range_options.cc | 2 +- java/forstjni/compaction_filter.cc | 2 +- java/forstjni/compaction_job_info.cc | 2 +- java/forstjni/env.cc | 2 +- java/forstjni/flink_compactionfilterjni.cc | 10 +++++----- java/forstjni/jnicallback.cc | 2 +- java/forstjni/options.cc | 4 ++-- java/forstjni/options_util.cc | 2 +- java/forstjni/rocks_callback_object.cc | 2 +- java/forstjni/rocksjni.cc | 8 ++++---- java/forstjni/slice.cc | 6 +++--- java/forstjni/statistics.cc | 2 +- java/forstjni/table_filter.cc | 2 +- java/forstjni/transaction.cc | 2 +- java/forstjni/transaction_db.cc | 2 +- java/forstjni/transaction_log.cc | 2 +- java/forstjni/ttl.cc | 2 +- java/forstjni/write_batch.cc | 2 +- java/forstjni/write_batch_with_index.cc | 4 ++-- utilities/flink/flink_compaction_filter_test.cc | 4 ++-- 22 files changed, 34 insertions(+), 34 deletions(-) diff --git a/java/forstjni/checkpoint.cc b/java/forstjni/checkpoint.cc index dd689b5aa..7a2fd3b8d 100644 --- a/java/forstjni/checkpoint.cc +++ b/java/forstjni/checkpoint.cc @@ -73,7 +73,7 @@ void Java_org_forstdb_Checkpoint_createCheckpoint(JNIEnv* env, jobject /*jobj*/, /* * Class: org_forstdb_Checkpoint * Method: exportColumnFamily - * Signature: (JJLjava/lang/String;)Lorg/rocksdb/ExportImportFilesMetaData; + * Signature: (JJLjava/lang/String;)Lorg/forstdb/ExportImportFilesMetaData; */ jlong Java_org_forstdb_Checkpoint_exportColumnFamily( JNIEnv* env, jobject /*jobj*/, jlong jcheckpoint_handle, diff --git a/java/forstjni/columnfamilyhandle.cc b/java/forstjni/columnfamilyhandle.cc index abca5ff5f..e548a1674 100644 --- a/java/forstjni/columnfamilyhandle.cc +++ b/java/forstjni/columnfamilyhandle.cc @@ -42,7 +42,7 @@ jint Java_org_forstdb_ColumnFamilyHandle_getID(JNIEnv* /*env*/, /* * Class: org_forstdb_ColumnFamilyHandle * Method: getDescriptor - * Signature: (J)Lorg/rocksdb/ColumnFamilyDescriptor; + * Signature: (J)Lorg/forstdb/ColumnFamilyDescriptor; */ jobject Java_org_forstdb_ColumnFamilyHandle_getDescriptor(JNIEnv* env, jobject /*jobj*/, diff --git a/java/forstjni/compact_range_options.cc b/java/forstjni/compact_range_options.cc index 89d8a9156..6f3e1c84e 100644 --- a/java/forstjni/compact_range_options.cc +++ b/java/forstjni/compact_range_options.cc @@ -280,7 +280,7 @@ void Java_org_forstdb_CompactRangeOptions_setFullHistoryTSLow(JNIEnv*, jobject, /* * Class: org_forstdb_CompactRangeOptions * Method: fullHistoryTSLow - * Signature: (J)Lorg/rocksdb/CompactRangeOptions/Timestamp; + * Signature: (J)Lorg/forstdb/CompactRangeOptions/Timestamp; */ jobject Java_org_forstdb_CompactRangeOptions_fullHistoryTSLow(JNIEnv* env, jobject, diff --git a/java/forstjni/compaction_filter.cc b/java/forstjni/compaction_filter.cc index f45234896..739447f96 100644 --- a/java/forstjni/compaction_filter.cc +++ b/java/forstjni/compaction_filter.cc @@ -12,7 +12,7 @@ #include "include/org_forstdb_AbstractCompactionFilter.h" -// +// /* * Class: org_forstdb_AbstractCompactionFilter diff --git a/java/forstjni/compaction_job_info.cc b/java/forstjni/compaction_job_info.cc index b6bef26e1..4097876ba 100644 --- a/java/forstjni/compaction_job_info.cc +++ b/java/forstjni/compaction_job_info.cc @@ -51,7 +51,7 @@ jbyteArray Java_org_forstdb_CompactionJobInfo_columnFamilyName(JNIEnv* env, /* * Class: org_forstdb_CompactionJobInfo * Method: status - * Signature: (J)Lorg/rocksdb/Status; + * Signature: (J)Lorg/forstdb/Status; */ jobject Java_org_forstdb_CompactionJobInfo_status(JNIEnv* env, jclass, jlong jhandle) 
{ diff --git a/java/forstjni/env.cc b/java/forstjni/env.cc index bde4ed574..32a9bbe01 100644 --- a/java/forstjni/env.cc +++ b/java/forstjni/env.cc @@ -123,7 +123,7 @@ void Java_org_forstdb_Env_lowerThreadPoolCPUPriority(JNIEnv*, jobject, /* * Class: org_forstdb_Env * Method: getThreadList - * Signature: (J)[Lorg/rocksdb/ThreadStatus; + * Signature: (J)[Lorg/forstdb/ThreadStatus; */ jobjectArray Java_org_forstdb_Env_getThreadList(JNIEnv* env, jobject, jlong jhandle) { diff --git a/java/forstjni/flink_compactionfilterjni.cc b/java/forstjni/flink_compactionfilterjni.cc index 793c56698..0f17cd4b3 100644 --- a/java/forstjni/flink_compactionfilterjni.cc +++ b/java/forstjni/flink_compactionfilterjni.cc @@ -39,7 +39,7 @@ class JavaListElementFilter JavaListElementFilter(JNIEnv* env, jobject jlist_filter) : JniCallbackBase(env, jlist_filter) { jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( - env, "org/rocksdb/FlinkCompactionFilter$ListElementFilter"); + env, "org/forstdb/FlinkCompactionFilter$ListElementFilter"); if (jclazz == nullptr) { // exception occurred accessing class return; @@ -82,14 +82,14 @@ class JavaListElemenFilterFactory JavaListElemenFilterFactory(JNIEnv* env, jobject jlist_filter_factory) : JniCallbackBase(env, jlist_filter_factory) { jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( - env, "org/rocksdb/FlinkCompactionFilter$ListElementFilterFactory"); + env, "org/forstdb/FlinkCompactionFilter$ListElementFilterFactory"); if (jclazz == nullptr) { // exception occurred accessing class return; } m_jcreate_filter_methodid = env->GetMethodID( jclazz, "createListElementFilter", - "()Lorg/rocksdb/FlinkCompactionFilter$ListElementFilter;"); + "()Lorg/forstdb/FlinkCompactionFilter$ListElementFilter;"); assert(m_jcreate_filter_methodid != nullptr); } @@ -117,7 +117,7 @@ class JavaTimeProvider JavaTimeProvider(JNIEnv* env, jobject jtime_provider) : JniCallbackBase(env, jtime_provider) { jclass jclazz = ROCKSDB_NAMESPACE::JavaClass::getJClass( - env, "org/rocksdb/FlinkCompactionFilter$TimeProvider"); + env, "org/forstdb/FlinkCompactionFilter$TimeProvider"); if (jclazz == nullptr) { // exception occurred accessing class return; @@ -214,7 +214,7 @@ jlong Java_org_forstdb_FlinkCompactionFilter_createNewFlinkCompactionFilter0( /* * Class: org_forstdb_FlinkCompactionFilter * Method: configureFlinkCompactionFilter - * Signature: (JIIJJILorg/rocksdb/FlinkCompactionFilter$ListElementFilter;)Z + * Signature: (JIIJJILorg/forstdb/FlinkCompactionFilter$ListElementFilter;)Z */ jboolean Java_org_forstdb_FlinkCompactionFilter_configureFlinkCompactionFilter( JNIEnv* env, jclass /* jcls */, jlong handle, jint ji_state_type, diff --git a/java/forstjni/jnicallback.cc b/java/forstjni/jnicallback.cc index 51fe1f04c..29db5b960 100644 --- a/java/forstjni/jnicallback.cc +++ b/java/forstjni/jnicallback.cc @@ -4,7 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the callback "bridge" between Java and C++ for -// JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject +// JNI Callbacks from C++ to sub-classes or org.forstdb.RocksCallbackObject #include "forstjni/jnicallback.h" diff --git a/java/forstjni/options.cc b/java/forstjni/options.cc index bc61f470d..02690f60f 100644 --- a/java/forstjni/options.cc +++ b/java/forstjni/options.cc @@ -1784,7 +1784,7 @@ static jobjectArray rocksdb_get_event_listeners_helper( /* * Class: org_forstdb_Options * Method: eventListeners - * Signature: (J)[Lorg/rocksdb/AbstractEventListener; + * Signature: (J)[Lorg/forstdb/AbstractEventListener; */ jobjectArray Java_org_forstdb_Options_eventListeners(JNIEnv* env, jclass, jlong jhandle) { @@ -7220,7 +7220,7 @@ void Java_org_forstdb_DBOptions_setEventListeners(JNIEnv* env, jclass, /* * Class: org_forstdb_DBOptions * Method: eventListeners - * Signature: (J)[Lorg/rocksdb/AbstractEventListener; + * Signature: (J)[Lorg/forstdb/AbstractEventListener; */ jobjectArray Java_org_forstdb_DBOptions_eventListeners(JNIEnv* env, jclass, jlong jhandle) { diff --git a/java/forstjni/options_util.cc b/java/forstjni/options_util.cc index 99c8328a1..8074c8411 100644 --- a/java/forstjni/options_util.cc +++ b/java/forstjni/options_util.cc @@ -141,7 +141,7 @@ jstring Java_org_forstdb_OptionsUtil_getLatestOptionsFileName( /* * Class: org_forstdb_OptionsUtil * Method: readTableFormatConfig - * Signature: (J)Lorg/rocksdb/TableFormatConfig; + * Signature: (J)Lorg/forstdb/TableFormatConfig; */ jobject Java_org_forstdb_OptionsUtil_readTableFormatConfig(JNIEnv* env, jclass, jlong jcf_options) { diff --git a/java/forstjni/rocks_callback_object.cc b/java/forstjni/rocks_callback_object.cc index 19a32866a..19e8fe948 100644 --- a/java/forstjni/rocks_callback_object.cc +++ b/java/forstjni/rocks_callback_object.cc @@ -4,7 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). 
// // This file implements the "bridge" between Java and C++ for -// JNI Callbacks from C++ to sub-classes or org.rocksdb.RocksCallbackObject +// JNI Callbacks from C++ to sub-classes or org.forstdb.RocksCallbackObject #include diff --git a/java/forstjni/rocksjni.cc b/java/forstjni/rocksjni.cc index e9c9b7915..52767ece4 100644 --- a/java/forstjni/rocksjni.cc +++ b/java/forstjni/rocksjni.cc @@ -2210,7 +2210,7 @@ jobjectArray Java_org_forstdb_RocksDB_multiGet__JJ_3_3B_3I_3I_3J( * Class: org_forstdb_RocksDB * Method: multiGet * Signature: - * (JJ[J[Ljava/nio/ByteBuffer;[I[I[Ljava/nio/ByteBuffer;[I[Lorg/rocksdb/Status;)V + * (JJ[J[Ljava/nio/ByteBuffer;[I[I[Ljava/nio/ByteBuffer;[I[Lorg/forstdb/Status;)V */ void Java_org_forstdb_RocksDB_multiGet__JJ_3J_3Ljava_nio_ByteBuffer_2_3I_3I_3Ljava_nio_ByteBuffer_2_3I_3Lorg_forstdb_Status_2( JNIEnv* env, jobject jdb, jlong jdb_handle, jlong jropt_handle, @@ -3587,7 +3587,7 @@ jobjectArray Java_org_forstdb_RocksDB_getLiveFiles(JNIEnv* env, jobject, /* * Class: org_forstdb_RocksDB * Method: getSortedWalFiles - * Signature: (J)[Lorg/rocksdb/LogFile; + * Signature: (J)[Lorg/forstdb/LogFile; */ jobjectArray Java_org_forstdb_RocksDB_getSortedWalFiles(JNIEnv* env, jobject, jlong jdb_handle) { @@ -3674,7 +3674,7 @@ void Java_org_forstdb_RocksDB_deleteFile(JNIEnv* env, jobject, jlong jdb_handle, /* * Class: org_forstdb_RocksDB * Method: getLiveFilesMetaData - * Signature: (J)[Lorg/rocksdb/LiveFileMetaData; + * Signature: (J)[Lorg/forstdb/LiveFileMetaData; */ jobjectArray Java_org_forstdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, jlong jdb_handle) { @@ -3721,7 +3721,7 @@ jobjectArray Java_org_forstdb_RocksDB_getLiveFilesMetaData(JNIEnv* env, jobject, /* * Class: org_forstdb_RocksDB * Method: getColumnFamilyMetaData - * Signature: (JJ)Lorg/rocksdb/ColumnFamilyMetaData; + * Signature: (JJ)Lorg/forstdb/ColumnFamilyMetaData; */ jobject Java_org_forstdb_RocksDB_getColumnFamilyMetaData(JNIEnv* env, jobject, jlong jdb_handle, diff --git a/java/forstjni/slice.cc b/java/forstjni/slice.cc index 3d447562c..a72fa3f24 100644 --- a/java/forstjni/slice.cc +++ b/java/forstjni/slice.cc @@ -20,7 +20,7 @@ #include "forstjni/cplusplus_to_java_convert.h" #include "forstjni/portal.h" -// /* * Class: org_forstdb_AbstractSlice @@ -125,7 +125,7 @@ void Java_org_forstdb_AbstractSlice_disposeInternal(JNIEnv* /*env*/, // -// /* * Class: org_forstdb_Slice @@ -257,7 +257,7 @@ void Java_org_forstdb_Slice_disposeInternalBuf(JNIEnv* /*env*/, // -// /* * Class: org_forstdb_DirectSlice diff --git a/java/forstjni/statistics.cc b/java/forstjni/statistics.cc index c6d0c8257..eff45a3c3 100644 --- a/java/forstjni/statistics.cc +++ b/java/forstjni/statistics.cc @@ -180,7 +180,7 @@ jlong Java_org_forstdb_Statistics_getAndResetTickerCount(JNIEnv*, jobject, /* * Class: org_forstdb_Statistics * Method: getHistogramData - * Signature: (JB)Lorg/rocksdb/HistogramData; + * Signature: (JB)Lorg/forstdb/HistogramData; */ jobject Java_org_forstdb_Statistics_getHistogramData(JNIEnv* env, jobject, jlong jhandle, diff --git a/java/forstjni/table_filter.cc b/java/forstjni/table_filter.cc index ac234d889..208aca21a 100644 --- a/java/forstjni/table_filter.cc +++ b/java/forstjni/table_filter.cc @@ -4,7 +4,7 @@ // (found in the LICENSE.Apache file in the root directory). // // This file implements the "bridge" between Java and C++ for -// org.rocksdb.AbstractTableFilter. +// org.forstdb.AbstractTableFilter. 
#include diff --git a/java/forstjni/transaction.cc b/java/forstjni/transaction.cc index 5212d2ad8..7b64a7458 100644 --- a/java/forstjni/transaction.cc +++ b/java/forstjni/transaction.cc @@ -1700,7 +1700,7 @@ jboolean Java_org_forstdb_Transaction_isDeadlockDetect(JNIEnv* /*env*/, /* * Class: org_forstdb_Transaction * Method: getWaitingTxns - * Signature: (J)Lorg/rocksdb/Transaction/WaitingTransactions; + * Signature: (J)Lorg/forstdb/Transaction/WaitingTransactions; */ jobject Java_org_forstdb_Transaction_getWaitingTxns(JNIEnv* env, jobject jtransaction_obj, diff --git a/java/forstjni/transaction_db.cc b/java/forstjni/transaction_db.cc index 0d8ae20c6..e7df93ebd 100644 --- a/java/forstjni/transaction_db.cc +++ b/java/forstjni/transaction_db.cc @@ -357,7 +357,7 @@ jobject Java_org_forstdb_TransactionDB_getLockStatusData(JNIEnv* env, jobject, /* * Class: org_forstdb_TransactionDB * Method: getDeadlockInfoBuffer - * Signature: (J)[Lorg/rocksdb/TransactionDB/DeadlockPath; + * Signature: (J)[Lorg/forstdb/TransactionDB/DeadlockPath; */ jobjectArray Java_org_forstdb_TransactionDB_getDeadlockInfoBuffer( JNIEnv* env, jobject jobj, jlong jhandle) { diff --git a/java/forstjni/transaction_log.cc b/java/forstjni/transaction_log.cc index 8ddc64322..1a4719d75 100644 --- a/java/forstjni/transaction_log.cc +++ b/java/forstjni/transaction_log.cc @@ -68,7 +68,7 @@ void Java_org_forstdb_TransactionLogIterator_status(JNIEnv* env, /* * Class: org_forstdb_TransactionLogIterator * Method: getBatch - * Signature: (J)Lorg/rocksdb/TransactionLogIterator$BatchResult + * Signature: (J)Lorg/forstdb/TransactionLogIterator$BatchResult */ jobject Java_org_forstdb_TransactionLogIterator_getBatch(JNIEnv* env, jobject /*jobj*/, diff --git a/java/forstjni/ttl.cc b/java/forstjni/ttl.cc index 4621c245f..98cd83b20 100644 --- a/java/forstjni/ttl.cc +++ b/java/forstjni/ttl.cc @@ -179,7 +179,7 @@ void Java_org_forstdb_TtlDB_closeDatabase(JNIEnv* /* env */, jclass, /* * Class: org_forstdb_TtlDB * Method: createColumnFamilyWithTtl - * Signature: (JLorg/rocksdb/ColumnFamilyDescriptor;[BJI)J; + * Signature: (JLorg/forstdb/ColumnFamilyDescriptor;[BJI)J; */ jlong Java_org_forstdb_TtlDB_createColumnFamilyWithTtl(JNIEnv* env, jobject, jlong jdb_handle, diff --git a/java/forstjni/write_batch.cc b/java/forstjni/write_batch.cc index d9dc5557a..aa6c5f226 100644 --- a/java/forstjni/write_batch.cc +++ b/java/forstjni/write_batch.cc @@ -639,7 +639,7 @@ void Java_org_forstdb_WriteBatch_markWalTerminationPoint(JNIEnv* /*env*/, /* * Class: org_forstdb_WriteBatch * Method: getWalTerminationPoint - * Signature: (J)Lorg/rocksdb/WriteBatch/SavePoint; + * Signature: (J)Lorg/forstdb/WriteBatch/SavePoint; */ jobject Java_org_forstdb_WriteBatch_getWalTerminationPoint(JNIEnv* env, jobject /*jobj*/, diff --git a/java/forstjni/write_batch_with_index.cc b/java/forstjni/write_batch_with_index.cc index e4ed9a449..e13c750e1 100644 --- a/java/forstjni/write_batch_with_index.cc +++ b/java/forstjni/write_batch_with_index.cc @@ -486,7 +486,7 @@ void Java_org_forstdb_WriteBatchWithIndex_setMaxBytes(JNIEnv* /*env*/, /* * Class: org_forstdb_WriteBatchWithIndex * Method: getWriteBatch - * Signature: (J)Lorg/rocksdb/WriteBatch; + * Signature: (J)Lorg/forstdb/WriteBatch; */ jobject Java_org_forstdb_WriteBatchWithIndex_getWriteBatch(JNIEnv* env, jobject /*jobj*/, @@ -898,7 +898,7 @@ jlongArray Java_org_forstdb_WBWIRocksIterator_entry1(JNIEnv* env, results[0] = ROCKSDB_NAMESPACE::WriteTypeJni::toJavaWriteType(we.type); // NOTE: key_slice and value_slice will be freed by - // 
org.rocksdb.DirectSlice#close
+ // org.forstdb.DirectSlice#close
 auto* key_slice = new ROCKSDB_NAMESPACE::Slice(we.key.data(), we.key.size());
 results[1] = GET_CPLUSPLUS_POINTER(key_slice);
diff --git a/utilities/flink/flink_compaction_filter_test.cc b/utilities/flink/flink_compaction_filter_test.cc
index 26613ae68..192191cd3 100644
--- a/utilities/flink/flink_compaction_filter_test.cc
+++ b/utilities/flink/flink_compaction_filter_test.cc
@@ -131,8 +131,8 @@ void Deinit() { delete filter; }
 TEST(FlinkStateTtlTest, CheckStateTypeEnumOrder) { // NOLINT
 // if the order changes it also needs to be adjusted in Java client:
- // in org.rocksdb.FlinkCompactionFilter
- // and in org.rocksdb.FlinkCompactionFilterTest
+ // in org.forstdb.FlinkCompactionFilter
+ // and in org.forstdb.FlinkCompactionFilterTest
 EXPECT_EQ(DISABLED, 0);
 EXPECT_EQ(VALUE, 1);
 EXPECT_EQ(LIST, 2);
From b1015fe746963780b9a53a0d0b10a3918f50b635 Mon Sep 17 00:00:00 2001
From: Zakelly
Date: Wed, 23 Oct 2024 15:45:47 +0800
Subject: [PATCH 386/386] [build] Fix packaging error
---
 HISTORY.md | 0
 Makefile | 3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 HISTORY.md
diff --git a/HISTORY.md b/HISTORY.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/Makefile b/Makefile
index b643f96a1..fa6948417 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@
 #-----------------------------------------------
-FORST_VERSION ?= 0.1.0
+FORST_VERSION ?= 0.1.2-beta
 BASH_EXISTS := $(shell which bash)
 SHELL := $(shell which bash)
@@ -2117,6 +2117,7 @@ else
 endif
 endif
 ROCKSDB_JAVA_VERSION ?= $(ROCKSDB_MAJOR).$(ROCKSDB_MINOR).$(ROCKSDB_PATCH)
+ROCKSDB_JAVA_VERSION = $(FORST_VERSION)
 ROCKSDB_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-linux$(ARCH)$(JNI_LIBC_POSTFIX).jar
 ROCKSDB_JAR_ALL = forstjni-$(ROCKSDB_JAVA_VERSION).jar
 ROCKSDB_JAVADOCS_JAR = forstjni-$(ROCKSDB_JAVA_VERSION)-javadoc.jar